8228400: Remove built-in AArch64 simulator

Reviewed-by: adinn, aph, dsamersoff
2026-07-17 14:38:55 +00:00 · 2019-07-29 11:14:06 +02:00 · 2019-07-29 11:14:06 +02:00 · f5b92a4ca7
commit f5b92a4ca7
parent 5bbe479ade
25 changed files with 83 additions and 2306 deletions
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1642,7 +1642,7 @@ int MachCallRuntimeNode::ret_addr_offset() {
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
-  //   blrt rscratch1
+  //   blr(rscratch1)
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    return MacroAssembler::far_branch_size();
@@ -1778,10 +1778,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {

  __ build_frame(framesize);

-  if (NotifySimulator) {
-    __ notify(Assembler::method_entry);
-  }
-
  if (VerifyStackAtCalls) {
    Unimplemented();
  }
@@ -1842,10 +1838,6 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {

  __ remove_frame(framesize);

-  if (NotifySimulator) {
-    __ notify(Assembler::method_reentry);
-  }
-
  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }
@@ -2507,47 +2499,6 @@ bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, Ve
 void Compile::reshape_address(AddPNode* addp) {
 }

-// helper for encoding java_to_runtime calls on sim
-//
-// this is needed to compute the extra arguments required when
-// planting a call to the simulator blrt instruction. the TypeFunc
-// can be queried to identify the counts for integral, and floating
-// arguments and the return type
-
-static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
-{
-  int gps = 0;
-  int fps = 0;
-  const TypeTuple *domain = tf->domain();
-  int max = domain->cnt();
-  for (int i = TypeFunc::Parms; i < max; i++) {
-    const Type *t = domain->field_at(i);
-    switch(t->basic_type()) {
-    case T_FLOAT:
-    case T_DOUBLE:
-      fps++;
-    default:
-      gps++;
-    }
-  }
-  gpcnt = gps;
-  fpcnt = fps;
-  BasicType rt = tf->return_type();
-  switch (rt) {
-  case T_VOID:
-    rtype = MacroAssembler::ret_type_void;
-    break;
-  default:
-    rtype = MacroAssembler::ret_type_integral;
-    break;
-  case T_FLOAT:
-    rtype = MacroAssembler::ret_type_float;
-    break;
-  case T_DOUBLE:
-    rtype = MacroAssembler::ret_type_double;
-    break;
-  }
-}

 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
@@ -3497,7 +3448,7 @@ encode %{

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
-    // will be in a reachable segment) otherwise we have to use a blrt
+    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
@@ -3508,16 +3459,12 @@ encode %{
        return;
      }
    } else {
-      int gpcnt;
-      int fpcnt;
-      int rtype;
-      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
-      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
+      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
--- a/src/hotspot/cpu/aarch64/aarch64_call.cpp
+++ b/src/hotspot/cpu/aarch64/aarch64_call.cpp
@@ -1,200 +0,0 @@
-/*
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifdef BUILTIN_SIM
-
-#include <stdio.h>
-#include <sys/types.h>
-#include "asm/macroAssembler.hpp"
-#include "asm/macroAssembler.inline.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "../../../../../../simulator/cpustate.hpp"
-#include "../../../../../../simulator/simulator.hpp"
-
-/*
- * a routine to initialise and enter ARM simulator execution when
- * calling into ARM code from x86 code.
- *
- * we maintain a simulator per-thread and provide it with 8 Mb of
- * stack space
- */
-#define SIM_STACK_SIZE (1024 * 1024) // in units of u_int64_t
-
-extern "C" u_int64_t get_alt_stack()
-{
-  return AArch64Simulator::altStack();
-}
-
-extern "C" void setup_arm_sim(void *sp, u_int64_t calltype)
-{
-  // n.b. this function runs on the simulator stack so as to avoid
-  // simulator frames appearing in between VM x86 and ARM frames. note
-  // that arfgument sp points to the old (VM) stack from which the
-  // call into the sim was made. The stack switch and entry into this
-  // routine is handled by x86 prolog code planted in the head of the
-  // ARM code buffer which the sim is about to start executing (see
-  // aarch64_linkage.S).
-  //
-  // The first ARM instruction in the buffer is identified by fnptr
-  // stored at the top of the old stack. x86 register contents precede
-  // fnptr. preceding that are the fp and return address of the VM
-  // caller into ARM code. any extra, non-register arguments passed to
-  // the linkage routine precede the fp (this is as per any normal x86
-  // call wirth extra args).
-  //
-  // note that the sim creates Java frames on the Java stack just
-  // above sp (i.e. directly above fnptr). it sets the sim FP register
-  // to the pushed fp for the caller effectively eliding the register
-  // data saved by the linkage routine.
-  //
-  // x86 register call arguments are loaded from the stack into ARM
-  // call registers. if extra arguments occur preceding the x86
-  // caller's fp then they are copied either into extra ARM registers
-  // (ARM has 8 rather than 6 gp call registers) or up the stack
-  // beyond the saved x86 registers so that they immediately precede
-  // the ARM frame where the ARM calling convention expects them to
-  // be.
-  //
-  // n.b. the number of register/stack values passed to the ARM code
-  // is determined by calltype
-  //
-  // +--------+
-  // | fnptr  |  <--- argument sp points here
-  // +--------+  |
-  // | rax    |  | return slot if we need to return a value
-  // +--------+  |
-  // | rdi    |  increasing
-  // +--------+  address
-  // | rsi    |  |
-  // +--------+  V
-  // | rdx    |
-  // +--------+
-  // | rcx    |
-  // +--------+
-  // | r8     |
-  // +--------+
-  // | r9     |
-  // +--------+
-  // | xmm0   |
-  // +--------+
-  // | xmm1   |
-  // +--------+
-  // | xmm2   |
-  // +--------+
-  // | xmm3   |
-  // +--------+
-  // | xmm4   |
-  // +--------+
-  // | xmm5   |
-  // +--------+
-  // | xmm6   |
-  // +--------+
-  // | xmm7   |
-  // +--------+
-  // | fp     |
-  // +--------+
-  // | caller |
-  // | ret ip |
-  // +--------+
-  // | arg0   | <-- any extra call args start here
-  // +--------+     offset = 18 * wordSize
-  // | . . .  |     (i.e. 1 * calladdr + 1 * rax  + 6 * gp call regs
-  //                      + 8 * fp call regs + 2 * frame words)
-  //
-  // we use a unique sim/stack per thread
-  const int cursor2_offset = 18;
-  const int fp_offset = 16;
-  u_int64_t *cursor = (u_int64_t *)sp;
-  u_int64_t *cursor2 = ((u_int64_t *)sp) + cursor2_offset;
-  u_int64_t *fp = ((u_int64_t *)sp) + fp_offset;
-  int gp_arg_count = calltype & 0xf;
-  int fp_arg_count = (calltype >> 4) & 0xf;
-  int return_type = (calltype >> 8) & 0x3;
-  AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-  // save previous cpu state in case this is a recursive entry
-  CPUState saveState = sim->getCPUState();
-  // set up initial sim pc, sp and fp registers
-  sim->init(*cursor++, (u_int64_t)sp, (u_int64_t)fp);
-  u_int64_t *return_slot = cursor++;
-
-  // if we need to pass the sim extra args on the stack then bump
-  // the stack pointer now
-  u_int64_t *cursor3 = (u_int64_t *)sim->getCPUState().xreg(SP, 1);
-  if (gp_arg_count > 8) {
-    cursor3 -= gp_arg_count - 8;
-  }
-  if (fp_arg_count > 8) {
-    cursor3 -= fp_arg_count - 8;
-  }
-  sim->getCPUState().xreg(SP, 1) = (u_int64_t)(cursor3++);
-
-  for (int i = 0; i < gp_arg_count; i++) {
-    if (i < 6) {
-      // copy saved register to sim register
-      GReg reg = (GReg)i;
-      sim->getCPUState().xreg(reg, 0) = *cursor++;
-    } else if (i < 8) {
-      // copy extra int arg to sim register
-      GReg reg = (GReg)i;
-      sim->getCPUState().xreg(reg, 0) = *cursor2++;
-    } else {
-      // copy extra fp arg to sim stack
-      *cursor3++ = *cursor2++;
-    }
-  }
-  for (int i = 0; i < fp_arg_count; i++) {
-    if (i < 8) {
-      // copy saved register to sim register
-      GReg reg = (GReg)i;
-      sim->getCPUState().xreg(reg, 0) = *cursor++;
-    } else {
-      // copy extra arg to sim stack
-      *cursor3++ = *cursor2++;
-    }
-  }
-  AArch64Simulator::status_t return_status = sim->run();
-  if (return_status != AArch64Simulator::STATUS_RETURN){
-    sim->simPrint0();
-    fatal("invalid status returned from simulator.run()\n");
-  }
-  switch (return_type) {
-  case MacroAssembler::ret_type_void:
-  default:
-    break;
-  case MacroAssembler::ret_type_integral:
-  // this overwrites the saved r0
-    *return_slot = sim->getCPUState().xreg(R0, 0);
-    break;
-  case MacroAssembler::ret_type_float:
-    *(float *)return_slot = sim->getCPUState().sreg(V0);
-    break;
-  case MacroAssembler::ret_type_double:
-    *(double *)return_slot = sim->getCPUState().dreg(V0);
-    break;
-  }
-  // restore incoimng cpu state
-  sim->getCPUState() = saveState;
-}
-
-#endif
--- a/src/hotspot/cpu/aarch64/aarch64_linkage.S
+++ b/src/hotspot/cpu/aarch64/aarch64_linkage.S
@@ -1,167 +0,0 @@
-#
-# Copyright (c) 2012, Red Hat. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-
-# Routines used to enable x86 VM C++ code to invoke JIT-compiled ARM code
-# -- either Java methods or generated stub -- and to allow JIT-compiled
-# ARM code to invoke x86 VM C++ code
-#
-# the code for aarch64_stub_prolog below can be copied into the start
-# of the ARM code buffer and patched with a link to the
-# C++ routine which starts execution on the simulator. the ARM
-# code can be generated immediately following the copied code.
-
-#ifdef BUILTIN_SIM
-
-	.data
-        .globl setup_arm_sim,
-	.type  setup_arm_sim,@function
-        .globl get_alt_stack,
-	.type  get_alt_stack,@function
-        .globl aarch64_stub_prolog
-        .p2align  4
-aarch64_stub_prolog:
-	// entry point
-4:	lea 1f(%rip), %r11
-	mov (%r11), %r10
-	mov (%r10), %r10
-	jmp *%r10
-	.p2align 4
-1:
-	.set entry_offset, . - 1b
-	.quad aarch64_prolog_ptr
-	// 64 bit int used to idenitfy called fn arg/return types
-	.set calltype_offset, . - 1b
-	.quad 0
-	// arm JIT code follows the stub
-	.set arm_code_offset, . - 1b
-	.size aarch64_stub_prolog, .-aarch64_stub_prolog
-aarch64_stub_prolog_end:
-
-	.text
-aarch64_prolog_ptr:
-	.quad aarch64_prolog
-
-        .globl aarch64_prolog
-aarch64_prolog:
-	.cfi_startproc
-	pushq	%rbp
-	.cfi_def_cfa_offset 16
-	.cfi_offset 6, -16
-	movq	%rsp, %rbp
-	.cfi_def_cfa_register 6
-	// save all registers used to pass args
-	sub $8, %rsp
-	movd %xmm7, (%rsp)
-	sub $8, %rsp
-	movd %xmm6, (%rsp)
-	sub $8, %rsp
-	movd %xmm5, (%rsp)
-	sub $8, %rsp
-	movd %xmm4, (%rsp)
-	sub $8, %rsp
-	movd %xmm3, (%rsp)
-	sub $8, %rsp
-	movd %xmm2, (%rsp)
-	sub $8, %rsp
-	movd %xmm1, (%rsp)
-	sub $8, %rsp
-	movd %xmm0, (%rsp)
-	push %r9
-	push %r8
-	push %rcx
-	push %rdx
-	push %rsi
-	push %rdi
-	// save rax -- this stack slot will be rewritten with a
-	// return value if needed
-	push %rax
-	// temporarily save r11 while we find the other stack
-	push %r11
-	// retrieve alt stack
-	call get_alt_stack@PLT
-	pop %r11
-	// push start of arm code
-	lea (arm_code_offset)(%r11), %rsi
-	push %rsi
-	// load call type code in arg reg 1
-	mov (calltype_offset)(%r11), %rsi
-	// load current stack pointer in arg reg 0
-	mov %rsp, %rdi
-	// switch to alt stack
-	mov %rax, %rsp
-	// save previous stack pointer on new stack
-	push %rdi
-	// 16-align the new stack pointer
-	push %rdi
-	// call sim setup routine
-	call setup_arm_sim@PLT
-	// switch back to old stack
-	pop %rsp
-	// pop start of arm code
-	pop %rdi
-	// pop rax -- either restores old value or installs return value
-	pop %rax
-	// pop arg registers
-	pop %rdi
-	pop %rsi
-	pop %rdx
-	pop %rcx
-	pop %r8
-	pop %r9
-	movd (%rsp), %xmm0
-	add $8, %rsp
-	movd (%rsp), %xmm1
-	add $8, %rsp
-	movd (%rsp), %xmm2
-	add $8, %rsp
-	movd (%rsp), %xmm3
-	add $8, %rsp
-	movd (%rsp), %xmm4
-	add $8, %rsp
-	movd (%rsp), %xmm5
-	add $8, %rsp
-	movd (%rsp), %xmm6
-	add $8, %rsp
-	movd (%rsp), %xmm7
-	add $8, %rsp
-	leave
-	.cfi_def_cfa 7, 8
-	ret
-	.cfi_endproc
-
-
-        .p2align  4
-get_pc:
-	// get return pc in rdi and then push it back
-	pop %rdi
-	push %rdi
-	ret
-
-	.p2align 4
-	.long
-	.globl aarch64_stub_prolog_size
-	.type  aarch64_stub_prolog_size,@function
-aarch64_stub_prolog_size:
-	leaq  aarch64_stub_prolog_end - aarch64_stub_prolog, %rax
-	ret
-
-#endif
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
@@ -38,11 +38,6 @@ const unsigned long Assembler::asm_bp = 0x00007fffee09ac88;
 #include "memory/resourceArea.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
-
-// for the moment we reuse the logical/floating point immediate encode
-// and decode functiosn provided by the simulator. when we move to
-// real hardware we will need to pull taht code into here
-
 #include "immediate_aarch64.hpp"

 extern "C" void entry(CodeBuffer *cb);
@@ -1755,21 +1750,6 @@ int AbstractAssembler::code_fill_byte() {
 void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); }


-// these are the functions provided by the simulator which are used to
-// encode and decode logical immediates and floating point immediates
-//
-//   u_int64_t logical_immediate_for_encoding(u_int32_t encoding);
-//
-//   u_int32_t encoding_for_logical_immediate(u_int64_t immediate);
-//
-//   u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp);
-//
-//   u_int32_t encoding_for_fp_immediate(float immediate);
-//
-// we currently import these from the simulator librray but the
-// definitions will need to be moved to here when we switch to real
-// hardware.
-
 // and now the routines called by the assembler which encapsulate the
 // above encode and decode functions

--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -2662,137 +2662,6 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V
    f(0, 10), rf(Vn, 5), rf(Vd, 0);
  }

-/* Simulator extensions to the ISA
-
-   haltsim
-
-   takes no arguments, causes the sim to enter a debug break and then
-   return from the simulator run() call with STATUS_HALT? The linking
-   code will call fatal() when it sees STATUS_HALT.
-
-   blrt Xn, Wm
-   blrt Xn, #gpargs, #fpargs, #type
-   Xn holds the 64 bit x86 branch_address
-   call format is encoded either as immediate data in the call
-   or in register Wm. In the latter case
-     Wm[13..6] = #gpargs,
-     Wm[5..2] = #fpargs,
-     Wm[1,0] = #type
-
-   calls the x86 code address 'branch_address' supplied in Xn passing
-   arguments taken from the general and floating point registers according
-   to the supplied counts 'gpargs' and 'fpargs'. may return a result in r0
-   or v0 according to the the return type #type' where
-
-   address branch_address;
-   uimm4 gpargs;
-   uimm4 fpargs;
-   enum ReturnType type;
-
-   enum ReturnType
-     {
-       void_ret = 0,
-       int_ret = 1,
-       long_ret = 1,
-       obj_ret = 1, // i.e. same as long
-       float_ret = 2,
-       double_ret = 3
-     }
-
-   notify
-
-   notifies the simulator of a transfer of control. instr[14:0]
-   identifies the type of change of control.
-
-   0 ==> initial entry to a method.
-
-   1 ==> return into a method from a submethod call.
-
-   2 ==> exit out of Java method code.
-
-   3 ==> start execution for a new bytecode.
-
-   in cases 1 and 2 the simulator is expected to use a JVM callback to
-   identify the name of the specific method being executed. in case 4
-   the simulator is expected to use a JVM callback to identify the
-   bytecode index.
-
-   Instruction encodings
-   ---------------------
-
-   These are encoded in the space with instr[28:25] = 00 which is
-   unallocated. Encodings are
-
-                     10987654321098765432109876543210
-   PSEUDO_HALT   = 0x11100000000000000000000000000000
-   PSEUDO_BLRT  = 0x11000000000000000_______________
-   PSEUDO_BLRTR = 0x1100000000000000100000__________
-   PSEUDO_NOTIFY = 0x10100000000000000_______________
-
-   instr[31,29] = op1 : 111 ==> HALT, 110 ==> BLRT/BLRTR, 101 ==> NOTIFY
-
-   for BLRT
-     instr[14,11] = #gpargs, instr[10,7] = #fpargs
-     instr[6,5] = #type, instr[4,0] = Rn
-   for BLRTR
-     instr[9,5] = Rm, instr[4,0] = Rn
-   for NOTIFY
-     instr[14:0] = type : 0 ==> entry, 1 ==> reentry, 2 ==> exit, 3 ==> bcstart
-*/
-
-  enum NotifyType { method_entry, method_reentry, method_exit, bytecode_start };
-
-  virtual void notify(int type) {
-    if (UseBuiltinSim) {
-      starti;
-      //  109
-      f(0b101, 31, 29);
-      //  87654321098765
-      f(0b00000000000000, 28, 15);
-      f(type, 14, 0);
-    }
-  }
-
-  void blrt(Register Rn, int gpargs, int fpargs, int type) {
-    if (UseBuiltinSim) {
-      starti;
-      f(0b110, 31 ,29);
-      f(0b00, 28, 25);
-      //  4321098765
-      f(0b0000000000, 24, 15);
-      f(gpargs, 14, 11);
-      f(fpargs, 10, 7);
-      f(type, 6, 5);
-      rf(Rn, 0);
-    } else {
-      blr(Rn);
-    }
-  }
-
-  void blrt(Register Rn, Register Rm) {
-    if (UseBuiltinSim) {
-      starti;
-      f(0b110, 31 ,29);
-      f(0b00, 28, 25);
-      //  4321098765
-      f(0b0000000001, 24, 15);
-      //  43210
-      f(0b00000, 14, 10);
-      rf(Rm, 5);
-      rf(Rn, 0);
-    } else {
-      blr(Rn);
-    }
-  }
-
-  void haltsim() {
-    starti;
-    f(0b111, 31 ,29);
-    f(0b00, 28, 27);
-    //  654321098765432109876543210
-    f(0b000000000000000000000000000, 26, 0);
-  }
-
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
  }

--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -2902,40 +2902,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg
    __ far_call(RuntimeAddress(dest));
  } else {
    __ mov(rscratch1, RuntimeAddress(dest));
-    int len = args->length();
-    int type = 0;
-    if (! result->is_illegal()) {
-      switch (result->type()) {
-      case T_VOID:
-        type = 0;
-        break;
-      case T_INT:
-      case T_LONG:
-      case T_OBJECT:
-        type = 1;
-        break;
-      case T_FLOAT:
-        type = 2;
-        break;
-      case T_DOUBLE:
-        type = 3;
-        break;
-      default:
-        ShouldNotReachHere();
-        break;
-      }
-    }
-    int num_gpargs = 0;
-    int num_fpargs = 0;
-    for (int i = 0; i < args->length(); i++) {
-      LIR_Opr arg = args->at(i);
-      if (arg->type() == T_FLOAT || arg->type() == T_DOUBLE) {
-        num_fpargs++;
-      } else {
-        num_gpargs++;
-      }
-    }
-    __ blrt(rscratch1, num_gpargs, num_fpargs, type);
+    __ blr(rscratch1);
  }

  if (info != NULL) {
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -336,16 +336,10 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
  // Note that we do this before doing an enter().
  generate_stack_overflow_check(bang_size_in_bytes);
  MacroAssembler::build_frame(framesize + 2 * wordSize);
-  if (NotifySimulator) {
-    notify(Assembler::method_entry);
-  }
 }

 void C1_MacroAssembler::remove_frame(int framesize) {
  MacroAssembler::remove_frame(framesize + 2 * wordSize);
-  if (NotifySimulator) {
-    notify(Assembler::method_reentry);
-  }
 }


--- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
@@ -63,7 +63,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre

  // do the call
  lea(rscratch1, RuntimeAddress(entry));
-  blrt(rscratch1, args_size + 1, 8, 1);
+  blr(rscratch1);
  bind(retaddr);
  int call_offset = offset();
  // verify callee-saved register
@@ -538,7 +538,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
  __ set_last_Java_frame(sp, rfp, retaddr, rscratch1);
  // do the call
  __ lea(rscratch1, RuntimeAddress(target));
-  __ blrt(rscratch1, 1, 0, 1);
+  __ blr(rscratch1);
  __ bind(retaddr);
  OopMapSet* oop_maps = new OopMapSet();
  oop_maps->add_gc_map(__ offset(), oop_map);
--- a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
@@ -41,13 +41,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false);
 define_pd_global(bool, ProfileTraps,                 false);
 define_pd_global(bool, UseOnStackReplacement,        true );
 define_pd_global(bool, TieredCompilation,            false);
-#ifdef BUILTIN_SIM
-// We compile very aggressively with the builtin simulator because
-// doing so greatly reduces run times and tests more code.
-define_pd_global(intx, CompileThreshold,             150 );
-#else
 define_pd_global(intx, CompileThreshold,             1500 );
-#endif

 define_pd_global(intx, OnStackReplacePercentage,     933  );
 define_pd_global(intx, FreqInlineSize,               325  );
--- a/src/hotspot/cpu/aarch64/cpustate_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/cpustate_aarch64.hpp
@@ -1,595 +0,0 @@
-/*
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef _CPU_STATE_H
-#define _CPU_STATE_H
-
-#include <sys/types.h>
-
-/*
- * symbolic names used to identify general registers which also match
- * the registers indices in machine code
- *
- * We have 32 general registers which can be read/written as 32 bit or
- * 64 bit sources/sinks and are appropriately referred to as Wn or Xn
- * in the assembly code.  Some instructions mix these access modes
- * (e.g. ADD X0, X1, W2) so the implementation of the instruction
- * needs to *know* which type of read or write access is required.
- */
-enum GReg {
-  R0,
-  R1,
-  R2,
-  R3,
-  R4,
-  R5,
-  R6,
-  R7,
-  R8,
-  R9,
-  R10,
-  R11,
-  R12,
-  R13,
-  R14,
-  R15,
-  R16,
-  R17,
-  R18,
-  R19,
-  R20,
-  R21,
-  R22,
-  R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28,
-  R29,
-  R30,
-  R31,
-  // and now the aliases
-  RSCRATCH1=R8,
-  RSCRATCH2=R9,
-  RMETHOD=R12,
-  RESP=R20,
-  RDISPATCH=R21,
-  RBCP=R22,
-  RLOCALS=R24,
-  RMONITORS=R25,
-  RCPOOL=R26,
-  RHEAPBASE=R27,
-  RTHREAD=R28,
-  FP = R29,
-  LR = R30,
-  SP = R31,
-  ZR = R31
-};
-
-/*
- * symbolic names used to refer to floating point registers which also
- * match the registers indices in machine code
- *
- * We have 32 FP registers which can be read/written as 8, 16, 32, 64
- * and 128 bit sources/sinks and are appropriately referred to as Bn,
- * Hn, Sn, Dn and Qn in the assembly code. Some instructions mix these
- * access modes (e.g. FCVT S0, D0) so the implementation of the
- * instruction needs to *know* which type of read or write access is
- * required.
- */
-
-enum VReg {
-  V0,
-  V1,
-  V2,
-  V3,
-  V4,
-  V5,
-  V6,
-  V7,
-  V8,
-  V9,
-  V10,
-  V11,
-  V12,
-  V13,
-  V14,
-  V15,
-  V16,
-  V17,
-  V18,
-  V19,
-  V20,
-  V21,
-  V22,
-  V23,
-  V24,
-  V25,
-  V26,
-  V27,
-  V28,
-  V29,
-  V30,
-  V31,
-};
-
-/**
- * all the different integer bit patterns for the components of a
- * general register are overlaid here using a union so as to allow all
- * reading and writing of the desired bits.
- *
- * n.b. the ARM spec says that when you write a 32 bit register you
- * are supposed to write the low 32 bits and zero the high 32
- * bits. But we don't actually have to care about this because Java
- * will only ever consume the 32 bits value as a 64 bit quantity after
- * an explicit extend.
- */
-union GRegisterValue
-{
-  int8_t s8;
-  int16_t s16;
-  int32_t s32;
-  int64_t s64;
-  u_int8_t u8;
-  u_int16_t u16;
-  u_int32_t u32;
-  u_int64_t u64;
-};
-
-class GRegister
-{
-public:
-  GRegisterValue value;
-};
-
-/*
- * float registers provide for storage of a single, double or quad
- * word format float in the same register. single floats are not
- * paired within each double register as per 32 bit arm. instead each
- * 128 bit register Vn embeds the bits for Sn, and Dn in the lower
- * quarter and half, respectively, of the bits for Qn.
- *
- * The upper bits can also be accessed as single or double floats by
- * the float vector operations using indexing e.g. V1.D[1], V1.S[3]
- * etc and, for SIMD operations using a horrible index range notation.
- *
- * The spec also talks about accessing float registers as half words
- * and bytes with Hn and Bn providing access to the low 16 and 8 bits
- * of Vn but it is not really clear what these bits represent. We can
- * probably ignore this for Java anyway. However, we do need to access
- * the raw bits at 32 and 64 bit resolution to load to/from integer
- * registers.
- */
-
-union FRegisterValue
-{
-  float s;
-  double d;
-  long double q;
-  // eventually we will need to be able to access the data as a vector
-  // the integral array elements allow us to access the bits in s, d,
-  // q, vs and vd at an appropriate level of granularity
-  u_int8_t vb[16];
-  u_int16_t vh[8];
-  u_int32_t vw[4];
-  u_int64_t vx[2];
-  float vs[4];
-  double vd[2];
-};
-
-class FRegister
-{
-public:
-  FRegisterValue value;
-};
-
-/*
- * CPSR register -- this does not exist as a directly accessible
- * register but we need to store the flags so we can implement
- * flag-seting and flag testing operations
- *
- * we can possibly use injected x86 asm to report the outcome of flag
- * setting operations. if so we will need to grab the flags
- * immediately after the operation in order to ensure we don't lose
- * them because of the actions of the simulator. so we still need
- * somewhere to store the condition codes.
- */
-
-class CPSRRegister
-{
-public:
-  u_int32_t value;
-
-/*
- * condition register bit select values
- *
- * the order of bits here is important because some of
- * the flag setting conditional instructions employ a
- * bit field to populate the flags when a false condition
- * bypasses execution of the operation and we want to
- * be able to assign the flags register using the
- * supplied value.
- */
-
-  enum CPSRIdx {
-    V_IDX,
-    C_IDX,
-    Z_IDX,
-    N_IDX
-  };
-
-  enum CPSRMask {
-    V = 1 << V_IDX,
-    C = 1 << C_IDX,
-    Z = 1 << Z_IDX,
-    N = 1 << N_IDX
-  };
-
-  static const int CPSR_ALL_FLAGS = (V | C | Z | N);
-};
-
-// auxiliary function to assemble the relevant bits from
-// the x86 EFLAGS register into an ARM CPSR value
-
-#define X86_V_IDX 11
-#define X86_C_IDX 0
-#define X86_Z_IDX 6
-#define X86_N_IDX 7
-
-#define X86_V (1 << X86_V_IDX)
-#define X86_C (1 << X86_C_IDX)
-#define X86_Z (1 << X86_Z_IDX)
-#define X86_N (1 << X86_N_IDX)
-
-inline u_int32_t convertX86Flags(u_int32_t x86flags)
-{
-  u_int32_t flags;
-  // set N flag
-  flags = ((x86flags & X86_N) >> X86_N_IDX);
-  // shift then or in Z flag
-  flags <<= 1;
-  flags |= ((x86flags & X86_Z) >> X86_Z_IDX);
-  // shift then or in C flag
-  flags <<= 1;
-  flags |= ((x86flags & X86_C) >> X86_C_IDX);
-  // shift then or in V flag
-  flags <<= 1;
-  flags |= ((x86flags & X86_V) >> X86_V_IDX);
-
-  return flags;
-}
-
-inline u_int32_t convertX86FlagsFP(u_int32_t x86flags)
-{
-  // x86 flags set by fcomi(x,y) are ZF:PF:CF
-  // (yes, that's PF for parity, WTF?)
-  // where
-  // 0) 0:0:0 means x > y
-  // 1) 0:0:1 means x < y
-  // 2) 1:0:0 means x = y
-  // 3) 1:1:1 means x and y are unordered
-  // note that we don't have to check PF so
-  // we really have a simple 2-bit case switch
-  // the corresponding ARM64 flags settings
-  //  in hi->lo bit order are
-  // 0) --C-
-  // 1) N---
-  // 2) -ZC-
-  // 3) --CV
-
-  static u_int32_t armFlags[] = {
-      0b0010,
-      0b1000,
-      0b0110,
-      0b0011
-  };
-  // pick out the ZF and CF bits
-  u_int32_t zc = ((x86flags & X86_Z) >> X86_Z_IDX);
-  zc <<= 1;
-  zc |= ((x86flags & X86_C) >> X86_C_IDX);
-
-  return armFlags[zc];
-}
-
-/*
- * FPSR register -- floating point status register
-
- * this register includes IDC, IXC, UFC, OFC, DZC, IOC and QC bits,
- * and the floating point N, Z, C, V bits but the latter are unused in
- * aarch64 mode. the sim ignores QC for now.
- *
- * bit positions are as per the ARMv7 FPSCR register
- *
- * IDC :  7 ==> Input Denormal (cumulative exception bit)
- * IXC :  4 ==> Inexact
- * UFC :  3 ==> Underflow
- * OFC :  2 ==> Overflow
- * DZC :  1 ==> Division by Zero
- * IOC :  0 ==> Invalid Operation
- */
-
-class FPSRRegister
-{
-public:
-  u_int32_t value;
-  // indices for bits in the FPSR register value
-  enum FPSRIdx {
-    IO_IDX = 0,
-    DZ_IDX = 1,
-    OF_IDX = 2,
-    UF_IDX = 3,
-    IX_IDX = 4,
-    ID_IDX = 7
-  };
-  // corresponding bits as numeric values
-  enum FPSRMask {
-    IO = (1 << IO_IDX),
-    DZ = (1 << DZ_IDX),
-    OF = (1 << OF_IDX),
-    UF = (1 << UF_IDX),
-    IX = (1 << IX_IDX),
-    ID = (1 << ID_IDX)
-  };
-  static const int FPSR_ALL_FPSRS = (IO | DZ | OF | UF | IX | ID);
-};
-
-// debugger support
-
-enum PrintFormat
-{
-  FMT_DECIMAL,
-  FMT_HEX,
-  FMT_SINGLE,
-  FMT_DOUBLE,
-  FMT_QUAD,
-  FMT_MULTI
-};
-
-/*
- * model of the registers and other state associated with the cpu
- */
-class CPUState
-{
-  friend class AArch64Simulator;
-private:
-  // this is the PC of the instruction being executed
-  u_int64_t pc;
-  // this is the PC of the instruction to be executed next
-  // it is defaulted to pc + 4 at instruction decode but
-  // execute may reset it
-
-  u_int64_t nextpc;
-  GRegister gr[33];             // extra register at index 32 is used
-                                // to hold zero value
-  FRegister fr[32];
-  CPSRRegister cpsr;
-  FPSRRegister fpsr;
-
-public:
-
-  CPUState() {
-    gr[20].value.u64 = 0;  // establish initial condition for
-                           // checkAssertions()
-    trace_counter = 0;
-  }
-
-  // General Register access macros
-
-  // only xreg or xregs can be used as an lvalue in order to update a
-  // register. this ensures that the top part of a register is always
-  // assigned when it is written by the sim.
-
-  inline u_int64_t &xreg(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.u64;
-    } else {
-      return gr[reg].value.u64;
-    }
-  }
-
-  inline int64_t &xregs(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.s64;
-    } else {
-      return gr[reg].value.s64;
-    }
-  }
-
-  inline u_int32_t wreg(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.u32;
-    } else {
-      return gr[reg].value.u32;
-    }
-  }
-
-  inline int32_t wregs(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.s32;
-    } else {
-      return gr[reg].value.s32;
-    }
-  }
-
-  inline u_int32_t hreg(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.u16;
-    } else {
-      return gr[reg].value.u16;
-    }
-  }
-
-  inline int32_t hregs(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.s16;
-    } else {
-      return gr[reg].value.s16;
-    }
-  }
-
-  inline u_int32_t breg(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.u8;
-    } else {
-      return gr[reg].value.u8;
-    }
-  }
-
-  inline int32_t bregs(GReg reg, int r31_is_sp) {
-    if (reg == R31 && !r31_is_sp) {
-      return gr[32].value.s8;
-    } else {
-      return gr[reg].value.s8;
-    }
-  }
-
-  // FP Register access macros
-
-  // all non-vector accessors return a reference so we can both read
-  // and assign
-
-  inline float &sreg(VReg reg) {
-    return fr[reg].value.s;
-  }
-
-  inline double &dreg(VReg reg) {
-    return fr[reg].value.d;
-  }
-
-  inline long double &qreg(VReg reg) {
-    return fr[reg].value.q;
-  }
-
-  // all vector register accessors return a pointer
-
-  inline float *vsreg(VReg reg) {
-    return &fr[reg].value.vs[0];
-  }
-
-  inline double *vdreg(VReg reg) {
-    return &fr[reg].value.vd[0];
-  }
-
-  inline u_int8_t *vbreg(VReg reg) {
-    return &fr[reg].value.vb[0];
-  }
-
-  inline u_int16_t *vhreg(VReg reg) {
-    return &fr[reg].value.vh[0];
-  }
-
-  inline u_int32_t *vwreg(VReg reg) {
-    return &fr[reg].value.vw[0];
-  }
-
-  inline u_int64_t *vxreg(VReg reg) {
-    return &fr[reg].value.vx[0];
-  }
-
-  union GRegisterValue prev_sp, prev_fp;
-
-  static const int trace_size = 256;
-  u_int64_t trace_buffer[trace_size];
-  int trace_counter;
-
-  bool checkAssertions()
-  {
-    // Make sure that SP is 16-aligned
-    // Also make sure that ESP is above SP.
-    // We don't care about checking ESP if it is null, i.e. it hasn't
-    // been used yet.
-    if (gr[31].value.u64 & 0x0f) {
-      asm volatile("nop");
-      return false;
-    }
-    return true;
-  }
-
-  // pc register accessors
-
-  // this instruction can be used to fetch the current PC
-  u_int64_t getPC();
-  // instead of setting the current PC directly you can
-  // first set the next PC (either absolute or PC-relative)
-  // and later copy the next PC into the current PC
-  // this supports a default increment by 4 at instruction
-  // fetch with an optional reset by control instructions
-  u_int64_t getNextPC();
-  void setNextPC(u_int64_t next);
-  void offsetNextPC(int64_t offset);
-  // install nextpc as current pc
-  void updatePC();
-
-  // this instruction can be used to save the next PC to LR
-  // just before installing a branch PC
-  inline void saveLR() { gr[LR].value.u64 = nextpc; }
-
-  // cpsr register accessors
-  u_int32_t getCPSRRegister();
-  void setCPSRRegister(u_int32_t flags);
-  // read a specific subset of the flags as a bit pattern
-  // mask should be composed using elements of enum FlagMask
-  u_int32_t getCPSRBits(u_int32_t mask);
-  // assign a specific subset of the flags as a bit pattern
-  // mask and value should be composed using elements of enum FlagMask
-  void setCPSRBits(u_int32_t mask, u_int32_t value);
-  // test the value of a single flag returned as 1 or 0
-  u_int32_t testCPSR(CPSRRegister::CPSRIdx idx);
-  // set a single flag
-  void setCPSR(CPSRRegister::CPSRIdx idx);
-  // clear a single flag
-  void clearCPSR(CPSRRegister::CPSRIdx idx);
-  // utility method to set ARM CSPR flags from an x86 bit mask generated by integer arithmetic
-  void setCPSRRegisterFromX86(u_int64_t x86Flags);
-  // utility method to set ARM CSPR flags from an x86 bit mask generated by floating compare
-  void setCPSRRegisterFromX86FP(u_int64_t x86Flags);
-
-  // fpsr register accessors
-  u_int32_t getFPSRRegister();
-  void setFPSRRegister(u_int32_t flags);
-  // read a specific subset of the fprs bits as a bit pattern
-  // mask should be composed using elements of enum FPSRRegister::FlagMask
-  u_int32_t getFPSRBits(u_int32_t mask);
-  // assign a specific subset of the flags as a bit pattern
-  // mask and value should be composed using elements of enum FPSRRegister::FlagMask
-  void setFPSRBits(u_int32_t mask, u_int32_t value);
-  // test the value of a single flag returned as 1 or 0
-  u_int32_t testFPSR(FPSRRegister::FPSRIdx idx);
-  // set a single flag
-  void setFPSR(FPSRRegister::FPSRIdx idx);
-  // clear a single flag
-  void clearFPSR(FPSRRegister::FPSRIdx idx);
-
-  // debugger support
-  void printPC(int pending, const char *trailing = "\n");
-  void printInstr(u_int32_t instr, void (*dasm)(u_int64_t), const char *trailing = "\n");
-  void printGReg(GReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n");
-  void printVReg(VReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n");
-  void printCPSR(const char *trailing = "\n");
-  void printFPSR(const char *trailing = "\n");
-  void dumpState();
-};
-
-#endif // ifndef _CPU_STATE_H
--- a/src/hotspot/cpu/aarch64/decode_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/decode_aarch64.hpp
@ -1,412 +0,0 @@
-/*
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef _DECODE_H
-#define _DECODE_H
-
-#include <sys/types.h>
-#include "cpustate_aarch64.hpp"
-
-// bitfield immediate expansion helper
-
-extern int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,
-                                    u_int32_t imms, u_int64_t &bimm);
-
-
-/*
- * codes used in conditional instructions
- *
- * these are passed to conditional operations to identify which
- * condition to test for
- */
-enum CondCode {
-  EQ = 0b0000, // meaning Z == 1
-  NE = 0b0001, // meaning Z == 0
-  HS = 0b0010, // meaning C == 1
-  CS = HS,
-  LO = 0b0011, // meaning C == 0
-  CC = LO,
-  MI = 0b0100, // meaning N == 1
-  PL = 0b0101, // meaning N == 0
-  VS = 0b0110, // meaning V == 1
-  VC = 0b0111, // meaning V == 0
-  HI = 0b1000, // meaning C == 1 && Z == 0
-  LS = 0b1001, // meaning !(C == 1 && Z == 0)
-  GE = 0b1010, // meaning N == V
-  LT = 0b1011, // meaning N != V
-  GT = 0b1100, // meaning Z == 0 && N == V
-  LE = 0b1101, // meaning !(Z == 0 && N == V)
-  AL = 0b1110, // meaning ANY
-  NV = 0b1111  // ditto
-};
-
-/*
- * certain addressing modes for load require pre or post writeback of
- * the computed address to a base register
- */
-enum WriteBack {
-  Post = 0,
-  Pre = 1
-};
-
-/*
- * certain addressing modes for load require an offset to
- * be optionally scaled so the decode needs to pass that
- * through to the execute routine
- */
-enum Scaling {
-  Unscaled = 0,
-  Scaled = 1
-};
-
-/*
- * when we do have to scale we do so by shifting using
- * log(bytes in data element - 1) as the shift count.
- * so we don't have to scale offsets when loading
- * bytes.
- */
-enum ScaleShift {
-  ScaleShift16 = 1,
-  ScaleShift32 = 2,
-  ScaleShift64 = 3,
-  ScaleShift128 = 4
-};
-
-/*
- * one of the addressing modes for load requires a 32-bit register
- * value to be either zero- or sign-extended for these instructions
- * UXTW or SXTW should be passed
- *
- * arithmetic register data processing operations can optionally
- * extend a portion of the second register value for these
- * instructions the value supplied must identify the portion of the
- * register which is to be zero- or sign-exended
- */
-enum Extension {
-  UXTB = 0,
-  UXTH = 1,
-  UXTW = 2,
-  UXTX = 3,
-  SXTB = 4,
-  SXTH = 5,
-  SXTW = 6,
-  SXTX = 7
-};
-
-/*
- * arithmetic and logical register data processing operations
- * optionally perform a shift on the second register value
- */
-enum Shift {
-  LSL = 0,
-  LSR = 1,
-  ASR = 2,
-  ROR = 3
-};
-
-/*
- * bit twiddling helpers for instruction decode
- */
-
-// 32 bit mask with bits [hi,...,lo] set
-
-static inline u_int32_t mask32(int hi = 31, int lo = 0)
-{
-  int nbits = (hi + 1) - lo;
-  return ((1 << nbits) - 1) << lo;
-}
-
-static inline u_int64_t mask64(int hi = 63, int lo = 0)
-{
-  int nbits = (hi + 1) - lo;
-  return ((1L << nbits) - 1) << lo;
-}
-
-// pick bits [hi,...,lo] from val
-static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0)
-{
-  return (val & mask32(hi, lo));
-}
-
-// pick bits [hi,...,lo] from val
-static inline u_int64_t pick64(u_int64_t val, int hi = 31, int lo = 0)
-{
-  return (val & mask64(hi, lo));
-}
-
-// pick bits [hi,...,lo] from val and shift to [(hi-(newlo - lo)),newlo]
-static inline u_int32_t pickshift32(u_int32_t val, int hi = 31,
-                                    int lo = 0, int newlo = 0)
-{
-  u_int32_t bits = pick32(val, hi, lo);
-  if (lo < newlo) {
-    return (bits << (newlo - lo));
-  } else {
-    return (bits >> (lo - newlo));
-  }
-}
-// mask [hi,lo] and shift down to start at bit 0
-static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0)
-{
-  return (pick32(val, hi, lo) >> lo);
-}
-
-// mask [hi,lo] and shift down to start at bit 0
-static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0)
-{
-  return (pick64(val, hi, lo) >> lo);
-}
-
-/*
- * decode registers, immediates and constants of various types
- */
-
-static inline GReg greg(u_int32_t val, int lo)
-{
-  return (GReg)pickbits32(val, lo + 4, lo);
-}
-
-static inline VReg vreg(u_int32_t val, int lo)
-{
-  return (VReg)pickbits32(val, lo + 4, lo);
-}
-
-static inline u_int32_t uimm(u_int32_t val, int hi, int lo)
-{
-  return pickbits32(val, hi, lo);
-}
-
-static inline int32_t simm(u_int32_t val, int hi = 31, int lo = 0) {
-  union {
-    u_int32_t u;
-    int32_t n;
-  };
-
-  u = val << (31 - hi);
-  n = n >> (31 - hi + lo);
-  return n;
-}
-
-static inline int64_t simm(u_int64_t val, int hi = 63, int lo = 0) {
-  union {
-    u_int64_t u;
-    int64_t n;
-  };
-
-  u = val << (63 - hi);
-  n = n >> (63 - hi + lo);
-  return n;
-}
-
-static inline Shift shift(u_int32_t val, int lo)
-{
-  return (Shift)pickbits32(val, lo+1, lo);
-}
-
-static inline Extension extension(u_int32_t val, int lo)
-{
-  return (Extension)pickbits32(val, lo+2, lo);
-}
-
-static inline Scaling scaling(u_int32_t val, int lo)
-{
-  return (Scaling)pickbits32(val, lo, lo);
-}
-
-static inline WriteBack writeback(u_int32_t val, int lo)
-{
-  return (WriteBack)pickbits32(val, lo, lo);
-}
-
-static inline CondCode condcode(u_int32_t val, int lo)
-{
-  return (CondCode)pickbits32(val, lo+3, lo);
-}
-
-/*
- * operation decode
- */
-// bits [28,25] are the primary dispatch vector
-
-static inline u_int32_t dispatchGroup(u_int32_t val)
-{
-  return pickshift32(val, 28, 25, 0);
-}
-
-/*
- * the 16 possible values for bits [28,25] identified by tags which
- * map them to the 5 main instruction groups LDST, DPREG, ADVSIMD,
- * BREXSYS and DPIMM.
- *
- * An extra group PSEUDO is included in one of the unallocated ranges
- * for simulator-specific pseudo-instructions.
- */
-enum DispatchGroup {
-  GROUP_PSEUDO_0000,
-  GROUP_UNALLOC_0001,
-  GROUP_UNALLOC_0010,
-  GROUP_UNALLOC_0011,
-  GROUP_LDST_0100,
-  GROUP_DPREG_0101,
-  GROUP_LDST_0110,
-  GROUP_ADVSIMD_0111,
-  GROUP_DPIMM_1000,
-  GROUP_DPIMM_1001,
-  GROUP_BREXSYS_1010,
-  GROUP_BREXSYS_1011,
-  GROUP_LDST_1100,
-  GROUP_DPREG_1101,
-  GROUP_LDST_1110,
-  GROUP_ADVSIMD_1111
-};
-
-// bits [31, 29] of a Pseudo are the secondary dispatch vector
-
-static inline u_int32_t dispatchPseudo(u_int32_t val)
-{
-  return pickshift32(val, 31, 29, 0);
-}
-
-/*
- * the 8 possible values for bits [31,29] in a Pseudo Instruction.
- * Bits [28,25] are always 0000.
- */
-
-enum DispatchPseudo {
-  PSEUDO_UNALLOC_000, // unallocated
-  PSEUDO_UNALLOC_001, // ditto
-  PSEUDO_UNALLOC_010, // ditto
-  PSEUDO_UNALLOC_011, // ditto
-  PSEUDO_UNALLOC_100, // ditto
-  PSEUDO_UNALLOC_101, // ditto
-  PSEUDO_CALLOUT_110, // CALLOUT -- bits [24,0] identify call/ret sig
-  PSEUDO_HALT_111     // HALT -- bits [24, 0] identify halt code
-};
-
-// bits [25, 23] of a DPImm are the secondary dispatch vector
-
-static inline u_int32_t dispatchDPImm(u_int32_t instr)
-{
-  return pickshift32(instr, 25, 23, 0);
-}
-
-/*
- * the 8 possible values for bits [25,23] in a Data Processing Immediate
- * Instruction. Bits [28,25] are always 100_.
- */
-
-enum DispatchDPImm {
-  DPIMM_PCADR_000,  // PC-rel-addressing
-  DPIMM_PCADR_001,  // ditto
-  DPIMM_ADDSUB_010,  // Add/Subtract (immediate)
-  DPIMM_ADDSUB_011, // ditto
-  DPIMM_LOG_100,    // Logical (immediate)
-  DPIMM_MOV_101,    // Move Wide (immediate)
-  DPIMM_BITF_110,   // Bitfield
-  DPIMM_EXTR_111    // Extract
-};
-
-// bits [29,28:26] of a LS are the secondary dispatch vector
-
-static inline u_int32_t dispatchLS(u_int32_t instr)
-{
-  return (pickshift32(instr, 29, 28, 1) |
-          pickshift32(instr, 26, 26, 0));
-}
-
-/*
- * the 8 possible values for bits [29,28:26] in a Load/Store
- * Instruction. Bits [28,25] are always _1_0
- */
-
-enum DispatchLS {
-  LS_EXCL_000,    // Load/store exclusive (includes some unallocated)
-  LS_ADVSIMD_001, // AdvSIMD load/store (various -- includes some unallocated)
-  LS_LIT_010,     // Load register literal (includes some unallocated)
-  LS_LIT_011,     // ditto
-  LS_PAIR_100,    // Load/store register pair (various)
-  LS_PAIR_101,    // ditto
-  LS_OTHER_110,   // other load/store formats
-  LS_OTHER_111    // ditto
-};
-
-// bits [28:24:21] of a DPReg are the secondary dispatch vector
-
-static inline u_int32_t dispatchDPReg(u_int32_t instr)
-{
-  return (pickshift32(instr, 28, 28, 2) |
-          pickshift32(instr, 24, 24, 1) |
-          pickshift32(instr, 21, 21, 0));
-}
-
-/*
- * the 8 possible values for bits [28:24:21] in a Data Processing
- * Register Instruction. Bits [28,25] are always _101
- */
-
-enum DispatchDPReg {
-  DPREG_LOG_000,     // Logical (shifted register)
-  DPREG_LOG_001,     // ditto
-  DPREG_ADDSHF_010,  // Add/subtract (shifted register)
-  DPREG_ADDEXT_011,  // Add/subtract (extended register)
-  DPREG_ADDCOND_100, // Add/subtract (with carry) AND
-                     // Cond compare/select AND
-                     // Data Processing (1/2 source)
-  DPREG_UNALLOC_101, // Unallocated
-  DPREG_3SRC_110, // Data Processing (3 source)
-  DPREG_3SRC_111  // Data Processing (3 source)
-};
-
-// bits [31,29] of a BrExSys are the secondary dispatch vector
-
-static inline u_int32_t dispatchBrExSys(u_int32_t instr)
-{
-  return pickbits32(instr, 31, 29);
-}
-
-/*
- * the 8 possible values for bits [31,29] in a Branch/Exception/System
- * Instruction. Bits [28,25] are always 101_
- */
-
-enum DispatchBr {
-  BR_IMM_000,     // Unconditional branch (immediate)
-  BR_IMMCMP_001,  // Compare & branch (immediate) AND
-                  // Test & branch (immediate)
-  BR_IMMCOND_010, // Conditional branch (immediate) AND Unallocated
-  BR_UNALLOC_011, // Unallocated
-  BR_IMM_100,     // Unconditional branch (immediate)
-  BR_IMMCMP_101,  // Compare & branch (immediate) AND
-                  // Test & branch (immediate)
-  BR_REG_110,     // Unconditional branch (register) AND System AND
-                  // Excn gen AND Unallocated
-  BR_UNALLOC_111  // Unallocated
-};
-
-/*
- * TODO still need to provide secondary decode and dispatch for
- * AdvSIMD Insructions with instr[28,25] = 0111 or 1111
- */
-
-#endif // ifndef DECODE_H
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@ -306,7 +306,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler
  __ push_call_clobbered_registers();
  __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
  __ mov(r0, rscratch2);
-  __ blrt(lr, 1, 0, MacroAssembler::ret_type_integral);
+  __ blr(lr);
  __ mov(rscratch2, r0);
  __ pop_call_clobbered_registers();
  __ mov(dst, rscratch2);
@ -635,7 +635,7 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator
  __ push_call_clobbered_registers();

  __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
-  __ blrt(lr, 1, 0, MacroAssembler::ret_type_integral);
+  __ blr(lr);
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@ -82,48 +82,6 @@ define_pd_global(bool, ThreadLocalHandshakes, true);
 define_pd_global(intx, InlineSmallCode,          1000);
 #endif

-#ifdef BUILTIN_SIM
-#define UseBuiltinSim           true
-#define ARCH_FLAGS(develop, \
-                   product, \
-                   diagnostic, \
-                   experimental, \
-                   notproduct, \
-                   range, \
-                   constraint, \
-                   writeable) \
-                                                                        \
-  product(bool, NotifySimulator, UseBuiltinSim,                         \
-         "tell the AArch64 sim where we are in method code")            \
-                                                                        \
-  product(bool, UseSimulatorCache, false,                               \
-         "tell sim to cache memory updates until exclusive op occurs")  \
-                                                                        \
-  product(bool, DisableBCCheck, true,                                   \
-          "tell sim not to invoke bccheck callback")                    \
-                                                                        \
-  product(bool, NearCpool, true,                                        \
-         "constant pool is close to instructions")                      \
-                                                                        \
-  product(bool, UseBarriersForVolatile, false,                          \
-          "Use memory barriers to implement volatile accesses")         \
-                                                                        \
-  product(bool, UseCRC32, false,                                        \
-          "Use CRC32 instructions for CRC32 computation")               \
-                                                                        \
-  product(bool, UseLSE, false,                                          \
-          "Use LSE instructions")                                       \
-
-// Don't attempt to use Neon on builtin sim until builtin sim supports it
-#define UseCRC32 false
-#define UseSIMDForMemoryOps    false
-#define AvoidUnalignedAcesses false
-
-#else
-#define UseBuiltinSim           false
-#define NotifySimulator         false
-#define UseSimulatorCache       false
-#define DisableBCCheck          true
 #define ARCH_FLAGS(develop, \
                   product, \
                   diagnostic, \
@ -162,7 +120,5 @@ define_pd_global(intx, InlineSmallCode,          1000);
          "Use prfm hint with specified distance in compiled code."     \
          "Value -1 means off.")                                        \
          range(-1, 4096)
-#endif
-

 #endif // CPU_AARCH64_GLOBALS_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/immediate_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/immediate_aarch64.cpp
@ -23,7 +23,6 @@
 */

 #include <stdlib.h>
-#include "decode_aarch64.hpp"
 #include "immediate_aarch64.hpp"

 // there are at most 2^13 possible logical immediate encodings
@ -69,12 +68,57 @@ static inline u_int64_t ones(int N)
  return (N == 64 ? (u_int64_t)-1UL : ((1UL << N) - 1));
 }

+/*
+ * bit twiddling helpers for instruction decode
+ */
+
+// 32 bit mask with bits [hi,...,lo] set
+static inline u_int32_t mask32(int hi = 31, int lo = 0)
+{
+  int nbits = (hi + 1) - lo;
+  return ((1 << nbits) - 1) << lo;
+}
+
+static inline u_int64_t mask64(int hi = 63, int lo = 0)
+{
+  int nbits = (hi + 1) - lo;
+  return ((1L << nbits) - 1) << lo;
+}
+
+// pick bits [hi,...,lo] from val
+static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0)
+{
+  return (val & mask32(hi, lo));
+}
+
+// pick bits [hi,...,lo] from val
+static inline u_int64_t pick64(u_int64_t val, int hi = 31, int lo = 0)
+{
+  return (val & mask64(hi, lo));
+}
+
+// mask [hi,lo] and shift down to start at bit 0
+static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0)
+{
+  return (pick32(val, hi, lo) >> lo);
+}
+
+// mask [hi,lo] and shift down to start at bit 0
+static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0)
+{
+  return (pick64(val, hi, lo) >> lo);
+}
+
 // result<0> to val<N>
 static inline u_int64_t pickbit(u_int64_t val, int N)
 {
  return pickbits64(val, N, N);
 }

+static inline u_int32_t uimm(u_int32_t val, int hi, int lo)
+{
+  return pickbits32(val, hi, lo);
+}

 // SPEC bits(M*N) Replicate(bits(M) x, integer N);
 // this is just an educated guess
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@ -972,17 +972,6 @@ RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_ad
  return RegisterOrConstant(tmp);
 }

-
-void MacroAssembler:: notify(int type) {
-  if (type == bytecode_start) {
-    // set_last_Java_frame(esp, rfp, (address)NULL);
-    Assembler:: notify(type);
-    // reset_last_Java_frame(true);
-  }
-  else
-    Assembler:: notify(type);
-}
-
 // Look up the method for a megamorphic invokeinterface call.
 // The target method is determined by <intf_klass, itable_index>.
 // The receiver klass is in recv_klass.
@ -1425,22 +1414,12 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
-  call_VM_leaf_base1(entry_point, number_of_arguments, 0, ret_type_integral, retaddr);
-}
-
-void MacroAssembler::call_VM_leaf_base1(address entry_point,
-                                        int number_of_gp_arguments,
-                                        int number_of_fp_arguments,
-                                        ret_type type,
-                                        Label *retaddr) {
  Label E, L;

  stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize)));

-  // We add 1 to number_of_arguments because the thread in arg0 is
-  // not counted
  mov(rscratch1, entry_point);
-  blrt(rscratch1, number_of_gp_arguments + 1, number_of_fp_arguments, type);
+  blr(rscratch1);
  if (retaddr)
    bind(*retaddr);

@ -2198,8 +2177,7 @@ void MacroAssembler::stop(const char* msg) {
  mov(c_rarg1, (address)ip);
  mov(c_rarg2, sp);
  mov(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
-  // call(c_rarg3);
-  blrt(c_rarg3, 3, 0, 1);
+  blr(c_rarg3);
  hlt(0);
 }

@ -2207,7 +2185,7 @@ void MacroAssembler::warn(const char* msg) {
  pusha();
  mov(c_rarg0, (address)msg);
  mov(lr, CAST_FROM_FN_PTR(address, warning));
-  blrt(lr, 1, 0, MacroAssembler::ret_type_void);
+  blr(lr);
  popa();
 }

@ -2588,50 +2566,6 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
  }
 }

-#ifdef BUILTIN_SIM
-// routine to generate an x86 prolog for a stub function which
-// bootstraps into the generated ARM code which directly follows the
-// stub
-//
-// the argument encodes the number of general and fp registers
-// passed by the caller and the callng convention (currently just
-// the number of general registers and assumes C argument passing)
-
-extern "C" {
-int aarch64_stub_prolog_size();
-void aarch64_stub_prolog();
-void aarch64_prolog();
-}
-
-void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type,
-                                   address *prolog_ptr)
-{
-  int calltype = (((ret_type & 0x3) << 8) |
-                  ((fp_arg_count & 0xf) << 4) |
-                  (gp_arg_count & 0xf));
-
-  // the addresses for the x86 to ARM entry code we need to use
-  address start = pc();
-  // printf("start = %lx\n", start);
-  int byteCount =  aarch64_stub_prolog_size();
-  // printf("byteCount = %x\n", byteCount);
-  int instructionCount = (byteCount + 3)/ 4;
-  // printf("instructionCount = %x\n", instructionCount);
-  for (int i = 0; i < instructionCount; i++) {
-    nop();
-  }
-
-  memcpy(start, (void*)aarch64_stub_prolog, byteCount);
-
-  // write the address of the setup routine and the call format at the
-  // end of into the copied code
-  u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
-  if (prolog_ptr)
-    patch_end[-2] = (u_int64_t)prolog_ptr;
-  patch_end[-1] = calltype;
-}
-#endif
-
 void MacroAssembler::push_call_clobbered_registers() {
  int step = 4 * wordSize;
  push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
@ -5678,7 +5612,6 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,

      mov(result, len); // Save initial len

-#ifndef BUILTIN_SIM
      cmp(len, (u1)8); // handle shortest strings first
      br(LT, LOOP_1);
      cmp(len, (u1)32);
@ -5754,7 +5687,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
      br(GE, NEXT_8);

    BIND(LOOP_1);
-#endif
+
    cbz(len, DONE);
    BIND(NEXT_1);
      ldrh(tmp1, Address(post(src, 2)));
@ -5893,7 +5826,7 @@ void MacroAssembler::get_thread(Register dst) {
  push(saved_regs, sp);

  mov(lr, CAST_FROM_FN_PTR(address, JavaThread::aarch64_get_thread_helper));
-  blrt(lr, 1, 0, 1);
+  blr(lr);
  if (dst != c_rarg0) {
    mov(dst, c_rarg0);
  }
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@ -170,13 +170,10 @@ class MacroAssembler: public Assembler {

  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
-    haltsim();
  }

 #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

-  virtual void notify(int type);
-
  // aliases defined in AARCH64 spec

  template<class T>
@ -1188,26 +1185,6 @@ public:
  // enum used for aarch64--x86 linkage to define return type of x86 function
  enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double};

-#ifdef BUILTIN_SIM
-  void c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type, address *prolog_ptr = NULL);
-#else
-  void c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type) { }
-#endif
-
-  // special version of call_VM_leaf_base needed for aarch64 simulator
-  // where we need to specify both the gp and fp arg counts and the
-  // return type so that the linkage routine from aarch64 to x86 and
-  // back knows which aarch64 registers to copy to x86 registers and
-  // which x86 result register to copy back to an aarch64 register
-
-  void call_VM_leaf_base1(
-    address  entry_point,             // the entry point
-    int      number_of_gp_arguments,  // the number of gp reg arguments to pass
-    int      number_of_fp_arguments,  // the number of fp reg arguments to pass
-    ret_type type,                    // the return type for the call
-    Label*   retaddr = NULL
-  );
-
  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@ -50,10 +50,6 @@
 #include "jvmci/jvmciJavaClasses.hpp"
 #endif

-#ifdef BUILTIN_SIM
-#include "../../../../../../simulator/simulator.hpp"
-#endif
-
 #define __ masm->

 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
@ -342,7 +338,7 @@ static void patch_callers_callsite(MacroAssembler *masm) {
  __ mov(c_rarg0, rmethod);
  __ mov(c_rarg1, lr);
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
-  __ blrt(rscratch1, 2, 0, 0);
+  __ blr(rscratch1);
  __ maybe_isb();

  __ pop_CPU_state();
@ -662,71 +658,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
  __ br(rscratch1);
 }

-#ifdef BUILTIN_SIM
-static void generate_i2c_adapter_name(char *result, int total_args_passed, const BasicType *sig_bt)
-{
-  strcpy(result, "i2c(");
-  int idx = 4;
-  for (int i = 0; i < total_args_passed; i++) {
-    switch(sig_bt[i]) {
-    case T_BOOLEAN:
-      result[idx++] = 'Z';
-      break;
-    case T_CHAR:
-      result[idx++] = 'C';
-      break;
-    case T_FLOAT:
-      result[idx++] = 'F';
-      break;
-    case T_DOUBLE:
-      assert((i < (total_args_passed - 1)) && (sig_bt[i+1] == T_VOID),
-             "double must be followed by void");
-      i++;
-      result[idx++] = 'D';
-      break;
-    case T_BYTE:
-      result[idx++] = 'B';
-      break;
-    case T_SHORT:
-      result[idx++] = 'S';
-      break;
-    case T_INT:
-      result[idx++] = 'I';
-      break;
-    case T_LONG:
-      assert((i < (total_args_passed - 1)) && (sig_bt[i+1] == T_VOID),
-             "long must be followed by void");
-      i++;
-      result[idx++] = 'L';
-      break;
-    case T_OBJECT:
-      result[idx++] = 'O';
-      break;
-    case T_ARRAY:
-      result[idx++] = '[';
-      break;
-    case T_ADDRESS:
-      result[idx++] = 'P';
-      break;
-    case T_NARROWOOP:
-      result[idx++] = 'N';
-      break;
-    case T_METADATA:
-      result[idx++] = 'M';
-      break;
-    case T_NARROWKLASS:
-      result[idx++] = 'K';
-      break;
-    default:
-      result[idx++] = '?';
-      break;
-    }
-  }
-  result[idx++] = ')';
-  result[idx] = '\0';
-}
-#endif
-
 // ---------------------------------------------------------------
 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
@ -735,20 +666,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();
-#ifdef BUILTIN_SIM
-  char *name = NULL;
-  AArch64Simulator *sim = NULL;
-  size_t len = 65536;
-  if (NotifySimulator) {
-    name = NEW_C_HEAP_ARRAY(char, len, mtInternal);
-  }

-  if (name) {
-    generate_i2c_adapter_name(name, total_args_passed, sig_bt);
-    sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-    sim->notifyCompile(name, i2c_entry);
-  }
-#endif
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  address c2i_unverified_entry = __ pc();
@ -790,15 +708,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm

  address c2i_entry = __ pc();

-#ifdef BUILTIN_SIM
-  if (name) {
-    name[0] = 'c';
-    name[2] = 'i';
-    sim->notifyCompile(name, c2i_entry);
-    FREE_C_HEAP_ARRAY(char, name, mtInternal);
-  }
-#endif
-
  // Class initialization barrier for static methods
  address c2i_no_clinit_check_entry = NULL;
  if (VM_Version::supports_fast_class_init_checks()) {
@ -1219,8 +1128,7 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs,
    assert((unsigned)gpargs < 256, "eek!");
    assert((unsigned)fpargs < 32, "eek!");
    __ lea(rscratch1, RuntimeAddress(dest));
-    if (UseBuiltinSim)   __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
-    __ blrt(rscratch1, rscratch2);
+    __ blr(rscratch1);
    __ maybe_isb();
  }
 }
@ -1341,24 +1249,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                BasicType* in_sig_bt,
                                                VMRegPair* in_regs,
                                                BasicType ret_type) {
-#ifdef BUILTIN_SIM
-  if (NotifySimulator) {
-    // Names are up to 65536 chars long.  UTF8-coded strings are up to
-    // 3 bytes per character.  We concatenate three such strings.
-    // Yes, I know this is ridiculous, but it's debug code and glibc
-    // allocates large arrays very efficiently.
-    size_t len = (65536 * 3) * 3;
-    char *name = new char[len];
-
-    strncpy(name, method()->method_holder()->name()->as_utf8(), len);
-    strncat(name, ".", len);
-    strncat(name, method()->name()->as_utf8(), len);
-    strncat(name, method()->signature()->as_utf8(), len);
-    AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck)->notifyCompile(name, __ pc());
-    delete[] name;
-  }
-#endif
-
  if (method->is_method_handle_intrinsic()) {
    vmIntrinsics::ID iid = method->intrinsic_id();
    intptr_t start = (intptr_t)__ pc();
@ -1623,11 +1513,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  // Frame is now completed as far as size and linkage.
  int frame_complete = ((intptr_t)__ pc()) - start;

-  // record entry into native wrapper code
-  if (NotifySimulator) {
-    __ notify(Assembler::method_entry);
-  }
-
  // We use r20 as the oop handle for the receiver/klass
  // It is callee save so it survives the call to native

@ -2089,11 +1974,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
    __ cbnz(rscratch1, exception_pending);
  }

-  // record exit from native wrapper code
-  if (NotifySimulator) {
-    __ notify(Assembler::method_reentry);
-  }
-
  // We're done
  __ ret(lr);

@ -2208,7 +2088,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
    } else {
      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
    }
-    __ blrt(rscratch1, 1, 0, 1);
+    __ blr(rscratch1);
    __ maybe_isb();
    // Restore any method result value
    restore_native_result(masm, ret_type, stack_slots);
@ -2305,14 +2185,6 @@ void SharedRuntime::generate_deopt_blob() {
  OopMap* map = NULL;
  OopMapSet *oop_maps = new OopMapSet();

-#ifdef BUILTIN_SIM
-  AArch64Simulator *simulator;
-  if (NotifySimulator) {
-    simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-    simulator->notifyCompile(const_cast<char*>("SharedRuntime::deopt_blob"), __ pc());
-  }
-#endif
-
  // -------------
  // This code enters when returning to a de-optimized nmethod.  A return
  // address has been pushed on the the stack, and return values are in
@ -2401,7 +2273,7 @@ void SharedRuntime::generate_deopt_blob() {
    __ lea(rscratch1,
           RuntimeAddress(CAST_FROM_FN_PTR(address,
                                           Deoptimization::uncommon_trap)));
-    __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
+    __ blr(rscratch1);
    __ bind(retaddr);
    oop_maps->add_gc_map( __ pc()-start, map->deep_copy());

@ -2493,7 +2365,7 @@ void SharedRuntime::generate_deopt_blob() {
  __ mov(c_rarg0, rthread);
  __ mov(c_rarg1, rcpool);
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
-  __ blrt(rscratch1, 1, 0, 1);
+  __ blr(rscratch1);
  __ bind(retaddr);

  // Need to have an oopmap that tells fetch_unroll_info where to
@ -2633,7 +2505,7 @@ void SharedRuntime::generate_deopt_blob() {
  __ mov(c_rarg0, rthread);
  __ movw(c_rarg1, rcpool); // second arg: exec_mode
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
-  __ blrt(rscratch1, 2, 0, 0);
+  __ blr(rscratch1);

  // Set an oopmap for the call site
  // Use the same PC we used for the last java frame
@ -2666,12 +2538,6 @@ void SharedRuntime::generate_deopt_blob() {
    _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
  }
 #endif
-#ifdef BUILTIN_SIM
-  if (NotifySimulator) {
-    unsigned char *base = _deopt_blob->code_begin();
-    simulator->notifyRelocate(start, base - start);
-  }
-#endif
 }

 uint SharedRuntime::out_preserve_stack_slots() {
@ -2687,14 +2553,6 @@ void SharedRuntime::generate_uncommon_trap_blob() {
  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

-#ifdef BUILTIN_SIM
-  AArch64Simulator *simulator;
-  if (NotifySimulator) {
-    simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-    simulator->notifyCompile(const_cast<char*>("SharedRuntime:uncommon_trap_blob"), __ pc());
-  }
-#endif
-
  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");

  address start = __ pc();
@ -2733,7 +2591,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
  __ lea(rscratch1,
         RuntimeAddress(CAST_FROM_FN_PTR(address,
                                         Deoptimization::uncommon_trap)));
-  __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
+  __ blr(rscratch1);
  __ bind(retaddr);

  // Set an oopmap for the call site
@ -2856,7 +2714,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
  __ mov(c_rarg0, rthread);
  __ movw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
-  __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
+  __ blr(rscratch1);

  // Set an oopmap for the call site
  // Use the same PC we used for the last java frame
@ -2876,13 +2734,6 @@ void SharedRuntime::generate_uncommon_trap_blob() {

  _uncommon_trap_blob =  UncommonTrapBlob::create(&buffer, oop_maps,
                                                 SimpleRuntimeFrame::framesize >> 1);
-
-#ifdef BUILTIN_SIM
-  if (NotifySimulator) {
-    unsigned char *base = _deopt_blob->code_begin();
-    simulator->notifyRelocate(start, base - start);
-  }
-#endif
 }
 #endif // COMPILER2_OR_JVMCI

@ -2932,7 +2783,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
  // Do the call
  __ mov(c_rarg0, rthread);
  __ lea(rscratch1, RuntimeAddress(call_ptr));
-  __ blrt(rscratch1, 1, 0, 1);
+  __ blr(rscratch1);
  __ bind(retaddr);

  // Set an oopmap for the call site.  This oopmap will map all
@ -3037,7 +2888,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
    __ mov(c_rarg0, rthread);
    __ lea(rscratch1, RuntimeAddress(destination));

-    __ blrt(rscratch1, 1, 0, 1);
+    __ blr(rscratch1);
    __ bind(retaddr);
  }

@ -3169,7 +3020,7 @@ void OptoRuntime::generate_exception_blob() {
  __ set_last_Java_frame(sp, noreg, the_pc, rscratch1);
  __ mov(c_rarg0, rthread);
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
-  __ blrt(rscratch1, 1, 0, MacroAssembler::ret_type_integral);
+  __ blr(rscratch1);
  __ maybe_isb();

  // Set an oopmap for the call site.  This oopmap will only be used if we
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@ -50,10 +50,6 @@
 #include "gc/z/zThreadLocalData.hpp"
 #endif

-#ifdef BUILTIN_SIM
-#include "../../../../../../simulator/simulator.hpp"
-#endif
-
 // Declaration and definition of StubGenerator (no .hpp file).
 // For a more detailed description of the stub routine structure
 // see the comment in stubRoutines.hpp
@ -221,16 +217,8 @@ class StubGenerator: public StubCodeGenerator {

    // stub code

-    // we need a C prolog to bootstrap the x86 caller into the sim
-    __ c_stub_prolog(8, 0, MacroAssembler::ret_type_void);
-
    address aarch64_entry = __ pc();

-#ifdef BUILTIN_SIM
-    // Save sender's SP for stack traces.
-    __ mov(rscratch1, sp);
-    __ str(rscratch1, Address(__ pre(sp, -2 * wordSize)));
-#endif
    // set up frame and move sp to end of save area
    __ enter();
    __ sub(sp, rfp, -sp_after_call_off * wordSize);
@ -301,8 +289,6 @@ class StubGenerator: public StubCodeGenerator {
    __ mov(r13, sp);
    __ blr(c_rarg4);

-    // tell the simulator we have returned to the stub
-
    // we do this here because the notify will already have been done
    // if we get to the next instruction via an exception
    //
@ -312,9 +298,6 @@ class StubGenerator: public StubCodeGenerator {
    // pc against the address saved below. so we may need to allow for
    // this extra instruction in the check.

-    if (NotifySimulator) {
-      __ notify(Assembler::method_reentry);
-    }
    // save current address for use by exception handling code

    return_address = __ pc();
@ -377,12 +360,6 @@ class StubGenerator: public StubCodeGenerator {
    __ ldp(c_rarg4, c_rarg5,  entry_point);
    __ ldp(c_rarg6, c_rarg7,  parameter_size);

-#ifndef PRODUCT
-    // tell the simulator we are about to end Java execution
-    if (NotifySimulator) {
-      __ notify(Assembler::method_exit);
-    }
-#endif
    // leave frame and return to caller
    __ leave();
    __ ret(lr);
@ -416,13 +393,6 @@ class StubGenerator: public StubCodeGenerator {
  //
  // r0: exception oop

-  // NOTE: this is used as a target from the signal handler so it
-  // needs an x86 prolog which returns into the current simulator
-  // executing the generated catch_exception code. so the prolog
-  // needs to install rax in a sim register and adjust the sim's
-  // restart pc to enter the generated code at the start position
-  // then return from native to simulated execution.
-
  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();
@ -627,7 +597,7 @@ class StubGenerator: public StubCodeGenerator {
 #endif
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
-    __ blrt(rscratch1, 3, 0, 1);
+    __ blr(rscratch1);

    return start;
  }
@ -1401,12 +1371,6 @@ class StubGenerator: public StubCodeGenerator {
    __ leave();
    __ mov(r0, zr); // return 0
    __ ret(lr);
-#ifdef BUILTIN_SIM
-    {
-      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-      sim->notifyCompile(const_cast<char*>(name), start);
-    }
-#endif
    return start;
  }

@ -1475,12 +1439,6 @@ class StubGenerator: public StubCodeGenerator {
    __ leave();
    __ mov(r0, zr); // return 0
    __ ret(lr);
-#ifdef BUILTIN_SIM
-    {
-      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-      sim->notifyCompile(const_cast<char*>(name), start);
-    }
-#endif
    return start;
 }

@ -3128,7 +3086,6 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

-#ifndef BUILTIN_SIM
  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
@ -3168,7 +3125,6 @@ class StubGenerator: public StubCodeGenerator {
    __ mov(r0, c_rarg1);
    __ ret(lr);
  }
-#endif

  /**
   *  Arguments:
@ -4804,7 +4760,7 @@ class StubGenerator: public StubCodeGenerator {
    __ mov(c_rarg0, rthread);
    BLOCK_COMMENT("call runtime_entry");
    __ mov(rscratch1, runtime_entry);
-    __ blrt(rscratch1, 3 /* number_of_arguments */, 0, 1);
+    __ blr(rscratch1);

    // Generate oop map
    OopMap* map = new OopMap(framesize, 0);
@ -5778,7 +5734,6 @@ class StubGenerator: public StubCodeGenerator {
    }
 #endif // COMPILER2

-#ifndef BUILTIN_SIM
    // generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
@ -5812,7 +5767,6 @@ class StubGenerator: public StubCodeGenerator {
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);
-#endif
    StubRoutines::aarch64::set_completed();
  }

--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
@ -30,13 +30,8 @@
 // definition. See stubRoutines.hpp for a description on how to
 // extend it.

-// n.b. if we are notifying entry/exit to the simulator then the call
-// stub does a notify at normal return placing
-// call_stub_return_address one instruction beyond the notify. the
-// latter address is sued by the stack unwind code when doign an
-// exception return.
 static bool    returns_to_call_stub(address return_pc)   {
-  return return_pc == _call_stub_return_address + (NotifySimulator ? -4 : 0);
+  return return_pc == _call_stub_return_address;
 }

 enum platform_dependent_constants {
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@ -55,10 +55,6 @@
 #include "oops/method.hpp"
 #endif // !PRODUCT

-#ifdef BUILTIN_SIM
-#include "../../../../../../simulator/simulator.hpp"
-#endif
-
 // Size of interpreter code.  Increase if too small.  Interpreter will
 // fail with a guarantee ("not enough space for interpreter generation");
 // if too small.
@ -300,9 +296,8 @@ void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpr
    ShouldNotReachHere();
    fn = NULL;  // unreachable
  }
-  const int gpargs = 0, rtype = 3;
  __ mov(rscratch1, fn);
-  __ blrt(rscratch1, gpargs, fpargs, rtype);
+  __ blr(rscratch1);
 }

 // Abstract method entry
@ -469,13 +464,6 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
  __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3);
  __ andr(sp, rscratch1, -16);

-#ifndef PRODUCT
-  // tell the simulator that the method has been reentered
-  if (NotifySimulator) {
-    __ notify(Assembler::method_reentry);
-  }
-#endif
-
 __ check_and_handle_popframe(rthread);
 __ check_and_handle_earlyret(rthread);

@ -1185,12 +1173,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {

  // initialize fixed part of activation frame
  generate_fixed_frame(true);
-#ifndef PRODUCT
-  // tell the simulator that a method has been entered
-  if (NotifySimulator) {
-    __ notify(Assembler::method_entry);
-  }
-#endif

  // make sure method is native & not abstract
 #ifdef ASSERT
@ -1375,7 +1357,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
  __ stlrw(rscratch1, rscratch2);

  // Call the native method.
-  __ blrt(r10, rscratch1);
+  __ blr(r10);
  __ bind(native_return);
  __ maybe_isb();
  __ get_method(rmethod);
@ -1415,7 +1397,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
    //
    __ mov(c_rarg0, rthread);
    __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
-    __ blrt(rscratch2, 1, 0, 0);
+    __ blr(rscratch2);
    __ maybe_isb();
    __ get_method(rmethod);
    __ reinit_heapbase();
@ -1466,7 +1448,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
    __ pusha(); // XXX only save smashed registers
    __ mov(c_rarg0, rthread);
    __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
-    __ blrt(rscratch2, 0, 0, 0);
+    __ blr(rscratch2);
    __ popa(); // XXX only restore smashed registers
    __ bind(no_reguard);
  }
@ -1621,12 +1603,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {

  // initialize fixed part of activation frame
  generate_fixed_frame(false);
-#ifndef PRODUCT
-  // tell the simulator that a method has been entered
-  if (NotifySimulator) {
-    __ notify(Assembler::method_entry);
-  }
-#endif
+
  // make sure method is not native & not abstract
 #ifdef ASSERT
  __ ldrw(r0, access_flags);
@ -1762,13 +1739,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
  __ reinit_heapbase();  // restore rheapbase as heapbase.
  __ get_dispatch();

-#ifndef PRODUCT
-  // tell the simulator that the caller method has been reentered
-  if (NotifySimulator) {
-    __ get_method(rmethod);
-    __ notify(Assembler::method_reentry);
-  }
-#endif
  // Entry point for exceptions thrown within interpreter code
  Interpreter::_throw_exception_entry = __ pc();
  // If we came here via a NullPointerException on the receiver of a
@ -2086,121 +2056,4 @@ void TemplateInterpreterGenerator::stop_interpreter_at() {
  __ pop(rscratch1);
 }

-#ifdef BUILTIN_SIM
-
-#include <sys/mman.h>
-#include <unistd.h>
-
-extern "C" {
-  static int PAGESIZE = getpagesize();
-  int is_mapped_address(u_int64_t address)
-  {
-    address = (address & ~((u_int64_t)PAGESIZE - 1));
-    if (msync((void *)address, PAGESIZE, MS_ASYNC) == 0) {
-      return true;
-    }
-    if (errno != ENOMEM) {
-      return true;
-    }
-    return false;
-  }
-
-  void bccheck1(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode)
-  {
-    if (method != 0) {
-      method[0] = '\0';
-    }
-    if (bcidx != 0) {
-      *bcidx = -2;
-    }
-    if (decode != 0) {
-      decode[0] = 0;
-    }
-
-    if (framesize != 0) {
-      *framesize = -1;
-    }
-
-    if (Interpreter::contains((address)pc)) {
-      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
-      Method* meth;
-      address bcp;
-      if (fp) {
-#define FRAME_SLOT_METHOD 3
-#define FRAME_SLOT_BCP 7
-        meth = (Method*)sim->getMemory()->loadU64(fp - (FRAME_SLOT_METHOD << 3));
-        bcp = (address)sim->getMemory()->loadU64(fp - (FRAME_SLOT_BCP << 3));
-#undef FRAME_SLOT_METHOD
-#undef FRAME_SLOT_BCP
-      } else {
-        meth = (Method*)sim->getCPUState().xreg(RMETHOD, 0);
-        bcp = (address)sim->getCPUState().xreg(RBCP, 0);
-      }
-      if (meth->is_native()) {
-        return;
-      }
-      if(method && meth->is_method()) {
-        ResourceMark rm;
-        method[0] = 'I';
-        method[1] = ' ';
-        meth->name_and_sig_as_C_string(method + 2, 398);
-      }
-      if (bcidx) {
-        if (meth->contains(bcp)) {
-          *bcidx = meth->bci_from(bcp);
-        } else {
-          *bcidx = -2;
-        }
-      }
-      if (decode) {
-        if (!BytecodeTracer::closure()) {
-          BytecodeTracer::set_closure(BytecodeTracer::std_closure());
-        }
-        stringStream str(decode, 400);
-        BytecodeTracer::trace(meth, bcp, &str);
-      }
-    } else {
-      if (method) {
-        CodeBlob *cb = CodeCache::find_blob((address)pc);
-        if (cb != NULL) {
-          if (cb->is_nmethod()) {
-            ResourceMark rm;
-            nmethod* nm = (nmethod*)cb;
-            method[0] = 'C';
-            method[1] = ' ';
-            nm->method()->name_and_sig_as_C_string(method + 2, 398);
-          } else if (cb->is_adapter_blob()) {
-            strcpy(method, "B adapter blob");
-          } else if (cb->is_runtime_stub()) {
-            strcpy(method, "B runtime stub");
-          } else if (cb->is_exception_stub()) {
-            strcpy(method, "B exception stub");
-          } else if (cb->is_deoptimization_stub()) {
-            strcpy(method, "B deoptimization stub");
-          } else if (cb->is_safepoint_stub()) {
-            strcpy(method, "B safepoint stub");
-          } else if (cb->is_uncommon_trap_stub()) {
-            strcpy(method, "B uncommon trap stub");
-          } else if (cb->contains((address)StubRoutines::call_stub())) {
-            strcpy(method, "B call stub");
-          } else {
-            strcpy(method, "B unknown blob : ");
-            strcat(method, cb->name());
-          }
-          if (framesize != NULL) {
-            *framesize = cb->frame_size();
-          }
-        }
-      }
-    }
-  }
-
-
-  JNIEXPORT void bccheck(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode)
-  {
-    bccheck1(pc, fp, method, bcidx, framesize, decode);
-  }
-}
-
-#endif // BUILTIN_SIM
 #endif // !PRODUCT
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@ -1478,8 +1478,7 @@ void TemplateTable::fop2(Operation op)
  case rem:
    __ fmovs(v1, v0);
    __ pop_f(v0);
-    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::frem),
-                         0, 2, MacroAssembler::ret_type_float);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
    break;
  default:
    ShouldNotReachHere();
@ -1511,8 +1510,7 @@ void TemplateTable::dop2(Operation op)
  case rem:
    __ fmovd(v1, v0);
    __ pop_d(v0);
-    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::drem),
-                         0, 2, MacroAssembler::ret_type_double);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
    break;
  default:
    ShouldNotReachHere();
@ -1653,8 +1651,7 @@ void TemplateTable::convert()
    __ fcvtzsw(r0, v0);
    __ get_fpsr(r1);
    __ cbzw(r1, L_Okay);
-    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2i),
-                         0, 1, MacroAssembler::ret_type_integral);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i));
    __ bind(L_Okay);
  }
    break;
@ -1665,8 +1662,7 @@ void TemplateTable::convert()
    __ fcvtzs(r0, v0);
    __ get_fpsr(r1);
    __ cbzw(r1, L_Okay);
-    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2l),
-                         0, 1, MacroAssembler::ret_type_integral);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l));
    __ bind(L_Okay);
  }
    break;
@ -1680,8 +1676,7 @@ void TemplateTable::convert()
    __ fcvtzdw(r0, v0);
    __ get_fpsr(r1);
    __ cbzw(r1, L_Okay);
-    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2i),
-                         0, 1, MacroAssembler::ret_type_integral);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
    __ bind(L_Okay);
  }
    break;
@ -1692,8 +1687,7 @@ void TemplateTable::convert()
    __ fcvtzd(r0, v0);
    __ get_fpsr(r1);
    __ cbzw(r1, L_Okay);
-    __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2l),
-                         0, 1, MacroAssembler::ret_type_integral);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
    __ bind(L_Okay);
  }
    break;
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@ -34,12 +34,8 @@

 #include OS_HEADER_INLINE(os)

-#ifndef BUILTIN_SIM
 #include <sys/auxv.h>
 #include <asm/hwcap.h>
-#else
-#define getauxval(hwcap) 0
-#endif

 #ifndef HWCAP_AES
 #define HWCAP_AES   (1<<3)
@ -92,10 +88,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
 #   define __ _masm->
    address start = __ pc();

-#ifdef BUILTIN_SIM
-    __ c_stub_prolog(1, 0, MacroAssembler::ret_type_void);
-#endif
-
    // void getPsrInfo(VM_Version::PsrInfo* psr_info);

    address entry = __ pc();
--- a/src/hotspot/os_cpu/linux_aarch64/linux_aarch64.S
+++ b/src/hotspot/os_cpu/linux_aarch64/linux_aarch64.S
@ -1,48 +0,0 @@
-//
-// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2014, Red Hat Inc. All rights reserved.
-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-//
-// This code is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License version 2 only, as
-// published by the Free Software Foundation.
-//
-// This code is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-// version 2 for more details (a copy is included in the LICENSE file that
-// accompanied this code).
-//
-// You should have received a copy of the GNU General Public License version
-// 2 along with this work; if not, write to the Free Software Foundation,
-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-// or visit www.oracle.com if you need additional information or have any
-// questions.
-
-#ifdef BUILTIN_SIM
-
-        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
-        .align  16
-        .type   SafeFetch32,@function
-        // Prototype: int SafeFetch32 (int * Adr, int ErrValue) 
-SafeFetch32:
-        movl    %esi, %eax
-Fetch32PFI:
-        movl    (%rdi), %eax
-Fetch32Resume:
-        ret
-
-        .globl SafeFetchN, FetchNPFI, FetchNResume
-        .align  16
-        .type   SafeFetchN,@function
-        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue) 
-SafeFetchN:
-        movq    %rsi, %rax
-FetchNPFI:
-        movq    (%rdi), %rax
-FetchNResume:
-        ret
-
-#endif
--- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
+++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
@ -53,9 +53,6 @@
 #include "utilities/debug.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
-#ifdef BUILTIN_SIM
-#include "../../../../../../simulator/simulator.hpp"
-#endif

 // put OS-includes here
 # include <sys/types.h>
@ -79,14 +76,8 @@
 # include <ucontext.h>
 # include <fpu_control.h>

-#ifdef BUILTIN_SIM
-#define REG_SP REG_RSP
-#define REG_PC REG_RIP
-#define REG_FP REG_RBP
-#else
 #define REG_FP 29
 #define REG_LR 30
-#endif

 NOINLINE address os::current_stack_pointer() {
  return (address)__builtin_frame_address(0);
@ -101,35 +92,19 @@ char* os::non_memory_address_word() {
 }

 address os::Linux::ucontext_get_pc(const ucontext_t * uc) {
-#ifdef BUILTIN_SIM
-  return (address)uc->uc_mcontext.gregs[REG_PC];
-#else
  return (address)uc->uc_mcontext.pc;
-#endif
 }

 void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) {
-#ifdef BUILTIN_SIM
-  uc->uc_mcontext.gregs[REG_PC] = (intptr_t)pc;
-#else
  uc->uc_mcontext.pc = (intptr_t)pc;
-#endif
 }

 intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) {
-#ifdef BUILTIN_SIM
-  return (intptr_t*)uc->uc_mcontext.gregs[REG_SP];
-#else
  return (intptr_t*)uc->uc_mcontext.sp;
-#endif
 }

 intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) {
-#ifdef BUILTIN_SIM
-  return (intptr_t*)uc->uc_mcontext.gregs[REG_FP];
-#else
  return (intptr_t*)uc->uc_mcontext.regs[REG_FP];
-#endif
 }

 // For Forte Analyzer AsyncGetCallTrace profiling support - thread
@ -217,11 +192,7 @@ bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t*
 // By default, gcc always saves frame pointer rfp on this stack. This
 // may get turned off by -fomit-frame-pointer.
 frame os::get_sender_for_C_frame(frame* fr) {
-#ifdef BUILTIN_SIM
-  return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
-#else
  return frame(fr->link(), fr->link(), fr->sender_pc());
-#endif
 }

 NOINLINE frame os::current_frame() {
@ -237,14 +208,6 @@ NOINLINE frame os::current_frame() {
  }
 }

-// Utility functions
-#ifdef BUILTIN_SIM
-extern "C" void Fetch32PFI () ;
-extern "C" void Fetch32Resume () ;
-extern "C" void FetchNPFI () ;
-extern "C" void FetchNResume () ;
-#endif
-
 extern "C" JNIEXPORT int
 JVM_handle_linux_signal(int sig,
                        siginfo_t* info,
@ -315,21 +278,10 @@ JVM_handle_linux_signal(int sig,
  if (info != NULL && uc != NULL && thread != NULL) {
    pc = (address) os::Linux::ucontext_get_pc(uc);

-#ifdef BUILTIN_SIM
-    if (pc == (address) Fetch32PFI) {
-       uc->uc_mcontext.gregs[REG_PC] = intptr_t(Fetch32Resume) ;
-       return 1 ;
-    }
-    if (pc == (address) FetchNPFI) {
-       uc->uc_mcontext.gregs[REG_PC] = intptr_t (FetchNResume) ;
-       return 1 ;
-    }
-#else
    if (StubRoutines::is_safefetch_fault(pc)) {
      os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
      return 1;
    }
-#endif

    address addr = (address) info->si_addr;

@ -543,40 +495,10 @@ void os::print_context(outputStream *st, const void *context) {

  const ucontext_t *uc = (const ucontext_t*)context;
  st->print_cr("Registers:");
-#ifdef BUILTIN_SIM
-  st->print(  "RAX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RAX]);
-  st->print(", RBX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RBX]);
-  st->print(", RCX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RCX]);
-  st->print(", RDX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RDX]);
-  st->cr();
-  st->print(  "RSP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RSP]);
-  st->print(", RBP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RBP]);
-  st->print(", RSI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RSI]);
-  st->print(", RDI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RDI]);
-  st->cr();
-  st->print(  "R8 =" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R8]);
-  st->print(", R9 =" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R9]);
-  st->print(", R10=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R10]);
-  st->print(", R11=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R11]);
-  st->cr();
-  st->print(  "R12=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R12]);
-  st->print(", R13=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R13]);
-  st->print(", R14=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R14]);
-  st->print(", R15=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R15]);
-  st->cr();
-  st->print(  "RIP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RIP]);
-  st->print(", EFLAGS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EFL]);
-  st->print(", CSGSFS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_CSGSFS]);
-  st->print(", ERR=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ERR]);
-  st->cr();
-  st->print("  TRAPNO=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_TRAPNO]);
-  st->cr();
-#else
  for (int r = 0; r < 31; r++) {
    st->print("R%-2d=", r);
    print_location(st, uc->uc_mcontext.regs[r]);
  }
-#endif
  st->cr();

  intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
@ -606,27 +528,8 @@ void os::print_register_info(outputStream *st, const void *context) {

  // this is only for the "general purpose" registers

-#ifdef BUILTIN_SIM
-  st->print("RAX="); print_location(st, uc->uc_mcontext.gregs[REG_RAX]);
-  st->print("RBX="); print_location(st, uc->uc_mcontext.gregs[REG_RBX]);
-  st->print("RCX="); print_location(st, uc->uc_mcontext.gregs[REG_RCX]);
-  st->print("RDX="); print_location(st, uc->uc_mcontext.gregs[REG_RDX]);
-  st->print("RSP="); print_location(st, uc->uc_mcontext.gregs[REG_RSP]);
-  st->print("RBP="); print_location(st, uc->uc_mcontext.gregs[REG_RBP]);
-  st->print("RSI="); print_location(st, uc->uc_mcontext.gregs[REG_RSI]);
-  st->print("RDI="); print_location(st, uc->uc_mcontext.gregs[REG_RDI]);
-  st->print("R8 ="); print_location(st, uc->uc_mcontext.gregs[REG_R8]);
-  st->print("R9 ="); print_location(st, uc->uc_mcontext.gregs[REG_R9]);
-  st->print("R10="); print_location(st, uc->uc_mcontext.gregs[REG_R10]);
-  st->print("R11="); print_location(st, uc->uc_mcontext.gregs[REG_R11]);
-  st->print("R12="); print_location(st, uc->uc_mcontext.gregs[REG_R12]);
-  st->print("R13="); print_location(st, uc->uc_mcontext.gregs[REG_R13]);
-  st->print("R14="); print_location(st, uc->uc_mcontext.gregs[REG_R14]);
-  st->print("R15="); print_location(st, uc->uc_mcontext.gregs[REG_R15]);
-#else
  for (int r = 0; r < 31; r++)
    st->print_cr(  "R%d=" INTPTR_FORMAT, r, (uintptr_t)uc->uc_mcontext.regs[r]);
-#endif
  st->cr();
 }

--- a/src/hotspot/os_cpu/linux_aarch64/prefetch_linux_aarch64.inline.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/prefetch_linux_aarch64.inline.hpp
@ -30,17 +30,13 @@


 inline void Prefetch::read (void *loc, intx interval) {
-#ifndef BUILTIN_SIM
  if (interval >= 0)
    asm("prfm PLDL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval));
-#endif
 }

 inline void Prefetch::write(void *loc, intx interval) {
-#ifndef BUILTIN_SIM
  if (interval >= 0)
    asm("prfm PSTL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval));
-#endif
 }

 #endif // OS_CPU_LINUX_AARCH64_PREFETCH_LINUX_AARCH64_INLINE_HPP