8241436: C2: Factor out C2-specific code from MacroAssembler

Reviewed-by: mdoerr, kvn, adinn
2026-04-29 00:02:34 +00:00 · 2020-03-27 13:42:57 +03:00 · 2020-03-27 13:42:57 +03:00 · 536e062a56
commit 536e062a56
parent 1dd60a35d1
33 changed files with 6696 additions and 6305 deletions
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@ -1545,7 +1545,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 #endif

 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ brk(0);
 }

@ -1562,7 +1562,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 #endif

  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
@ -1622,7 +1622,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {

 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->output()->frame_size_in_bytes();
@ -1704,7 +1704,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {

 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);
@ -1806,7 +1806,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
    uint ireg = ideal_reg();
    assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
    if (cbuf) {
-      MacroAssembler _masm(cbuf);
+      C2_MacroAssembler _masm(cbuf);
      assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
        // stack->stack
@ -1834,7 +1834,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
      }
    }
  } else if (cbuf) {
-    MacroAssembler _masm(cbuf);
+    C2_MacroAssembler _masm(cbuf);
    switch (src_lo_rc) {
    case rc_int:
      if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
@ -1842,7 +1842,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
            __ mov(as_Register(Matcher::_regEncode[dst_lo]),
                   as_Register(Matcher::_regEncode[src_lo]));
        } else {
-            MacroAssembler _masm(cbuf);
+            C2_MacroAssembler _masm(cbuf);
            __ movw(as_Register(Matcher::_regEncode[dst_lo]),
                    as_Register(Matcher::_regEncode[src_lo]));
        }
@ -1952,7 +1952,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 #endif

 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);
@ -1991,7 +1991,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 {
  // This is the unverified entry point.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
@ -2018,7 +2018,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
@ -2036,7 +2036,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
 {
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
@ -2403,7 +2403,7 @@ void Compile::reshape_address(AddPNode* addp) {


 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
-  MacroAssembler _masm(&cbuf);                                          \
+  C2_MacroAssembler _masm(&cbuf);                                       \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
@ -2448,7 +2448,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
-  static void loadStore(MacroAssembler masm, mem_insn insn,
+  static void loadStore(C2_MacroAssembler masm, mem_insn insn,
                        Register reg, int opcode,
                        Register base, int index, int scale, int disp,
                        int size_in_memory)
@ -2467,7 +2467,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
    (masm.*insn)(reg, addr);
  }

-  static void loadStore(MacroAssembler masm, mem_float_insn insn,
+  static void loadStore(C2_MacroAssembler masm, mem_float_insn insn,
                        FloatRegister reg, int opcode,
                        Register base, int index, int size, int disp,
                        int size_in_memory)
@ -2498,7 +2498,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
    }
  }

-  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
+  static void loadStore(C2_MacroAssembler masm, mem_vector_insn insn,
                        FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                        int opcode, Register base, int index, int size, int disp)
  {
@ -2551,7 +2551,7 @@ encode %{

  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}

@ -2561,7 +2561,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

@ -2569,7 +2569,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

@ -2577,7 +2577,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

@ -2585,7 +2585,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

@ -2593,7 +2593,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

@ -2601,7 +2601,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

@ -2609,7 +2609,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

@ -2617,7 +2617,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

@ -2625,7 +2625,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

@ -2633,7 +2633,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

@ -2641,7 +2641,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

@ -2649,7 +2649,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
    Register dst_reg = as_Register($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

@ -2657,7 +2657,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

@ -2665,7 +2665,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

@ -2673,14 +2673,14 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
    Register src_reg = as_Register($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strb0(memory1 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}
@ -2689,14 +2689,14 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
    Register src_reg = as_Register($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strh0(memory2 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}
@ -2705,14 +2705,14 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
    Register src_reg = as_Register($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw0(memory4 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}
@ -2724,19 +2724,19 @@ encode %{
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_str0(memory8 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}
@ -2745,7 +2745,7 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

@ -2753,14 +2753,14 @@ encode %{
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw_immn(immN src, memory1 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    address con = (address)$src$$constant;
    // need to do this the hard way until we can manage relocs
    // for 32 bit constants
@ -2773,7 +2773,7 @@ encode %{
  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw_immnk(immN src, memory4 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    address con = (address)$src$$constant;
    // need to do this the hard way until we can manage relocs
    // for 32 bit constants
@ -2786,7 +2786,7 @@ encode %{
  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      __ membar(Assembler::StoreStore);
      loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@ -2797,37 +2797,37 @@ encode %{
  // Vector loads and stores
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

@ -2929,7 +2929,7 @@ encode %{
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
-        MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
@ -2940,7 +2940,7 @@ encode %{

  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
@ -2950,7 +2950,7 @@ encode %{

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
@ -2961,7 +2961,7 @@ encode %{
  // synchronized read/update encodings

  enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
@ -2990,7 +2990,7 @@ encode %{
  %}

  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
@ -3020,7 +3020,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
@ -3028,7 +3028,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
@ -3036,7 +3036,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
@ -3044,7 +3044,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
@ -3057,7 +3057,7 @@ encode %{
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
@ -3065,7 +3065,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
@ -3073,7 +3073,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
@ -3081,7 +3081,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
@ -3090,7 +3090,7 @@ encode %{

  // auxiliary used for CompareAndSwapX to set result register
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
@ -3098,7 +3098,7 @@ encode %{
  // prefetch encodings

  enc_class aarch64_enc_prefetchw(memory mem) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
@ -3119,7 +3119,7 @@ encode %{
  /// mov envcodings

  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
@ -3130,7 +3130,7 @@ encode %{
  %}

  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
@ -3141,7 +3141,7 @@ encode %{
  %}

  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
@ -3166,19 +3166,19 @@ encode %{
  %}

  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
@ -3187,12 +3187,12 @@ encode %{
  %}

  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
@ -3205,13 +3205,13 @@ encode %{
  %}

  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
@ -3226,7 +3226,7 @@ encode %{
  // arithmetic encodings

  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
@ -3240,7 +3240,7 @@ encode %{
  %}

  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
@ -3254,7 +3254,7 @@ encode %{
  %}

  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
@ -3262,7 +3262,7 @@ encode %{
  %}

  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
@ -3270,7 +3270,7 @@ encode %{
  %}

  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
@ -3278,7 +3278,7 @@ encode %{
  %}

  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
@ -3288,14 +3288,14 @@ encode %{
  // compare instruction encodings

  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
@ -3306,7 +3306,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
@ -3314,14 +3314,14 @@ encode %{
  %}

  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
@ -3336,7 +3336,7 @@ encode %{
  %}

  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
@ -3344,45 +3344,45 @@ encode %{
  %}

  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  enc_class aarch64_enc_testp(iRegP src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  enc_class aarch64_enc_testn(iRegN src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}

  enc_class aarch64_enc_b(label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
@ -3395,7 +3395,7 @@ encode %{
     Register result_reg = as_Register($result$$reg);

     Label miss;
-     MacroAssembler _masm(&cbuf);
+     C2_MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
@ -3406,7 +3406,7 @@ encode %{
  %}

  enc_class aarch64_enc_java_static_call(method meth) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
@ -3433,7 +3433,7 @@ encode %{
  %}

  enc_class aarch64_enc_java_dynamic_call(method meth) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
@ -3443,7 +3443,7 @@ encode %{
  %}

  enc_class aarch64_enc_call_epilog() %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
@ -3451,7 +3451,7 @@ encode %{
  %}

  enc_class aarch64_enc_java_to_runtime(method meth) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
@ -3478,23 +3478,23 @@ encode %{
  %}

  enc_class aarch64_enc_rethrow() %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  enc_class aarch64_enc_ret() %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
@ -3504,7 +3504,7 @@ encode %{
  %}

  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
@ -3582,7 +3582,7 @@ encode %{
  %}

  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
@ -8075,7 +8075,7 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
@ -8118,7 +8118,7 @@ instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
-    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "opto/c2_MacroAssembler.hpp"
+#include "opto/intrinsicnode.hpp"
+
+#ifdef PRODUCT // PRODUCT builds: BLOCK_COMMENT expands to nothing
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else // non-PRODUCT builds: forward the text to block_comment()
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error) // NOTE: two statements; brace any if/else body that uses it
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") // bind the label, then block-comment "<label>:"; also two statements
+
+typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); // pointer to a load emitter; ldrb/ldrh/ldrw/ldr are cast to it below
+
+// Search for pattern str1 (cnt1 chars) in source str2 (cnt2 chars); result = index of the 1st match, or -1
+void C2_MacroAssembler::string_indexof(Register str2, Register str1,
+                                       Register cnt2, Register cnt1,
+                                       Register tmp1, Register tmp2,
+                                       Register tmp3, Register tmp4,
+                                       Register tmp5, Register tmp6,
+                                       int icnt1, Register result, int ae) { // icnt1: -1 = generic code driven by cnt1; 1..4 = loop specialised for that pattern length
+  // NOTE: tmp5, tmp6 can be zr depending on specific method version (only the icnt1 == -1 code references them)
+  Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
+
+  Register ch1 = rscratch1;
+  Register ch2 = rscratch2;
+  Register cnt1tmp = tmp1;
+  Register cnt2tmp = tmp2;
+  Register cnt1_neg = cnt1; // cnt1/cnt2 are reused as negative byte offsets in the linear loops
+  Register cnt2_neg = cnt2;
+  Register result_tmp = tmp4; // index that the (negative) cnt2_neg offset is relative to; combined at MATCH
+
+  bool isL = ae == StrIntrinsicNode::LL; // both strings use 1-byte chars
+
+  bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; // pattern has 1-byte chars
+  bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; // source has 1-byte chars
+  int str1_chr_shift = str1_isL ? 0:1;
+  int str2_chr_shift = str2_isL ? 0:1;
+  int str1_chr_size = str1_isL ? 1:2;
+  int str2_chr_size = str2_isL ? 1:2;
+  chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
+                                      (chr_insn)&MacroAssembler::ldrh;
+  chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
+                                      (chr_insn)&MacroAssembler::ldrh;
+  chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
+  chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
+
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+
+  // We have two strings, a source string in str2, cnt2 and a pattern string
+  // in str1, cnt1. Find the 1st occurrence of pattern in source or return -1.
+
+  // For larger pattern and source we use a simplified Boyer Moore algorithm.
+  // With a small pattern and source we use linear scan.
+
+  if (icnt1 == -1) {
+    sub(result_tmp, cnt2, cnt1);
+    cmp(cnt1, (u1)8);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
+    br(LT, LINEARSEARCH);
+    dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
+    subs(zr, cnt1, 256);
+    lsr(tmp1, cnt2, 2);
+    ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
+    br(GE, LINEARSTUB);
+  }
+
+// The Boyer Moore algorithm is based on the description here:-
+//
+// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
+//
+// This describes an algorithm with 2 shift rules. The 'Bad Character' rule
+// and the 'Good Suffix' rule.
+//
+// These rules are essentially heuristics for how far we can shift the
+// pattern along the search string.
+//
+// The implementation here uses the 'Bad Character' rule only because of the
+// complexity of initialisation for the 'Good Suffix' rule.
+//
+// This is also known as the Boyer-Moore-Horspool algorithm:-
+//
+// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
+//
+// This particular implementation has a few Java-specific optimizations.
+//
+// #define ASIZE 256
+//
+//    int bm(unsigned char *x, int m, unsigned char *y, int n) {
+//       int i, j;
+//       unsigned c;
+//       unsigned char bc[ASIZE];
+//
+//       /* Preprocessing */
+//       for (i = 0; i < ASIZE; ++i)
+//          bc[i] = m;
+//       for (i = 0; i < m - 1; ) {
+//          c = x[i];
+//          ++i;
+//          // c < 256 for Latin1 string, so, no need for branch
+//          #ifdef PATTERN_STRING_IS_LATIN1
+//          bc[c] = m - i;
+//          #else
+//          if (c < ASIZE) bc[c] = m - i;
+//          #endif
+//       }
+//
+//       /* Searching */
+//       j = 0;
+//       while (j <= n - m) {
+//          c = y[i+j];
+//          if (x[m-1] == c)
+//            for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
+//          if (i < 0) return j;
+//          // c < 256 for Latin1 string, so, no need for branch
+//          #ifdef SOURCE_STRING_IS_LATIN1
+//          // LL case: (c< 256) always true. Remove branch
+//          j += bc[y[j+m-1]];
+//          #endif
+//          #ifdef PATTERN_STRING_IS_UTF
+//          // UU case: need if (c<ASIZE) check. Skip 1 character if not.
+//          if (c < ASIZE)
+//            j += bc[y[j+m-1]];
+//          else
+//            j += 1
+//          #endif
+//          #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
+//          // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
+//          if (c < ASIZE)
+//            j += bc[y[j+m-1]];
+//          else
+//            j += m
+//          #endif
+//       }
+//    }
+
+  if (icnt1 == -1) {
+    Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
+        BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
+    Register cnt1end = tmp2;
+    Register str2end = cnt2;
+    Register skipch = tmp2;
+
+    // str1 length is >=8, so, we can read at least 1 register for cases when
+    // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
+    // UL case. We'll re-read last character in inner pre-loop code to have
+    // single outer pre-loop load
+    const int firstStep = isL ? 7 : 3;
+
+    const int ASIZE = 256;
+    const int STORED_BYTES = 32; // amount of bytes stored per instruction
+    sub(sp, sp, ASIZE); // reserve the ASIZE-byte bad-character table on the stack
+    mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
+    mov(ch1, sp);
+    BIND(BM_INIT_LOOP);
+      stpq(v0, v0, Address(post(ch1, STORED_BYTES))); // every table byte = cnt1 (v0 was filled by dup above)
+      subs(tmp5, tmp5, 1);
+      br(GT, BM_INIT_LOOP);
+
+      sub(cnt1tmp, cnt1, 1);
+      mov(tmp5, str2); // remember the original source start; used at BMMATCH to compute the index
+      add(str2end, str2, result_tmp, LSL, str2_chr_shift);
+      sub(ch2, cnt1, 1);
+      mov(tmp3, str1);
+    BIND(BCLOOP);
+      (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
+      if (!str1_isL) {
+        subs(zr, ch1, ASIZE);
+        br(HS, BCSKIP); // chars >= ASIZE have no table slot
+      }
+      strb(ch2, Address(sp, ch1)); // table[c] = m - i, as in the pseudo-code above
+    BIND(BCSKIP);
+      subs(ch2, ch2, 1);
+      br(GT, BCLOOP);
+
+      add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
+      if (str1_isL == str2_isL) {
+        // load last 8 bytes (8LL/4UU symbols)
+        ldr(tmp6, Address(tmp6, -wordSize));
+      } else {
+        ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
+        // convert Latin1 to UTF. We'll have to wait until load completed, but
+        // it's still faster than per-character loads+checks
+        lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
+        ubfx(ch1, tmp6, 8, 8); // str1[N-2]
+        ubfx(ch2, tmp6, 16, 8); // str1[N-3]
+        andr(tmp6, tmp6, 0xFF); // str1[N-4]
+        orr(ch2, ch1, ch2, LSL, 16);
+        orr(tmp6, tmp6, tmp3, LSL, 48);
+        orr(tmp6, tmp6, ch2, LSL, 16);
+      }
+    BIND(BMLOOPSTR2);
+      (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
+      sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
+      if (str1_isL == str2_isL) {
+        // re-init tmp3. It's for free because it's executed in parallel with
+        // load above. Alternative is to initialize it before loop, but it'll
+        // affect performance on in-order systems with 2 or more ld/st pipelines
+        lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
+      }
+      if (!isL) { // UU/UL case
+        lsl(ch2, cnt1tmp, 1); // offset in bytes
+      }
+      cmp(tmp3, skipch); // last pattern char vs. the source char aligned with it
+      br(NE, BMSKIP);
+      ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
+      mov(ch1, tmp6);
+      if (isL) {
+        b(BMLOOPSTR1_AFTER_LOAD);
+      } else {
+        sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
+        b(BMLOOPSTR1_CMP);
+      }
+    BIND(BMLOOPSTR1);
+      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
+      (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
+    BIND(BMLOOPSTR1_AFTER_LOAD);
+      subs(cnt1tmp, cnt1tmp, 1);
+      br(LT, BMLOOPSTR1_LASTCMP);
+    BIND(BMLOOPSTR1_CMP);
+      cmp(ch1, ch2);
+      br(EQ, BMLOOPSTR1);
+    BIND(BMSKIP);
+      if (!isL) {
+        // if we've met UTF symbol while searching Latin1 pattern, then we can
+        // skip cnt1 symbols
+        if (str1_isL != str2_isL) {
+          mov(result_tmp, cnt1);
+        } else {
+          mov(result_tmp, 1);
+        }
+        subs(zr, skipch, ASIZE);
+        br(HS, BMADV);
+      }
+      ldrb(result_tmp, Address(sp, skipch)); // load skip distance
+    BIND(BMADV);
+      sub(cnt1tmp, cnt1, 1);
+      add(str2, str2, result_tmp, LSL, str2_chr_shift);
+      cmp(str2, str2end);
+      br(LE, BMLOOPSTR2);
+      add(sp, sp, ASIZE); // release the table before leaving
+      b(NOMATCH);
+    BIND(BMLOOPSTR1_LASTCMP);
+      cmp(ch1, ch2);
+      br(NE, BMSKIP);
+    BIND(BMMATCH);
+      sub(result, str2, tmp5); // byte distance from the original source start
+      if (!str2_isL) lsr(result, result, 1); // bytes to chars for 2-byte source chars
+      add(sp, sp, ASIZE); // release the table before leaving
+      b(DONE);
+
+    BIND(LINEARSTUB);
+    cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
+    br(LT, LINEAR_MEDIUM);
+    mov(result, zr);
+    RuntimeAddress stub = NULL;
+    if (isL) {
+      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
+      assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
+    } else if (str1_isL) {
+      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
+       assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
+    } else {
+      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
+      assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
+    }
+    trampoline_call(stub);
+    b(DONE);
+  }
+
+  BIND(LINEARSEARCH);
+  {
+    Label DO1, DO2, DO3;
+
+    Register str2tmp = tmp2;
+    Register first = tmp3;
+
+    if (icnt1 == -1)
+    {
+        Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
+
+        cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
+        br(LT, DOSHORT);
+      BIND(LINEAR_MEDIUM);
+        (this->*str1_load_1chr)(first, Address(str1));
+        lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift))); // point past the pattern; index it with negative offsets
+        sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
+        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
+
+      BIND(FIRST_LOOP);
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
+        cmp(first, ch2);
+        br(EQ, STR1_LOOP);
+      BIND(STR2_NEXT);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
+        br(LE, FIRST_LOOP);
+        b(NOMATCH);
+
+      BIND(STR1_LOOP);
+        adds(cnt1tmp, cnt1_neg, str1_chr_size);
+        add(cnt2tmp, cnt2_neg, str2_chr_size); // add (not adds): the br below tests the flags of the adds above
+        br(GE, MATCH);
+
+      BIND(STR1_NEXT);
+        (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
+        cmp(ch1, ch2);
+        br(NE, STR2_NEXT);
+        adds(cnt1tmp, cnt1tmp, str1_chr_size);
+        add(cnt2tmp, cnt2tmp, str2_chr_size);
+        br(LT, STR1_NEXT);
+        b(MATCH);
+
+      BIND(DOSHORT);
+      if (str1_isL == str2_isL) {
+        cmp(cnt1, (u1)2);
+        br(LT, DO1);
+        br(GT, DO3); // cnt1 == 2 falls through to DO2
+      }
+    }
+
+    if (icnt1 == 4) {
+      Label CH1_LOOP;
+
+        (this->*load_4chr)(ch1, str1);
+        sub(result_tmp, cnt2, 4);
+        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
+
+      BIND(CH1_LOOP);
+        (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
+        cmp(ch1, ch2);
+        br(EQ, MATCH);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
+        br(LE, CH1_LOOP);
+        b(NOMATCH);
+      }
+
+    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
+      Label CH1_LOOP;
+
+      BIND(DO2);
+        (this->*load_2chr)(ch1, str1);
+        if (icnt1 == 2) {
+          sub(result_tmp, cnt2, 2);
+        }
+        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
+      BIND(CH1_LOOP);
+        (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
+        cmp(ch1, ch2);
+        br(EQ, MATCH);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
+        br(LE, CH1_LOOP);
+        b(NOMATCH);
+    }
+
+    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
+      Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
+
+      BIND(DO3);
+        (this->*load_2chr)(first, str1); // first two pattern chars in one load
+        (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size)); // third pattern char
+        if (icnt1 == 3) {
+          sub(result_tmp, cnt2, 3);
+        }
+        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
+      BIND(FIRST_LOOP);
+        (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
+        cmpw(first, ch2);
+        br(EQ, STR1_LOOP);
+      BIND(STR2_NEXT);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
+        br(LE, FIRST_LOOP);
+        b(NOMATCH);
+
+      BIND(STR1_LOOP);
+        add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
+        cmp(ch1, ch2);
+        br(NE, STR2_NEXT);
+        b(MATCH);
+    }
+
+    if (icnt1 == -1 || icnt1 == 1) {
+      Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
+
+      BIND(DO1);
+        (this->*str1_load_1chr)(ch1, str1);
+        cmp(cnt2, (u1)8);
+        br(LT, DO1_SHORT);
+
+        sub(result_tmp, cnt2, 8/str2_chr_size);
+        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
+        mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001); // 1 in every char lane, for the zero-lane test below
+        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
+
+        if (str2_isL) {
+          orr(ch1, ch1, ch1, LSL, 8);
+        }
+        orr(ch1, ch1, ch1, LSL, 16);
+        orr(ch1, ch1, ch1, LSL, 32); // ch1 now holds the pattern char in every lane
+      BIND(CH1_LOOP);
+        ldr(ch2, Address(str2, cnt2_neg));
+        eor(ch2, ch1, ch2); // lanes equal to the pattern char become zero
+        sub(tmp1, ch2, tmp3);
+        orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
+        bics(tmp1, tmp1, tmp2); // (x - ones) & ~(x | 0x7f..): non-zero iff some lane of x is zero
+        br(NE, HAS_ZERO);
+        adds(cnt2_neg, cnt2_neg, 8);
+        br(LT, CH1_LOOP);
+
+        cmp(cnt2_neg, (u1)8); // offset < 8: the final word has not been checked yet
+        mov(cnt2_neg, 0); // mov leaves the flags from the cmp above intact
+        br(LT, CH1_LOOP);
+        b(NOMATCH);
+
+      BIND(HAS_ZERO);
+        rev(tmp1, tmp1);
+        clz(tmp1, tmp1); // after the byte reverse: bit offset of the lowest-addressed flagged lane
+        add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); // bits to bytes
+        b(MATCH);
+
+      BIND(DO1_SHORT);
+        mov(result_tmp, cnt2);
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
+      BIND(DO1_LOOP);
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
+        cmpw(ch1, ch2);
+        br(EQ, MATCH);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
+        br(LT, DO1_LOOP); // falls through to NOMATCH when the source is exhausted
+    }
+  }
+  BIND(NOMATCH);
+    mov(result, -1);
+    b(DONE);
+  BIND(MATCH);
+    add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift); // base index + (negative byte offset converted to chars)
+  BIND(DONE);
+}
+
+typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); // repeats the identical typedef earlier in this file
+typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn); // pointer to a register-to-register extend emitter (uxtbw/uxthw are cast to it)
+// Find ch in str1 (cnt1 chars of 2 bytes each); result = index of the first occurrence, or -1 (also when cnt1 == 0). Overwrites str1, cnt1 and ch.
+void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
+                                            Register ch, Register result,
+                                            Register tmp1, Register tmp2, Register tmp3)
+{
+  Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
+  Register cnt1_neg = cnt1; // cnt1 is reused as a negative byte offset from the end of the scanned range
+  Register ch1 = rscratch1;
+  Register result_tmp = rscratch2;
+
+  cbz(cnt1, NOMATCH);
+
+  cmp(cnt1, (u1)4); // fewer than 4 chars: a full 8-byte word cannot be loaded
+  br(LT, DO1_SHORT);
+
+  orr(ch, ch, ch, LSL, 16);
+  orr(ch, ch, ch, LSL, 32); // ch now holds the searched char in all four 16-bit lanes
+
+  sub(cnt1, cnt1, 4);
+  mov(result_tmp, cnt1); // index of the first char of the last full word
+  lea(str1, Address(str1, cnt1, Address::uxtw(1)));
+  sub(cnt1_neg, zr, cnt1, LSL, 1);
+
+  mov(tmp3, 0x0001000100010001); // 1 in every 16-bit lane, for the zero-lane test below
+
+  BIND(CH1_LOOP);
+    ldr(ch1, Address(str1, cnt1_neg));
+    eor(ch1, ch, ch1); // lanes equal to ch become zero
+    sub(tmp1, ch1, tmp3);
+    orr(tmp2, ch1, 0x7fff7fff7fff7fff);
+    bics(tmp1, tmp1, tmp2); // (x - ones) & ~(x | 0x7fff..): non-zero iff some lane of x is zero
+    br(NE, HAS_ZERO);
+    adds(cnt1_neg, cnt1_neg, 8);
+    br(LT, CH1_LOOP);
+
+    cmp(cnt1_neg, (u1)8); // offset < 8: the final word has not been checked yet
+    mov(cnt1_neg, 0); // mov leaves the flags from the cmp above intact
+    br(LT, CH1_LOOP);
+    b(NOMATCH);
+
+  BIND(HAS_ZERO);
+    rev(tmp1, tmp1);
+    clz(tmp1, tmp1); // after the byte reverse: bit offset of the lowest-addressed flagged lane
+    add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); // bits to bytes
+    b(MATCH);
+
+  BIND(DO1_SHORT);
+    mov(result_tmp, cnt1);
+    lea(str1, Address(str1, cnt1, Address::uxtw(1)));
+    sub(cnt1_neg, zr, cnt1, LSL, 1);
+  BIND(DO1_LOOP);
+    ldrh(ch1, Address(str1, cnt1_neg));
+    cmpw(ch, ch1);
+    br(EQ, MATCH);
+    adds(cnt1_neg, cnt1_neg, 2);
+    br(LT, DO1_LOOP); // falls through to NOMATCH when the string is exhausted
+  BIND(NOMATCH);
+    mov(result, -1);
+    b(DONE);
+  BIND(MATCH);
+    add(result, result_tmp, cnt1_neg, ASR, 1); // base index + (negative byte offset converted to chars)
+  BIND(DONE);
+}
+
+// Compare strings. In the code emitted here, result = difference of the first differing chars, else cnt1 - cnt2 (in chars); long inputs go to a stub.
+void C2_MacroAssembler::string_compare(Register str1, Register str2,
+    Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
+    FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) { // vtmp3 is not referenced in this body
+  Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
+      DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
+      SHORT_LOOP_START, TAIL_CHECK;
+
+  bool isLL = ae == StrIntrinsicNode::LL;
+  bool isLU = ae == StrIntrinsicNode::LU;
+  bool isUL = ae == StrIntrinsicNode::UL;
+
+  // The stub threshold for LL strings is: 72 (64 + 8) chars
+  // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
+  // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
+  const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
+
+  bool str1_isL = isLL || isLU; // first letter of the encoding describes str1
+  bool str2_isL = isLL || isUL; // second letter describes str2
+
+  int str1_chr_shift = str1_isL ? 0 : 1;
+  int str2_chr_shift = str2_isL ? 0 : 1;
+  int str1_chr_size = str1_isL ? 1 : 2;
+  int str2_chr_size = str2_isL ? 1 : 2;
+  int minCharsInWord = isLL ? wordSize : wordSize/2;
+
+  FloatRegister vtmpZ = vtmp1, vtmp = vtmp2; // vtmpZ is zeroed and zipped with 1-byte chars to widen them to 2 bytes
+  chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
+                                      (chr_insn)&MacroAssembler::ldrh;
+  chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
+                                      (chr_insn)&MacroAssembler::ldrh;
+  uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
+                            (uxt_insn)&MacroAssembler::uxthw;
+
+  BLOCK_COMMENT("string_compare {");
+
+  // Bizarrely, the counts are passed in bytes, regardless of whether they
+  // are L or U strings, however the result is always in characters.
+  if (!str1_isL) asrw(cnt1, cnt1, 1);
+  if (!str2_isL) asrw(cnt2, cnt2, 1);
+
+  // Compute the minimum of the string lengths and save the difference.
+  subsw(result, cnt1, cnt2); // kept as the final result if no differing char is found
+  cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
+
+  // A very short string
+  cmpw(cnt2, minCharsInWord);
+  br(Assembler::LE, SHORT_STRING);
+
+  // Compare longwords
+  // load first parts of strings and finish initialization while loading
+  {
+    if (str1_isL == str2_isL) { // LL or UU
+      ldr(tmp1, Address(str1));
+      cmp(str1, str2);
+      br(Assembler::EQ, DONE); // same address: contents are equal, result already holds the length difference
+      ldr(tmp2, Address(str2));
+      cmp(cnt2, stub_threshold);
+      br(GE, STUB);
+      subsw(cnt2, cnt2, minCharsInWord);
+      br(EQ, TAIL_CHECK);
+      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
+      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
+      sub(cnt2, zr, cnt2, LSL, str2_chr_shift); // cnt2 becomes a negative byte offset from the last word
+    } else if (isLU) {
+      ldrs(vtmp, Address(str1));
+      ldr(tmp2, Address(str2));
+      cmp(cnt2, stub_threshold);
+      br(GE, STUB);
+      subw(cnt2, cnt2, 4);
+      eor(vtmpZ, T16B, vtmpZ, vtmpZ);
+      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
+      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
+      zip1(vtmp, T8B, vtmp, vtmpZ); // interleave with zero bytes: 4 x 1-byte chars become 4 x 2-byte chars
+      sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
+      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
+      add(cnt1, cnt1, 4);
+      fmovd(tmp1, vtmp);
+    } else { // UL case
+      ldr(tmp1, Address(str1));
+      ldrs(vtmp, Address(str2));
+      cmp(cnt2, stub_threshold);
+      br(GE, STUB);
+      subw(cnt2, cnt2, 4);
+      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
+      eor(vtmpZ, T16B, vtmpZ, vtmpZ);
+      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
+      sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
+      zip1(vtmp, T8B, vtmp, vtmpZ);
+      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
+      add(cnt1, cnt1, 8);
+      fmovd(tmp2, vtmp);
+    }
+    adds(cnt2, cnt2, isUL ? 4 : 8); // step by the number of str2 bytes consumed per word
+    br(GE, TAIL);
+    eor(rscratch2, tmp1, tmp2);
+    cbnz(rscratch2, DIFFERENCE);
+    // main loop
+    bind(NEXT_WORD);
+    if (str1_isL == str2_isL) {
+      ldr(tmp1, Address(str1, cnt2));
+      ldr(tmp2, Address(str2, cnt2));
+      adds(cnt2, cnt2, 8);
+    } else if (isLU) {
+      ldrs(vtmp, Address(str1, cnt1));
+      ldr(tmp2, Address(str2, cnt2));
+      add(cnt1, cnt1, 4);
+      zip1(vtmp, T8B, vtmp, vtmpZ);
+      fmovd(tmp1, vtmp);
+      adds(cnt2, cnt2, 8);
+    } else { // UL
+      ldrs(vtmp, Address(str2, cnt2));
+      ldr(tmp1, Address(str1, cnt1));
+      zip1(vtmp, T8B, vtmp, vtmpZ);
+      add(cnt1, cnt1, 8);
+      fmovd(tmp2, vtmp);
+      adds(cnt2, cnt2, 4);
+    }
+    br(GE, TAIL);
+
+    eor(rscratch2, tmp1, tmp2);
+    cbz(rscratch2, NEXT_WORD);
+    b(DIFFERENCE);
+    bind(TAIL);
+    eor(rscratch2, tmp1, tmp2);
+    cbnz(rscratch2, DIFFERENCE);
+    // Last longword.  In the case where length == 4 we compare the
+    // same longword twice, but that's still faster than another
+    // conditional branch.
+    if (str1_isL == str2_isL) {
+      ldr(tmp1, Address(str1));
+      ldr(tmp2, Address(str2));
+    } else if (isLU) {
+      ldrs(vtmp, Address(str1));
+      ldr(tmp2, Address(str2));
+      zip1(vtmp, T8B, vtmp, vtmpZ);
+      fmovd(tmp1, vtmp);
+    } else { // UL
+      ldrs(vtmp, Address(str2));
+      ldr(tmp1, Address(str1));
+      zip1(vtmp, T8B, vtmp, vtmpZ);
+      fmovd(tmp2, vtmp);
+    }
+    bind(TAIL_CHECK);
+    eor(rscratch2, tmp1, tmp2);
+    cbz(rscratch2, DONE);
+
+    // Find the first different characters in the longwords and
+    // compute their difference.
+    bind(DIFFERENCE);
+    rev(rscratch2, rscratch2);
+    clz(rscratch2, rscratch2); // after the byte reverse: bit offset of the lowest-addressed differing byte
+    andr(rscratch2, rscratch2, isLL ? -8 : -16); // round down to a char boundary (8 or 16 bits)
+    lsrv(tmp1, tmp1, rscratch2);
+    (this->*ext_chr)(tmp1, tmp1);
+    lsrv(tmp2, tmp2, rscratch2);
+    (this->*ext_chr)(tmp2, tmp2);
+    subw(result, tmp1, tmp2);
+    b(DONE);
+  }
+
+  bind(STUB);
+    RuntimeAddress stub = NULL;
+    switch(ae) { // pick the long-string stub matching the encoding pair
+      case StrIntrinsicNode::LL:
+        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
+        break;
+      case StrIntrinsicNode::UU:
+        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
+        break;
+      case StrIntrinsicNode::LU:
+        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
+        break;
+      case StrIntrinsicNode::UL:
+        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
+        break;
+      default:
+        ShouldNotReachHere();
+     }
+    assert(stub.target() != NULL, "compare_long_string stub has not been generated");
+    trampoline_call(stub);
+    b(DONE);
+
+  bind(SHORT_STRING);
+  // Is the minimum length zero?
+  cbz(cnt2, DONE);
+  // arrange code to do most branches while loading and loading next characters
+  // while comparing previous
+  (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
+  subs(cnt2, cnt2, 1);
+  br(EQ, SHORT_LAST_INIT);
+  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); // cnt1 is free here and is reused to hold a str2 char
+  b(SHORT_LOOP_START);
+  bind(SHORT_LOOP);
+  subs(cnt2, cnt2, 1);
+  br(EQ, SHORT_LAST);
+  bind(SHORT_LOOP_START);
+  (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
+  (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
+  cmp(tmp1, cnt1); // compare the previously loaded pair while the next pair loads
+  br(NE, SHORT_LOOP_TAIL);
+  subs(cnt2, cnt2, 1);
+  br(EQ, SHORT_LAST2);
+  (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
+  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
+  cmp(tmp2, rscratch1);
+  br(EQ, SHORT_LOOP);
+  sub(result, tmp2, rscratch1);
+  b(DONE);
+  bind(SHORT_LOOP_TAIL);
+  sub(result, tmp1, cnt1);
+  b(DONE);
+  bind(SHORT_LAST2);
+  cmp(tmp2, rscratch1);
+  br(EQ, DONE);
+  sub(result, tmp2, rscratch1);
+
+  b(DONE);
+  bind(SHORT_LAST_INIT);
+  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
+  bind(SHORT_LAST);
+  cmp(tmp1, cnt1);
+  br(EQ, DONE); // all compared chars equal: result keeps the length difference
+  sub(result, tmp1, cnt1);
+
+  bind(DONE);
+
+  BLOCK_COMMENT("} string_compare");
+}
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+ public:
+
+  void string_compare(Register str1, Register str2,                      // the two strings being compared
+                      Register cnt1, Register cnt2, Register result,      // counts are passed in bytes; result is in characters
+                      Register tmp1, Register tmp2, FloatRegister vtmp1,
+                      FloatRegister vtmp2, FloatRegister vtmp3, int ae);  // ae: one of StrIntrinsicNode::LL/UU/LU/UL
+
+  void string_indexof(Register str2, Register str1,        // searches for pattern str1 in source str2
+                      Register cnt2, Register cnt1,        // lengths of str2 and str1 respectively
+                      Register tmp1, Register tmp2,
+                      Register tmp3, Register tmp4,
+                      Register tmp5, Register tmp6,        // tmp5/tmp6 can be zr depending on the variant
+                      int icnt1, Register result, int ae); // icnt1: pattern length fixed at code-generation time, or -1 to use cnt1; result: index or -1
+
+  void string_indexof_char(Register str1, Register cnt1,                 // string to scan and its length in 16-bit chars
+                           Register ch, Register result,                 // result: index of ch in str1, or -1 if not found
+                           Register tmp1, Register tmp2, Register tmp3); // str1, cnt1 and ch may be clobbered
+
+#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@ -54,7 +54,6 @@
 #ifdef COMPILER2
 #include "oops/oop.hpp"
 #include "opto/compile.hpp"
-#include "opto/intrinsicnode.hpp"
 #include "opto/node.hpp"
 #include "opto/output.hpp"
 #endif
@ -4414,737 +4413,6 @@ void MacroAssembler::remove_frame(int framesize) {
  }
 }

-#ifdef COMPILER2
-typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
-
-// Search for str1 in str2 and return index or -1
-void MacroAssembler::string_indexof(Register str2, Register str1,
-                                    Register cnt2, Register cnt1,
-                                    Register tmp1, Register tmp2,
-                                    Register tmp3, Register tmp4,
-                                    Register tmp5, Register tmp6,
-                                    int icnt1, Register result, int ae) {
-  // NOTE: tmp5, tmp6 can be zr depending on specific method version
-  Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
-
-  Register ch1 = rscratch1;
-  Register ch2 = rscratch2;
-  Register cnt1tmp = tmp1;
-  Register cnt2tmp = tmp2;
-  Register cnt1_neg = cnt1;
-  Register cnt2_neg = cnt2;
-  Register result_tmp = tmp4;
-
-  bool isL = ae == StrIntrinsicNode::LL;
-
-  bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
-  bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
-  int str1_chr_shift = str1_isL ? 0:1;
-  int str2_chr_shift = str2_isL ? 0:1;
-  int str1_chr_size = str1_isL ? 1:2;
-  int str2_chr_size = str2_isL ? 1:2;
-  chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
-                                      (chr_insn)&MacroAssembler::ldrh;
-  chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
-                                      (chr_insn)&MacroAssembler::ldrh;
-  chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
-  chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
-
-  // Note, inline_string_indexOf() generates checks:
-  // if (substr.count > string.count) return -1;
-  // if (substr.count == 0) return 0;
-
-  // We have two strings, a source string in str2, cnt2 and a pattern string
-  // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
-
-  // For larger pattern and source we use a simplified Boyer Moore algorithm.
-  // With a small pattern and source we use linear scan.
-
-  if (icnt1 == -1) {
-    sub(result_tmp, cnt2, cnt1);
-    cmp(cnt1, (u1)8);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
-    br(LT, LINEARSEARCH);
-    dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
-    subs(zr, cnt1, 256);
-    lsr(tmp1, cnt2, 2);
-    ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
-    br(GE, LINEARSTUB);
-  }
-
-// The Boyer Moore alogorithm is based on the description here:-
-//
-// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
-//
-// This describes and algorithm with 2 shift rules. The 'Bad Character' rule
-// and the 'Good Suffix' rule.
-//
-// These rules are essentially heuristics for how far we can shift the
-// pattern along the search string.
-//
-// The implementation here uses the 'Bad Character' rule only because of the
-// complexity of initialisation for the 'Good Suffix' rule.
-//
-// This is also known as the Boyer-Moore-Horspool algorithm:-
-//
-// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
-//
-// This particular implementation has few java-specific optimizations.
-//
-// #define ASIZE 256
-//
-//    int bm(unsigned char *x, int m, unsigned char *y, int n) {
-//       int i, j;
-//       unsigned c;
-//       unsigned char bc[ASIZE];
-//
-//       /* Preprocessing */
-//       for (i = 0; i < ASIZE; ++i)
-//          bc[i] = m;
-//       for (i = 0; i < m - 1; ) {
-//          c = x[i];
-//          ++i;
-//          // c < 256 for Latin1 string, so, no need for branch
-//          #ifdef PATTERN_STRING_IS_LATIN1
-//          bc[c] = m - i;
-//          #else
-//          if (c < ASIZE) bc[c] = m - i;
-//          #endif
-//       }
-//
-//       /* Searching */
-//       j = 0;
-//       while (j <= n - m) {
-//          c = y[i+j];
-//          if (x[m-1] == c)
-//            for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
-//          if (i < 0) return j;
-//          // c < 256 for Latin1 string, so, no need for branch
-//          #ifdef SOURCE_STRING_IS_LATIN1
-//          // LL case: (c< 256) always true. Remove branch
-//          j += bc[y[j+m-1]];
-//          #endif
-//          #ifndef PATTERN_STRING_IS_UTF
-//          // UU case: need if (c<ASIZE) check. Skip 1 character if not.
-//          if (c < ASIZE)
-//            j += bc[y[j+m-1]];
-//          else
-//            j += 1
-//          #endif
-//          #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
-//          // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
-//          if (c < ASIZE)
-//            j += bc[y[j+m-1]];
-//          else
-//            j += m
-//          #endif
-//       }
-//    }
-
-  if (icnt1 == -1) {
-    Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
-        BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
-    Register cnt1end = tmp2;
-    Register str2end = cnt2;
-    Register skipch = tmp2;
-
-    // str1 length is >=8, so, we can read at least 1 register for cases when
-    // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
-    // UL case. We'll re-read last character in inner pre-loop code to have
-    // single outer pre-loop load
-    const int firstStep = isL ? 7 : 3;
-
-    const int ASIZE = 256;
-    const int STORED_BYTES = 32; // amount of bytes stored per instruction
-    sub(sp, sp, ASIZE);
-    mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
-    mov(ch1, sp);
-    BIND(BM_INIT_LOOP);
-      stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
-      subs(tmp5, tmp5, 1);
-      br(GT, BM_INIT_LOOP);
-
-      sub(cnt1tmp, cnt1, 1);
-      mov(tmp5, str2);
-      add(str2end, str2, result_tmp, LSL, str2_chr_shift);
-      sub(ch2, cnt1, 1);
-      mov(tmp3, str1);
-    BIND(BCLOOP);
-      (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
-      if (!str1_isL) {
-        subs(zr, ch1, ASIZE);
-        br(HS, BCSKIP);
-      }
-      strb(ch2, Address(sp, ch1));
-    BIND(BCSKIP);
-      subs(ch2, ch2, 1);
-      br(GT, BCLOOP);
-
-      add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
-      if (str1_isL == str2_isL) {
-        // load last 8 bytes (8LL/4UU symbols)
-        ldr(tmp6, Address(tmp6, -wordSize));
-      } else {
-        ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
-        // convert Latin1 to UTF. We'll have to wait until load completed, but
-        // it's still faster than per-character loads+checks
-        lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
-        ubfx(ch1, tmp6, 8, 8); // str1[N-2]
-        ubfx(ch2, tmp6, 16, 8); // str1[N-3]
-        andr(tmp6, tmp6, 0xFF); // str1[N-4]
-        orr(ch2, ch1, ch2, LSL, 16);
-        orr(tmp6, tmp6, tmp3, LSL, 48);
-        orr(tmp6, tmp6, ch2, LSL, 16);
-      }
-    BIND(BMLOOPSTR2);
-      (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
-      sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
-      if (str1_isL == str2_isL) {
-        // re-init tmp3. It's for free because it's executed in parallel with
-        // load above. Alternative is to initialize it before loop, but it'll
-        // affect performance on in-order systems with 2 or more ld/st pipelines
-        lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
-      }
-      if (!isL) { // UU/UL case
-        lsl(ch2, cnt1tmp, 1); // offset in bytes
-      }
-      cmp(tmp3, skipch);
-      br(NE, BMSKIP);
-      ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
-      mov(ch1, tmp6);
-      if (isL) {
-        b(BMLOOPSTR1_AFTER_LOAD);
-      } else {
-        sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
-        b(BMLOOPSTR1_CMP);
-      }
-    BIND(BMLOOPSTR1);
-      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
-      (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
-    BIND(BMLOOPSTR1_AFTER_LOAD);
-      subs(cnt1tmp, cnt1tmp, 1);
-      br(LT, BMLOOPSTR1_LASTCMP);
-    BIND(BMLOOPSTR1_CMP);
-      cmp(ch1, ch2);
-      br(EQ, BMLOOPSTR1);
-    BIND(BMSKIP);
-      if (!isL) {
-        // if we've met UTF symbol while searching Latin1 pattern, then we can
-        // skip cnt1 symbols
-        if (str1_isL != str2_isL) {
-          mov(result_tmp, cnt1);
-        } else {
-          mov(result_tmp, 1);
-        }
-        subs(zr, skipch, ASIZE);
-        br(HS, BMADV);
-      }
-      ldrb(result_tmp, Address(sp, skipch)); // load skip distance
-    BIND(BMADV);
-      sub(cnt1tmp, cnt1, 1);
-      add(str2, str2, result_tmp, LSL, str2_chr_shift);
-      cmp(str2, str2end);
-      br(LE, BMLOOPSTR2);
-      add(sp, sp, ASIZE);
-      b(NOMATCH);
-    BIND(BMLOOPSTR1_LASTCMP);
-      cmp(ch1, ch2);
-      br(NE, BMSKIP);
-    BIND(BMMATCH);
-      sub(result, str2, tmp5);
-      if (!str2_isL) lsr(result, result, 1);
-      add(sp, sp, ASIZE);
-      b(DONE);
-
-    BIND(LINEARSTUB);
-    cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
-    br(LT, LINEAR_MEDIUM);
-    mov(result, zr);
-    RuntimeAddress stub = NULL;
-    if (isL) {
-      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
-      assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
-    } else if (str1_isL) {
-      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
-       assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
-    } else {
-      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
-      assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
-    }
-    trampoline_call(stub);
-    b(DONE);
-  }
-
-  BIND(LINEARSEARCH);
-  {
-    Label DO1, DO2, DO3;
-
-    Register str2tmp = tmp2;
-    Register first = tmp3;
-
-    if (icnt1 == -1)
-    {
-        Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
-
-        cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
-        br(LT, DOSHORT);
-      BIND(LINEAR_MEDIUM);
-        (this->*str1_load_1chr)(first, Address(str1));
-        lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
-        sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
-        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
-        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
-
-      BIND(FIRST_LOOP);
-        (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
-        cmp(first, ch2);
-        br(EQ, STR1_LOOP);
-      BIND(STR2_NEXT);
-        adds(cnt2_neg, cnt2_neg, str2_chr_size);
-        br(LE, FIRST_LOOP);
-        b(NOMATCH);
-
-      BIND(STR1_LOOP);
-        adds(cnt1tmp, cnt1_neg, str1_chr_size);
-        add(cnt2tmp, cnt2_neg, str2_chr_size);
-        br(GE, MATCH);
-
-      BIND(STR1_NEXT);
-        (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
-        (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
-        cmp(ch1, ch2);
-        br(NE, STR2_NEXT);
-        adds(cnt1tmp, cnt1tmp, str1_chr_size);
-        add(cnt2tmp, cnt2tmp, str2_chr_size);
-        br(LT, STR1_NEXT);
-        b(MATCH);
-
-      BIND(DOSHORT);
-      if (str1_isL == str2_isL) {
-        cmp(cnt1, (u1)2);
-        br(LT, DO1);
-        br(GT, DO3);
-      }
-    }
-
-    if (icnt1 == 4) {
-      Label CH1_LOOP;
-
-        (this->*load_4chr)(ch1, str1);
-        sub(result_tmp, cnt2, 4);
-        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
-        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
-
-      BIND(CH1_LOOP);
-        (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
-        cmp(ch1, ch2);
-        br(EQ, MATCH);
-        adds(cnt2_neg, cnt2_neg, str2_chr_size);
-        br(LE, CH1_LOOP);
-        b(NOMATCH);
-      }
-
-    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
-      Label CH1_LOOP;
-
-      BIND(DO2);
-        (this->*load_2chr)(ch1, str1);
-        if (icnt1 == 2) {
-          sub(result_tmp, cnt2, 2);
-        }
-        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
-        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
-      BIND(CH1_LOOP);
-        (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
-        cmp(ch1, ch2);
-        br(EQ, MATCH);
-        adds(cnt2_neg, cnt2_neg, str2_chr_size);
-        br(LE, CH1_LOOP);
-        b(NOMATCH);
-    }
-
-    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
-      Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
-
-      BIND(DO3);
-        (this->*load_2chr)(first, str1);
-        (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
-        if (icnt1 == 3) {
-          sub(result_tmp, cnt2, 3);
-        }
-        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
-        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
-      BIND(FIRST_LOOP);
-        (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
-        cmpw(first, ch2);
-        br(EQ, STR1_LOOP);
-      BIND(STR2_NEXT);
-        adds(cnt2_neg, cnt2_neg, str2_chr_size);
-        br(LE, FIRST_LOOP);
-        b(NOMATCH);
-
-      BIND(STR1_LOOP);
-        add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
-        (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
-        cmp(ch1, ch2);
-        br(NE, STR2_NEXT);
-        b(MATCH);
-    }
-
-    if (icnt1 == -1 || icnt1 == 1) {
-      Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
-
-      BIND(DO1);
-        (this->*str1_load_1chr)(ch1, str1);
-        cmp(cnt2, (u1)8);
-        br(LT, DO1_SHORT);
-
-        sub(result_tmp, cnt2, 8/str2_chr_size);
-        sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
-        mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
-        lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
-
-        if (str2_isL) {
-          orr(ch1, ch1, ch1, LSL, 8);
-        }
-        orr(ch1, ch1, ch1, LSL, 16);
-        orr(ch1, ch1, ch1, LSL, 32);
-      BIND(CH1_LOOP);
-        ldr(ch2, Address(str2, cnt2_neg));
-        eor(ch2, ch1, ch2);
-        sub(tmp1, ch2, tmp3);
-        orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
-        bics(tmp1, tmp1, tmp2);
-        br(NE, HAS_ZERO);
-        adds(cnt2_neg, cnt2_neg, 8);
-        br(LT, CH1_LOOP);
-
-        cmp(cnt2_neg, (u1)8);
-        mov(cnt2_neg, 0);
-        br(LT, CH1_LOOP);
-        b(NOMATCH);
-
-      BIND(HAS_ZERO);
-        rev(tmp1, tmp1);
-        clz(tmp1, tmp1);
-        add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
-        b(MATCH);
-
-      BIND(DO1_SHORT);
-        mov(result_tmp, cnt2);
-        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
-        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
-      BIND(DO1_LOOP);
-        (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
-        cmpw(ch1, ch2);
-        br(EQ, MATCH);
-        adds(cnt2_neg, cnt2_neg, str2_chr_size);
-        br(LT, DO1_LOOP);
-    }
-  }
-  BIND(NOMATCH);
-    mov(result, -1);
-    b(DONE);
-  BIND(MATCH);
-    add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
-  BIND(DONE);
-}
-
-typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
-typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
-
-void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
-                                         Register ch, Register result,
-                                         Register tmp1, Register tmp2, Register tmp3)
-{
-  Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
-  Register cnt1_neg = cnt1;
-  Register ch1 = rscratch1;
-  Register result_tmp = rscratch2;
-
-  cbz(cnt1, NOMATCH);
-
-  cmp(cnt1, (u1)4);
-  br(LT, DO1_SHORT);
-
-  orr(ch, ch, ch, LSL, 16);
-  orr(ch, ch, ch, LSL, 32);
-
-  sub(cnt1, cnt1, 4);
-  mov(result_tmp, cnt1);
-  lea(str1, Address(str1, cnt1, Address::uxtw(1)));
-  sub(cnt1_neg, zr, cnt1, LSL, 1);
-
-  mov(tmp3, 0x0001000100010001);
-
-  BIND(CH1_LOOP);
-    ldr(ch1, Address(str1, cnt1_neg));
-    eor(ch1, ch, ch1);
-    sub(tmp1, ch1, tmp3);
-    orr(tmp2, ch1, 0x7fff7fff7fff7fff);
-    bics(tmp1, tmp1, tmp2);
-    br(NE, HAS_ZERO);
-    adds(cnt1_neg, cnt1_neg, 8);
-    br(LT, CH1_LOOP);
-
-    cmp(cnt1_neg, (u1)8);
-    mov(cnt1_neg, 0);
-    br(LT, CH1_LOOP);
-    b(NOMATCH);
-
-  BIND(HAS_ZERO);
-    rev(tmp1, tmp1);
-    clz(tmp1, tmp1);
-    add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
-    b(MATCH);
-
-  BIND(DO1_SHORT);
-    mov(result_tmp, cnt1);
-    lea(str1, Address(str1, cnt1, Address::uxtw(1)));
-    sub(cnt1_neg, zr, cnt1, LSL, 1);
-  BIND(DO1_LOOP);
-    ldrh(ch1, Address(str1, cnt1_neg));
-    cmpw(ch, ch1);
-    br(EQ, MATCH);
-    adds(cnt1_neg, cnt1_neg, 2);
-    br(LT, DO1_LOOP);
-  BIND(NOMATCH);
-    mov(result, -1);
-    b(DONE);
-  BIND(MATCH);
-    add(result, result_tmp, cnt1_neg, ASR, 1);
-  BIND(DONE);
-}
-
-// Compare strings.
-void MacroAssembler::string_compare(Register str1, Register str2,
-    Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
-    FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
-  Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
-      DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
-      SHORT_LOOP_START, TAIL_CHECK;
-
-  bool isLL = ae == StrIntrinsicNode::LL;
-  bool isLU = ae == StrIntrinsicNode::LU;
-  bool isUL = ae == StrIntrinsicNode::UL;
-
-  // The stub threshold for LL strings is: 72 (64 + 8) chars
-  // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
-  // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
-  const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
-
-  bool str1_isL = isLL || isLU;
-  bool str2_isL = isLL || isUL;
-
-  int str1_chr_shift = str1_isL ? 0 : 1;
-  int str2_chr_shift = str2_isL ? 0 : 1;
-  int str1_chr_size = str1_isL ? 1 : 2;
-  int str2_chr_size = str2_isL ? 1 : 2;
-  int minCharsInWord = isLL ? wordSize : wordSize/2;
-
-  FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
-  chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
-                                      (chr_insn)&MacroAssembler::ldrh;
-  chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
-                                      (chr_insn)&MacroAssembler::ldrh;
-  uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
-                            (uxt_insn)&MacroAssembler::uxthw;
-
-  BLOCK_COMMENT("string_compare {");
-
-  // Bizzarely, the counts are passed in bytes, regardless of whether they
-  // are L or U strings, however the result is always in characters.
-  if (!str1_isL) asrw(cnt1, cnt1, 1);
-  if (!str2_isL) asrw(cnt2, cnt2, 1);
-
-  // Compute the minimum of the string lengths and save the difference.
-  subsw(result, cnt1, cnt2);
-  cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
-
-  // A very short string
-  cmpw(cnt2, minCharsInWord);
-  br(Assembler::LE, SHORT_STRING);
-
-  // Compare longwords
-  // load first parts of strings and finish initialization while loading
-  {
-    if (str1_isL == str2_isL) { // LL or UU
-      ldr(tmp1, Address(str1));
-      cmp(str1, str2);
-      br(Assembler::EQ, DONE);
-      ldr(tmp2, Address(str2));
-      cmp(cnt2, stub_threshold);
-      br(GE, STUB);
-      subsw(cnt2, cnt2, minCharsInWord);
-      br(EQ, TAIL_CHECK);
-      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
-      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
-      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
-    } else if (isLU) {
-      ldrs(vtmp, Address(str1));
-      ldr(tmp2, Address(str2));
-      cmp(cnt2, stub_threshold);
-      br(GE, STUB);
-      subw(cnt2, cnt2, 4);
-      eor(vtmpZ, T16B, vtmpZ, vtmpZ);
-      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
-      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
-      zip1(vtmp, T8B, vtmp, vtmpZ);
-      sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
-      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
-      add(cnt1, cnt1, 4);
-      fmovd(tmp1, vtmp);
-    } else { // UL case
-      ldr(tmp1, Address(str1));
-      ldrs(vtmp, Address(str2));
-      cmp(cnt2, stub_threshold);
-      br(GE, STUB);
-      subw(cnt2, cnt2, 4);
-      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
-      eor(vtmpZ, T16B, vtmpZ, vtmpZ);
-      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
-      sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
-      zip1(vtmp, T8B, vtmp, vtmpZ);
-      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
-      add(cnt1, cnt1, 8);
-      fmovd(tmp2, vtmp);
-    }
-    adds(cnt2, cnt2, isUL ? 4 : 8);
-    br(GE, TAIL);
-    eor(rscratch2, tmp1, tmp2);
-    cbnz(rscratch2, DIFFERENCE);
-    // main loop
-    bind(NEXT_WORD);
-    if (str1_isL == str2_isL) {
-      ldr(tmp1, Address(str1, cnt2));
-      ldr(tmp2, Address(str2, cnt2));
-      adds(cnt2, cnt2, 8);
-    } else if (isLU) {
-      ldrs(vtmp, Address(str1, cnt1));
-      ldr(tmp2, Address(str2, cnt2));
-      add(cnt1, cnt1, 4);
-      zip1(vtmp, T8B, vtmp, vtmpZ);
-      fmovd(tmp1, vtmp);
-      adds(cnt2, cnt2, 8);
-    } else { // UL
-      ldrs(vtmp, Address(str2, cnt2));
-      ldr(tmp1, Address(str1, cnt1));
-      zip1(vtmp, T8B, vtmp, vtmpZ);
-      add(cnt1, cnt1, 8);
-      fmovd(tmp2, vtmp);
-      adds(cnt2, cnt2, 4);
-    }
-    br(GE, TAIL);
-
-    eor(rscratch2, tmp1, tmp2);
-    cbz(rscratch2, NEXT_WORD);
-    b(DIFFERENCE);
-    bind(TAIL);
-    eor(rscratch2, tmp1, tmp2);
-    cbnz(rscratch2, DIFFERENCE);
-    // Last longword.  In the case where length == 4 we compare the
-    // same longword twice, but that's still faster than another
-    // conditional branch.
-    if (str1_isL == str2_isL) {
-      ldr(tmp1, Address(str1));
-      ldr(tmp2, Address(str2));
-    } else if (isLU) {
-      ldrs(vtmp, Address(str1));
-      ldr(tmp2, Address(str2));
-      zip1(vtmp, T8B, vtmp, vtmpZ);
-      fmovd(tmp1, vtmp);
-    } else { // UL
-      ldrs(vtmp, Address(str2));
-      ldr(tmp1, Address(str1));
-      zip1(vtmp, T8B, vtmp, vtmpZ);
-      fmovd(tmp2, vtmp);
-    }
-    bind(TAIL_CHECK);
-    eor(rscratch2, tmp1, tmp2);
-    cbz(rscratch2, DONE);
-
-    // Find the first different characters in the longwords and
-    // compute their difference.
-    bind(DIFFERENCE);
-    rev(rscratch2, rscratch2);
-    clz(rscratch2, rscratch2);
-    andr(rscratch2, rscratch2, isLL ? -8 : -16);
-    lsrv(tmp1, tmp1, rscratch2);
-    (this->*ext_chr)(tmp1, tmp1);
-    lsrv(tmp2, tmp2, rscratch2);
-    (this->*ext_chr)(tmp2, tmp2);
-    subw(result, tmp1, tmp2);
-    b(DONE);
-  }
-
-  bind(STUB);
-    RuntimeAddress stub = NULL;
-    switch(ae) {
-      case StrIntrinsicNode::LL:
-        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
-        break;
-      case StrIntrinsicNode::UU:
-        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
-        break;
-      case StrIntrinsicNode::LU:
-        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
-        break;
-      case StrIntrinsicNode::UL:
-        stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
-        break;
-      default:
-        ShouldNotReachHere();
-     }
-    assert(stub.target() != NULL, "compare_long_string stub has not been generated");
-    trampoline_call(stub);
-    b(DONE);
-
-  bind(SHORT_STRING);
-  // Is the minimum length zero?
-  cbz(cnt2, DONE);
-  // arrange code to do most branches while loading and loading next characters
-  // while comparing previous
-  (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
-  subs(cnt2, cnt2, 1);
-  br(EQ, SHORT_LAST_INIT);
-  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
-  b(SHORT_LOOP_START);
-  bind(SHORT_LOOP);
-  subs(cnt2, cnt2, 1);
-  br(EQ, SHORT_LAST);
-  bind(SHORT_LOOP_START);
-  (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
-  (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
-  cmp(tmp1, cnt1);
-  br(NE, SHORT_LOOP_TAIL);
-  subs(cnt2, cnt2, 1);
-  br(EQ, SHORT_LAST2);
-  (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
-  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
-  cmp(tmp2, rscratch1);
-  br(EQ, SHORT_LOOP);
-  sub(result, tmp2, rscratch1);
-  b(DONE);
-  bind(SHORT_LOOP_TAIL);
-  sub(result, tmp1, cnt1);
-  b(DONE);
-  bind(SHORT_LAST2);
-  cmp(tmp2, rscratch1);
-  br(EQ, DONE);
-  sub(result, tmp2, rscratch1);
-
-  b(DONE);
-  bind(SHORT_LAST_INIT);
-  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
-  bind(SHORT_LAST);
-  cmp(tmp1, cnt1);
-  br(EQ, DONE);
-  sub(result, tmp1, cnt1);
-
-  bind(DONE);
-
-  BLOCK_COMMENT("} string_compare");
-}
-#endif // COMPILER2

 // This method checks if provided byte array contains byte with highest bit set.
 void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@ -1227,11 +1227,6 @@ public:
        Register table0, Register table1, Register table2, Register table3,
        bool upper = false);

-  void string_compare(Register str1, Register str2,
-                      Register cnt1, Register cnt2, Register result,
-                      Register tmp1, Register tmp2, FloatRegister vtmp1,
-                      FloatRegister vtmp2, FloatRegister vtmp3, int ae);
-
  void has_negatives(Register ary1, Register len, Register result);

  void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
@ -1260,15 +1255,6 @@ public:
                        Register len, Register result,
                        FloatRegister Vtmp1, FloatRegister Vtmp2,
                        FloatRegister Vtmp3, FloatRegister Vtmp4);
-  void string_indexof(Register str1, Register str2,
-                      Register cnt1, Register cnt2,
-                      Register tmp1, Register tmp2,
-                      Register tmp3, Register tmp4,
-                      Register tmp5, Register tmp6,
-                      int int_cnt1, Register result, int ae);
-  void string_indexof_char(Register str1, Register cnt1,
-                           Register ch, Register result,
-                           Register tmp1, Register tmp2, Register tmp3);
  void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
                FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
                FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@ -137,7 +137,7 @@ bool SafePointNode::needs_polling_address_input() {

 // emit an interrupt that is caught by the debugger (for debugging compiler)
 void emit_break(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ breakpoint();
 }

@ -157,7 +157,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {


 void emit_nop(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ nop();
 }

@ -165,7 +165,7 @@ void emit_nop(CodeBuffer &cbuf) {
 void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
  int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
  int call_site_offset = cbuf.insts()->mark_off();
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
  address target = (address)m->method();
  assert(n->as_MachCall()->entry_point() == target, "sanity");
@ -212,7 +212,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  Compile* C = ra_->C;
  ConstantTable& constant_table = C->output()->constant_table();
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  Register r = as_Register(ra_->get_encode(this));
  CodeSection* consts_section = __ code()->consts();
@ -269,7 +269,7 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {

 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  for (int i = 0; i < OptoPrologueNops; i++) {
    __ nop();
@ -339,7 +339,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 #endif

 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Compile* C = ra_->C;

  size_t framesize = C->output()->frame_size_in_bytes();
@ -429,7 +429,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
  // Bailout only for real instruction emit.
  // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )

-  MacroAssembler _masm(cbuf);
+  C2_MacroAssembler _masm(cbuf);

  // --------------------------------------
  // Check for mem-mem move.  Load into unused float registers and fall into
@ -790,7 +790,7 @@ void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
 #endif

 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  for(int i = 0; i < _count; i += 1) {
    __ nop();
  }
@ -811,7 +811,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 #endif

 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  Register dst = reg_to_register_object(reg);
@ -847,7 +847,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 #endif

 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Register iCache  = reg_to_register_object(Matcher::inline_cache_reg_encode());
  assert(iCache == Ricklass, "should be");
  Register receiver = R0;
@ -866,7 +866,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {

 // Emit exception handler code.
 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
@ -889,7 +889,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  // Can't use any of the current frame's registers as we may have deopted
  // at a poll and everything can be live.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
@ -1280,7 +1280,7 @@ encode %{
    // preserve mark
    address mark = cbuf.insts()->mark();
    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int ret_addr_offset = as_MachCall()->ret_addr_offset();
    __ adr(LR, mark + ret_addr_offset);
    __ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
@ -1294,7 +1294,7 @@ encode %{
    // preserve mark
    address mark = cbuf.insts()->mark();
    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    // FP is preserved across all calls, even compiled calls.
    // Use it to preserve SP in places where the callee might change the SP.
    __ mov(Rmh_SP_save, SP);
@ -1305,12 +1305,12 @@ encode %{
  %}

  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(SP, Rmh_SP_save);
  %}

  enc_class Java_Dynamic_Call (method meth) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
    assert(R8_ic_reg == Ricklass, "should be");
    __ set_inst_mark();
@ -1338,7 +1338,7 @@ encode %{
        val |= (val << bit_width);
      }
    }
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    if (val == -1) {
      __ mvn($tmp$$Register, 0);
@ -1355,7 +1355,7 @@ encode %{
    // Replicate float con 2 times and pack into vector (8 bytes) in regD.
    float fval = $src$$constant;
    int val = *((int*)&fval);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    if (val == -1) {
      __ mvn($tmp$$Register, 0);
@ -1370,7 +1370,7 @@ encode %{

  enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
    Label Ldone, Lloop;
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register   str1_reg = $str1$$Register;
    Register   str2_reg = $str2$$Register;
@ -1462,7 +1462,7 @@ encode %{

  enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
    Label Lchar, Lchar_loop, Ldone, Lequal;
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register   str1_reg = $str1$$Register;
    Register   str2_reg = $str2$$Register;
@ -1524,7 +1524,7 @@ encode %{

  enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
    Label Ldone, Lloop, Lequal;
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register   ary1_reg = $ary1$$Register;
    Register   ary2_reg = $ary2$$Register;
--- a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp
@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "opto/c2_MacroAssembler.hpp"
+#include "runtime/basicLock.hpp"
+
+// TODO: 8 bytes at a time? pre-fetch?
+// Compare char[] arrays aligned to 4 bytes. limit is the length in bytes; limit, chr1, chr2 (and possibly ary1, ary2) are clobbered.
+void C2_MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+                                           Register limit, Register result,
+                                           Register chr1, Register chr2, Label& Ldone) {
+  Label Lvector, Lloop;
+
+  // if (ary1 == ary2)
+  //     return true;
+  cmpoop(ary1, ary2);
+  b(Ldone, eq); // NOTE(review): result is not written on this path; presumably the caller preset it - confirm
+
+  // Note: limit contains number of bytes (2*char_elements) != 0.
+  tst(limit, 0x2); // trailing character ? (bit 1 set means an odd number of chars)
+  b(Lvector, eq);
+
+  // compare the trailing char
+  sub(limit, limit, sizeof(jchar)); // limit is now the byte offset of the last char
+  ldrh(chr1, Address(ary1, limit));
+  ldrh(chr2, Address(ary2, limit));
+  cmp(chr1, chr2);
+  mov(result, 0, ne);     // not equal
+  b(Ldone, ne);
+
+  // only one char ? (limit == 0 after removing the trailing char)
+  tst(limit, limit);
+  mov(result, 1, eq);
+  b(Ldone, eq);
+
+  // word by word compare, don't need alignment check
+  bind(Lvector);
+
+  // Shift ary1 and ary2 to the end of the arrays, negate limit
+  add(ary1, limit, ary1);
+  add(ary2, limit, ary2);
+  neg(limit, limit); // limit now counts up from -length towards 0
+
+  bind(Lloop);
+  ldr_u32(chr1, Address(ary1, limit));
+  ldr_u32(chr2, Address(ary2, limit));
+  cmp_32(chr1, chr2);
+  mov(result, 0, ne);     // not equal
+  b(Ldone, ne);
+  adds(limit, limit, 2*sizeof(jchar)); // advance by one 32-bit word (two chars) and set flags
+  b(Lloop, ne); // loop until limit reaches 0
+
+  // Fall through: no difference found, result has not been written. Caller should set it:
+  // mov(result_reg, 1);  //equal
+}
+// Inline fast path for locking object Roop with lock box Rbox. On exit the flags are EQ on success, NE when the slow path is needed.
+void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3) {
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark      = Rscratch2; // Rscratch2 holds the mark word throughout
+
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label fast_lock, done;
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) { // scratch3 is only required in this configuration
+    assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
+    biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
+    // Fall through if lock not biased otherwise branch to done
+  }
+
+  // Invariant: Rmark loaded below does not contain biased lock pattern
+
+  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); // Rmark = object's mark word
+  tst(Rmark, markWord::unlocked_value);
+  b(fast_lock, ne); // unlocked bit set: try to acquire the lock at fast_lock below
+
+  // Check for recursive lock
+  // See comments in InterpreterMacroAssembler::lock_object for
+  // explanations on the fast recursive locking check.
+  // -1- test low 2 bits
+  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
+  // -2- test (hdr - SP) if the low two bits are 0
+  sub(Rscratch, Rmark, SP, eq);
+  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
+  // If still 'eq' then recursive locking OK
+  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
+  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+  b(done); // flags from the movs sequence above are the result
+
+  bind(fast_lock);
+  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // save the unlocked mark word in the box
+
+  bool allow_fallthrough_on_failure = true;
+  bool one_shot = true;
+  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); // NOTE(review): presumably swaps Rbox into the mark word if it still equals Rmark - confirm
+
+  bind(done);
+
+  // At this point flags are set as follows:
+  //  EQ -> Success
+  //  NE -> Failure, branch to slow path
+}
+// Inline fast path for unlocking object Roop whose lock box is Rbox; a zero displaced header in the box means a recursive lock.
+void C2_MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) {
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark      = Rscratch2; // Rscratch2 holds the displaced header loaded from the box
+
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label done;
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_exit(Roop, Rscratch, done); // NOTE(review): presumably branches to done when the lock is biased - confirm
+  }
+
+  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+  // If hdr is NULL, we've got recursive locking and there's nothing more to do
+  cmp(Rmark, 0);
+  b(done, eq); // leaves with EQ set
+
+  // Restore the object header
+  bool allow_fallthrough_on_failure = true;
+  bool one_shot = true;
+  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); // NOTE(review): presumably swaps Rmark back into the mark word if it still equals Rbox - confirm
+
+  bind(done);
+}
+
--- a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp
@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
+#define CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+// NOTE(review): starts with an access specifier, so presumably #included inside the C2_MacroAssembler class body - confirm.
+ public:
+  // Compare char[] arrays aligned to 4 bytes. limit is the length in bytes; a mismatch sets result to 0 and branches to Ldone.
+  void char_arrays_equals(Register ary1, Register ary2,
+                          Register limit, Register result,
+                          Register chr1, Register chr2, Label& Ldone);
+  // Inline lock/unlock fast paths. fast_lock ends with EQ on success, NE on failure; scratch3 is needed only with -XX:-UseOptoBiasInlining.
+  void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
+  void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
+
+#endif // CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
@ -1626,57 +1626,6 @@ void MacroAssembler::lookup_interface_method(Register Rklass,
  }
 }

-#ifdef COMPILER2
-// TODO: 8 bytes at a time? pre-fetch?
-// Compare char[] arrays aligned to 4 bytes.
-void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
-                                        Register limit, Register result,
-                                      Register chr1, Register chr2, Label& Ldone) {
-  Label Lvector, Lloop;
-
-  // if (ary1 == ary2)
-  //     return true;
-  cmpoop(ary1, ary2);
-  b(Ldone, eq);
-
-  // Note: limit contains number of bytes (2*char_elements) != 0.
-  tst(limit, 0x2); // trailing character ?
-  b(Lvector, eq);
-
-  // compare the trailing char
-  sub(limit, limit, sizeof(jchar));
-  ldrh(chr1, Address(ary1, limit));
-  ldrh(chr2, Address(ary2, limit));
-  cmp(chr1, chr2);
-  mov(result, 0, ne);     // not equal
-  b(Ldone, ne);
-
-  // only one char ?
-  tst(limit, limit);
-  mov(result, 1, eq);
-  b(Ldone, eq);
-
-  // word by word compare, dont't need alignment check
-  bind(Lvector);
-
-  // Shift ary1 and ary2 to the end of the arrays, negate limit
-  add(ary1, limit, ary1);
-  add(ary2, limit, ary2);
-  neg(limit, limit);
-
-  bind(Lloop);
-  ldr_u32(chr1, Address(ary1, limit));
-  ldr_u32(chr2, Address(ary2, limit));
-  cmp_32(chr1, chr2);
-  mov(result, 0, ne);     // not equal
-  b(Ldone, ne);
-  adds(limit, limit, 2*sizeof(jchar));
-  b(Lloop, ne);
-
-  // Caller should set it:
-  // mov(result_reg, 1);  //equal
-}
-#endif

 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
@ -1970,92 +1919,6 @@ void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
  return bs->resolve(this, decorators, obj);
 }

-
-#ifdef COMPILER2
-void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3)
-{
-  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
-
-  Register Rmark      = Rscratch2;
-
-  assert(Roop != Rscratch, "");
-  assert(Roop != Rmark, "");
-  assert(Rbox != Rscratch, "");
-  assert(Rbox != Rmark, "");
-
-  Label fast_lock, done;
-
-  if (UseBiasedLocking && !UseOptoBiasInlining) {
-    assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
-    biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
-    // Fall through if lock not biased otherwise branch to done
-  }
-
-  // Invariant: Rmark loaded below does not contain biased lock pattern
-
-  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
-  tst(Rmark, markWord::unlocked_value);
-  b(fast_lock, ne);
-
-  // Check for recursive lock
-  // See comments in InterpreterMacroAssembler::lock_object for
-  // explanations on the fast recursive locking check.
-  // -1- test low 2 bits
-  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
-  // -2- test (hdr - SP) if the low two bits are 0
-  sub(Rscratch, Rmark, SP, eq);
-  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
-  // If still 'eq' then recursive locking OK
-  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
-  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
-  b(done);
-
-  bind(fast_lock);
-  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
-
-  bool allow_fallthrough_on_failure = true;
-  bool one_shot = true;
-  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
-
-  bind(done);
-
-  // At this point flags are set as follows:
-  //  EQ -> Success
-  //  NE -> Failure, branch to slow path
-}
-
-void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2)
-{
-  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
-
-  Register Rmark      = Rscratch2;
-
-  assert(Roop != Rscratch, "");
-  assert(Roop != Rmark, "");
-  assert(Rbox != Rscratch, "");
-  assert(Rbox != Rmark, "");
-
-  Label done;
-
-  if (UseBiasedLocking && !UseOptoBiasInlining) {
-    biased_locking_exit(Roop, Rscratch, done);
-  }
-
-  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
-  // If hdr is NULL, we've got recursive locking and there's nothing more to do
-  cmp(Rmark, 0);
-  b(done, eq);
-
-  // Restore the object header
-  bool allow_fallthrough_on_failure = true;
-  bool one_shot = true;
-  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
-
-  bind(done);
-
-}
-#endif // COMPILER2
-
 void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
  if (SafepointMechanism::uses_thread_local_poll()) {
    ldr_u32(tmp1, Address(Rthread, Thread::polling_page_offset()));
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
@ -1068,11 +1068,6 @@ public:
                               Register temp_reg2,
                               Label& L_no_such_interface);

-  // Compare char[] arrays aligned to 4 bytes.
-  void char_arrays_equals(Register ary1, Register ary2,
-                          Register limit, Register result,
-                          Register chr1, Register chr2, Label& Ldone);
-

  void floating_cmp(Register dst);

@ -1090,11 +1085,6 @@ public:

  void restore_default_fp_mode();

-#ifdef COMPILER2
-  void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
-  void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
-#endif
-
  void safepoint_poll(Register tmp1, Label& slow_path);
  void get_polling_page(Register dest);
  void read_polling_page(Register dest, relocInfo::relocType rtype);
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "opto/c2_MacroAssembler.hpp"
+#include "opto/intrinsicnode.hpp"
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) // nothing
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Intrinsics for CompactStrings
+
+// Compress char[] to byte[] by compressing 16 bytes (8 chars) at once. Branches to Lfailure if a char is not latin1.
+void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
+                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+                                           Label& Lfailure) {
+  // Runs cnt / 8 iterations, advancing src by 16 and dst by 8 each; cnt is only read. R0 and tmp1-tmp5 are scratch.
+  const Register tmp0 = R0;
+  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
+  Label Lloop, Lslow;
+
+  // Check if cnt >= 8 (= 16 bytes)
+  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF (once the ori and rldimi below have run)
+  srwi_(tmp2, cnt, 3);            // tmp2 = cnt / 8 = iteration count
+  beq(CCR0, Lslow);               // fewer than 8 chars: nothing to do here
+  ori(tmp1, tmp1, 0xFF);
+  rldimi(tmp1, tmp1, 32, 0);
+  mtctr(tmp2);
+
+  // 2x unrolled loop
+  bind(Lloop);
+  ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
+  ld(tmp4, 8, src);               // _4_5_6_7
+
+  orr(tmp0, tmp2, tmp4);          // combine all 8 chars for the latin1 check below
+  rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
+  rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
+  rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
+  rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7
+
+  andc_(tmp0, tmp0, tmp1);        // non-zero if any char has bits outside the 0x00FF mask
+  bne(CCR0, Lfailure);            // Not latin1.
+  addi(src, src, 16);
+
+  rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
+  srdi(tmp2, tmp2, 3*8);          // ____0_2_
+  rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
+  srdi(tmp4, tmp4, 3*8);          // ____4_6_
+
+  orr(tmp2, tmp2, tmp3);          // ____0123
+  orr(tmp4, tmp4, tmp5);          // ____4567
+
+  stw(tmp2, 0, dst);              // store compressed chars 0-3
+  stw(tmp4, 4, dst);              // store compressed chars 4-7
+  addi(dst, dst, 8);
+  bdnz(Lloop);
+
+  bind(Lslow);                    // Fallback to slow version
+}
+
+// Compress char[] to byte[]. cnt must be positive int. Handles one char per iteration; branches to Lfailure on a char > 0xff.
+void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
+  Label Lloop;
+  mtctr(cnt);                     // CTR = number of chars to process
+
+  bind(Lloop);
+  lhz(tmp, 0, src);               // load one 16-bit char
+  cmplwi(CCR0, tmp, 0xff);
+  bgt(CCR0, Lfailure);            // Not latin1.
+  addi(src, src, 2);
+  stb(tmp, 0, dst);               // store it as a single byte
+  addi(dst, dst, 1);
+  bdnz(Lloop);
+}
+
+// Inflate byte[] to char[] by inflating 16 bytes at once: each iteration reads 8 source bytes and writes 16 bytes. cnt is only read.
+void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
+                                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
+  const Register tmp0 = R0;
+  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
+  Label Lloop, Lslow;
+
+  // Check if cnt >= 8
+  srwi_(tmp2, cnt, 3);            // tmp2 = cnt / 8 = iteration count
+  beq(CCR0, Lslow);               // fewer than 8 elements: nothing to do here
+  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
+  ori(tmp1, tmp1, 0xFF);
+  mtctr(tmp2);
+
+  // 2x unrolled loop
+  bind(Lloop);
+  lwz(tmp2, 0, src);              // ____0123 (Big Endian)
+  lwz(tmp4, 4, src);              // ____4567
+  addi(src, src, 8);
+
+  rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
+  rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
+  rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
+  rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
+
+  andc(tmp0, tmp2, tmp1);         // ____0_1_
+  rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
+  andc(tmp3, tmp4, tmp1);         // ____4_5_
+  rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
+
+  rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
+  rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7
+
+  std(tmp2, 0, dst);              // store inflated chars 0-3
+  std(tmp4, 8, dst);              // store inflated chars 4-7
+  addi(dst, dst, 16);
+  bdnz(Lloop);
+
+  bind(Lslow);                    // Fallback to slow version
+}
+
+// Inflate byte[] to char[]. cnt must be positive int. Handles one element per iteration, zero-extending each byte to 16 bits.
+void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
+  Label Lloop;
+  mtctr(cnt);                     // CTR = number of elements to process
+
+  bind(Lloop);
+  lbz(tmp, 0, src);               // load one byte, zero-extended
+  addi(src, src, 1);
+  sth(tmp, 0, dst);               // store it as a 16-bit char
+  addi(dst, dst, 2);
+  bdnz(Lloop);
+}
+// Compare str1 with str2 for encoding pair ae: result = difference of the first mismatching chars, else the length difference.
+void C2_MacroAssembler::string_compare(Register str1, Register str2,
+                                       Register cnt1, Register cnt2,
+                                       Register tmp1, Register result, int ae) {
+  const Register tmp0 = R0,
+                 diff = tmp1;
+
+  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
+  Label Ldone, Lslow, Lloop, Lreturn_diff;
+
+  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+  // we interchange str1 and str2 in the UL case and negate the result.
+  // Like this, str1 is always latin1 encoded, except for the UU case.
+  // In addition, we need 0 (or sign which is 0) extend.
+
+  if (ae == StrIntrinsicNode::UU) {
+    srwi(cnt1, cnt1, 1); // NOTE(review): presumably converts a byte count to a char count - confirm with caller
+  } else {
+    clrldi(cnt1, cnt1, 32);
+  }
+
+  if (ae != StrIntrinsicNode::LL) {
+    srwi(cnt2, cnt2, 1); // NOTE(review): presumably converts a byte count to a char count - confirm with caller
+  } else {
+    clrldi(cnt2, cnt2, 32);
+  }
+
+  // See if the lengths are different, and calculate min in cnt1.
+  // Save diff in case we need it for a tie-breaker.
+  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
+  // if (diff > 0) { cnt1 = cnt2; }
+  if (VM_Version::has_isel()) {
+    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
+  } else {
+    Label Lskip;
+    blt(CCR0, Lskip);
+    mr(cnt1, cnt2);
+    bind(Lskip);
+  }
+
+  // Rename registers
+  Register chr1 = result;
+  Register chr2 = tmp0;
+
+  // Compare multiple characters in fast loop (only implemented for same encoding).
+  int stride1 = 8, stride2 = 8; // 8 bytes per fast-loop iteration; re-assigned to per-char strides before the slow loop
+  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
+    Label Lfastloop, Lskipfast;
+
+    srwi_(tmp0, cnt1, log2_chars_per_iter); // tmp0 = number of 8-byte groups
+    beq(CCR0, Lskipfast);
+    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
+    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
+    mtctr(tmp0);
+
+    bind(Lfastloop);
+    ld(chr1, 0, str1);
+    ld(chr2, 0, str2);
+    cmpd(CCR0, chr1, chr2);
+    bne(CCR0, Lslow); // groups differ: locate the mismatch char by char
+    addi(str1, str1, stride1);
+    addi(str2, str2, stride2);
+    bdnz(Lfastloop);
+    mr(cnt1, cnt2); // Remaining characters.
+    bind(Lskipfast);
+  }
+
+  // Loop which searches the first difference character by character.
+  cmpwi(CCR0, cnt1, 0);
+  beq(CCR0, Lreturn_diff);
+  bind(Lslow);
+  mtctr(cnt1);
+
+  switch (ae) {
+    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
+    case StrIntrinsicNode::UL: // fallthru (see comment above)
+    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
+    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
+    default: ShouldNotReachHere(); break;
+  }
+
+  bind(Lloop);
+  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
+  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
+  subf_(result, chr2, chr1); // result = chr1 - chr2
+  bne(CCR0, Ldone);
+  addi(str1, str1, stride1);
+  addi(str2, str2, stride2);
+  bdnz(Lloop);
+
+  // If strings are equal up to min length, return the length difference.
+  bind(Lreturn_diff);
+  mr(result, diff);
+
+  // Otherwise, return the difference between the first mismatched chars.
+  bind(Ldone);
+  if (ae == StrIntrinsicNode::UL) {
+    neg(result, result); // Negate result (see note above).
+  }
+}
+// Set result to 1 if ary1 and ary2 have equal contents, else 0. If is_array_equ, both are array oops and limit is loaded from ary1.
+void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
+                                     Register limit, Register tmp1, Register result, bool is_byte) {
+  const Register tmp0 = R0;
+  assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
+  Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
+  bool limit_needs_shift = false; // NOTE(review): set when limit is presumably a byte count over 2-byte elements - confirm
+
+  if (is_array_equ) {
+    const int length_offset = arrayOopDesc::length_offset_in_bytes();
+    const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+
+    // Return true if the same array.
+    cmpd(CCR0, ary1, ary2);
+    beq(CCR0, Lskiploop);
+
+    // Return false if one of them is NULL.
+    cmpdi(CCR0, ary1, 0);
+    cmpdi(CCR1, ary2, 0);
+    li(result, 0);
+    cror(CCR0, Assembler::equal, CCR1, Assembler::equal); // merge both NULL checks into CCR0
+    beq(CCR0, Ldone);
+
+    // Load the lengths of arrays.
+    lwz(limit, length_offset, ary1);
+    lwz(tmp0, length_offset, ary2);
+
+    // Return false if the two arrays are not equal length.
+    cmpw(CCR0, limit, tmp0);
+    bne(CCR0, Ldone);
+
+    // Load array addresses.
+    addi(ary1, ary1, base_offset);
+    addi(ary2, ary2, base_offset);
+  } else {
+    limit_needs_shift = !is_byte;
+    li(result, 0); // Assume not equal.
+  }
+
+  // Rename registers
+  Register chr1 = tmp0;
+  Register chr2 = tmp1;
+
+  // Compare 8 bytes per iteration in fast loop.
+  const int log2_chars_per_iter = is_byte ? 3 : 2;
+
+  srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0)); // tmp0 = number of 8-byte groups
+  beq(CCR0, Lskipfast);
+  mtctr(tmp0);
+
+  bind(Lfastloop);
+  ld(chr1, 0, ary1);
+  ld(chr2, 0, ary2);
+  addi(ary1, ary1, 8);
+  addi(ary2, ary2, 8);
+  cmpd(CCR0, chr1, chr2);
+  bne(CCR0, Ldone); // mismatch: result is still 0
+  bdnz(Lfastloop);
+
+  bind(Lskipfast);
+  rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
+  beq(CCR0, Lskiploop);
+  mtctr(limit);
+
+  // Character by character.
+  bind(Lloop);
+  if (is_byte) {
+    lbz(chr1, 0, ary1);
+    lbz(chr2, 0, ary2);
+    addi(ary1, ary1, 1);
+    addi(ary2, ary2, 1);
+  } else {
+    lhz(chr1, 0, ary1);
+    lhz(chr2, 0, ary2);
+    addi(ary1, ary1, 2);
+    addi(ary2, ary2, 2);
+  }
+  cmpw(CCR0, chr1, chr2);
+  bne(CCR0, Ldone); // mismatch: result is still 0
+  bdnz(Lloop);
+
+  bind(Lskiploop);
+  li(result, 1); // All characters are equal.
+  bind(Ldone);
+}
+
+void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
+                                       Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
+                                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
+
+  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
+  Label L_TooShort, L_Found, L_NotFound, L_End;
+  Register last_addr = haycnt, // Kill haycnt at the beginning.
+  addr      = tmp1,
+  n_start   = tmp2,
+  ch1       = tmp3,
+  ch2       = R0;
+
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
+  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
+
+  // **************************************************************************************************
+  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
+  // **************************************************************************************************
+
+  // Compute last haystack addr to use if no match gets found.
+  clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
+  addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
+  if (needlecntval == 0) { // variable needlecnt
+   cmpwi(CCR6, needlecnt, 2);
+   clrldi(needlecnt, needlecnt, 32);  // Ensure positive int is valid as 64 bit value.
+   blt(CCR6, L_TooShort);             // Variable needlecnt: handle short needle separately.
+  }
+
+  if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
+
+  if (needlecntval == 0) { // variable needlecnt
+   subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
+   addi(needlecnt, needlecnt, -2);    // Rest of needle.
+  } else { // constant needlecnt
+  guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
+  assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
+   addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
+   if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
+  }
+
+  if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
+
+  if (ae ==StrIntrinsicNode::UL) {
+   srwi(tmp4, n_start, 1*8);          // ___0
+   rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
+  }
+
+  add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
+
+  // Main Loop (now we have at least 2 characters).
+  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
+  bind(L_OuterLoop); // Search for 1st 2 characters.
+  Register addr_diff = tmp4;
+   subf(addr_diff, addr, last_addr);  // Difference between already checked address and last address to check.
+   addi(addr, addr, h_csize);         // This is the new address we want to use for comparing.
+   srdi_(ch2, addr_diff, h_csize);
+   beq(CCR0, L_FinalCheck);           // 2 characters left?
+   mtctr(ch2);                        // num of characters / 2
+  bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
+   if (h_csize == 2) {                // Load 2 characters of haystack (ignore alignment).
+    lwz(ch1, 0, addr);
+    lwz(ch2, 2, addr);
+   } else {
+    lhz(ch1, 0, addr);
+    lhz(ch2, 1, addr);
+   }
+   cmpw(CCR0, ch1, n_start);          // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
+   cmpw(CCR1, ch2, n_start);
+   beq(CCR0, L_Comp1);                // Did we find the needle start?
+   beq(CCR1, L_Comp2);
+   addi(addr, addr, 2 * h_csize);
+   bdnz(L_InnerLoop);
+  bind(L_FinalCheck);
+   andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
+   beq(CCR0, L_NotFound);
+   if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
+   cmpw(CCR1, ch1, n_start);
+   beq(CCR1, L_Comp1);
+  bind(L_NotFound);
+   li(result, -1);                    // not found
+   b(L_End);
+
+   // **************************************************************************************************
+   // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
+   // **************************************************************************************************
+  if (needlecntval == 0) {           // We have to handle these cases separately.
+  Label L_OneCharLoop;
+  bind(L_TooShort);
+   mtctr(haycnt);
+   if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
+  bind(L_OneCharLoop);
+   if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
+   cmpw(CCR1, ch1, n_start);
+   beq(CCR1, L_Found);               // Did we find the one character needle?
+   bdnz(L_OneCharLoop);
+   li(result, -1);                   // Not found.
+   b(L_End);
+  }
+
+  // **************************************************************************************************
+  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
+  // **************************************************************************************************
+
+  // Compare the rest
+  bind(L_Comp2);
+   addi(addr, addr, h_csize);        // First comparison has failed, 2nd one hit.
+  bind(L_Comp1);                     // Addr points to possible needle start.
+  if (needlecntval != 2) {           // Const needlecnt==2?
+   if (needlecntval != 3) {
+    if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
+    Register n_ind = tmp4,
+             h_ind = n_ind;
+    li(n_ind, 2 * n_csize);          // First 2 characters are already compared, use index 2.
+    mtctr(needlecnt);                // Decremented by 2, still > 0.
+   Label L_CompLoop;
+   bind(L_CompLoop);
+    if (ae ==StrIntrinsicNode::UL) {
+      h_ind = ch1;
+      sldi(h_ind, n_ind, 1);
+    }
+    if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
+    if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
+    cmpw(CCR1, ch1, ch2);
+    bne(CCR1, L_OuterLoop);
+    addi(n_ind, n_ind, n_csize);
+    bdnz(L_CompLoop);
+   } else { // No loop required if there's only one needle character left.
+    if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
+    if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
+    cmpw(CCR1, ch1, ch2);
+    bne(CCR1, L_OuterLoop);
+   }
+  }
+  // Return index ...
+  bind(L_Found);
+   subf(result, haystack, addr);     // relative to haystack, ...
+   if (h_csize == 2) { srdi(result, result, 1); } // in characters.
+  bind(L_End);
+} // string_indexof
+// Emit code that sets result to the index of the first element of haystack[0..haycnt) equal to the needle, or to -1 if there is none.
+void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
+                                            Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
+  assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
+  // Elements are bytes if is_byte, otherwise 2-byte chars. Passing needle == R0 selects the immediate needleChar instead of a register.
+  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
+  Register addr = tmp1,      // Running pointer into the haystack.
+           ch1 = tmp2,       // First element of the pair loaded per iteration.
+           ch2 = R0;         // Second element of the pair loaded per iteration.
+  // Apart from result, the emitted code overwrites tmp1, tmp2, R0, CTR, CCR0 and CCR1.
+  const int h_csize = is_byte ? 1 : 2; // Size of one haystack element in bytes.
+
+//4:
+   srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
+   mr(addr, haystack);       // Start scanning at the first element.
+   beq(CCR0, L_FinalCheck);  // Fewer than 2 elements: skip the unrolled loop.
+   mtctr(tmp2);              // Move to count register.
+//8:
+  bind(L_InnerLoop);         // Main work horse (2x unrolled search loop).
+   if (!is_byte) {
+    lhz(ch1, 0, addr);
+    lhz(ch2, 2, addr);
+   } else {
+    lbz(ch1, 0, addr);
+    lbz(ch2, 1, addr);
+   }
+   (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
+   (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
+   beq(CCR0, L_Found1);      // Did we find the needle?
+   beq(CCR1, L_Found2);
+   addi(addr, addr, 2 * h_csize); // Step over the two elements just checked.
+   bdnz(L_InnerLoop);
+//16:
+  bind(L_FinalCheck);
+   andi_(R0, haycnt, 1);     // An odd haycnt leaves one element the unrolled loop did not cover.
+   beq(CCR0, L_NotFound);
+   if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
+   (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
+   beq(CCR1, L_Found1);
+//21:
+  bind(L_NotFound);
+   li(result, -1);           // Not found.
+   b(L_End);
+
+  bind(L_Found2);            // The second element of the pair matched:
+   addi(addr, addr, h_csize); // make addr point to it before falling into L_Found1.
+//24:
+  bind(L_Found1);            // Return index ...
+   subf(result, haystack, addr); // relative to haystack, ...
+   if (!is_byte) { srdi(result, result, 1); } // in characters.
+  bind(L_End);
+} // string_indexof_char
+
+// Emit code that sets result to 1 if any of the cnt bytes starting at src has its most significant bit set, and to 0 otherwise.
+void C2_MacroAssembler::has_negatives(Register src, Register cnt, Register result,
+                                      Register tmp1, Register tmp2) {
+  const Register tmp0 = R0;
+  assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
+  Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
+  // src is advanced while scanning; tmp1, tmp2, R0, CTR and CCR0 are overwritten.
+  // Check if cnt >= 16 (bytes), i.e. whether there is at least one 16-byte chunk for the fast loop.
+  lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080 (once ori and rldimi below have run)
+  srwi_(tmp2, cnt, 4);            // tmp2 = number of 16-byte chunks.
+  li(result, 1);                  // Assume there's a negative byte.
+  beq(CCR0, Lslow);               // No full chunk: the mask in tmp1 is not needed by the byte loop.
+  ori(tmp1, tmp1, 0x8080);
+  rldimi(tmp1, tmp1, 32, 0);
+  mtctr(tmp2);
+
+  // 2x unrolled loop
+  bind(Lfastloop);
+  ld(tmp2, 0, src);
+  ld(tmp0, 8, src);
+
+  orr(tmp0, tmp2, tmp0);          // Merge both doublewords so one test covers all 16 bytes.
+
+  and_(tmp0, tmp0, tmp1);         // Keep only the top bit of every byte.
+  bne(CCR0, Ldone);               // Found negative byte.
+  addi(src, src, 16);
+
+  bdnz(Lfastloop);
+
+  bind(Lslow);                    // Fallback to slow version
+  rldicl_(tmp0, cnt, 0, 64-4);    // tmp0 = cnt & 15: bytes left for the byte-wise loop.
+  beq(CCR0, Lnoneg);
+  mtctr(tmp0);
+  bind(Lloop);
+  lbz(tmp0, 0, src);
+  addi(src, src, 1);
+  andi_(tmp0, tmp0, 0x80);
+  bne(CCR0, Ldone);               // Found negative byte.
+  bdnz(Lloop);
+  bind(Lnoneg);
+  li(result, 0);                  // Every byte was checked and none had its top bit set.
+
+  bind(Ldone);
+}
+
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
+#define CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+ public:
+  // Intrinsics for CompactStrings
+  // Compress char[] to byte[] by compressing 16 bytes at once.
+  void string_compress_16(Register src, Register dst, Register cnt,
+                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+                          Label& Lfailure);
+
+  // Compress char[] to byte[]. cnt must be positive int.
+  void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
+
+  // Inflate byte[] to char[] by inflating 16 bytes at once.
+  void string_inflate_16(Register src, Register dst, Register cnt,
+                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
+
+  // Inflate byte[] to char[]. cnt must be positive int.
+  void string_inflate(Register src, Register dst, Register cnt, Register tmp);
+  // Compare two strings: result is the difference of the first mismatching chars or, if none, of the lengths. ae is a StrIntrinsicNode encoding.
+  void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
+                      Register tmp1, Register result, int ae);
+  // Set result to 1 if both arrays (is_array_equ) or both memory ranges bounded by limit have equal contents, else to 0.
+  void array_equals(bool is_array_equ, Register ary1, Register ary2,
+                    Register limit, Register tmp1, Register result, bool is_byte);
+  // Set result to the index of needle within haystack, or to -1. needlecntval == 0 means the needle length is variable (in needlecnt).
+  void string_indexof(Register result, Register haystack, Register haycnt,
+                      Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
+                      Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
+  // Set result to the index of the first occurrence of a single character, or to -1. needle == R0 selects the constant needleChar.
+  void string_indexof_char(Register result, Register haystack, Register haycnt,
+                           Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
+  // Set result to 1 if any of the cnt bytes at src has its most significant bit set, else to 0.
+  void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
+
+#endif // CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@ -33,6 +33,7 @@
 #include "memory/resourceArea.hpp"
 #include "nativeInst_ppc.hpp"
 #include "oops/klass.inline.hpp"
+#include "oops/methodData.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/icache.hpp"
@ -45,9 +46,6 @@
 #include "runtime/stubRoutines.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/powerOfTwo.hpp"
-#ifdef COMPILER2
-#include "opto/intrinsicnode.hpp"
-#endif

 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) // nothing
@ -3311,552 +3309,6 @@ void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwo

 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////

-#ifdef COMPILER2
-// Intrinsics for CompactStrings
-
-// Compress char[] to byte[] by compressing 16 bytes at once.
-void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
-                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
-                                        Label& Lfailure) {
-
-  const Register tmp0 = R0;
-  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
-  Label Lloop, Lslow;
-
-  // Check if cnt >= 8 (= 16 bytes)
-  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF
-  srwi_(tmp2, cnt, 3);
-  beq(CCR0, Lslow);
-  ori(tmp1, tmp1, 0xFF);
-  rldimi(tmp1, tmp1, 32, 0);
-  mtctr(tmp2);
-
-  // 2x unrolled loop
-  bind(Lloop);
-  ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
-  ld(tmp4, 8, src);               // _4_5_6_7
-
-  orr(tmp0, tmp2, tmp4);
-  rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
-  rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
-  rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
-  rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7
-
-  andc_(tmp0, tmp0, tmp1);
-  bne(CCR0, Lfailure);            // Not latin1.
-  addi(src, src, 16);
-
-  rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
-  srdi(tmp2, tmp2, 3*8);          // ____0_2_
-  rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
-  srdi(tmp4, tmp4, 3*8);          // ____4_6_
-
-  orr(tmp2, tmp2, tmp3);          // ____0123
-  orr(tmp4, tmp4, tmp5);          // ____4567
-
-  stw(tmp2, 0, dst);
-  stw(tmp4, 4, dst);
-  addi(dst, dst, 8);
-  bdnz(Lloop);
-
-  bind(Lslow);                    // Fallback to slow version
-}
-
-// Compress char[] to byte[]. cnt must be positive int.
-void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
-  Label Lloop;
-  mtctr(cnt);
-
-  bind(Lloop);
-  lhz(tmp, 0, src);
-  cmplwi(CCR0, tmp, 0xff);
-  bgt(CCR0, Lfailure);            // Not latin1.
-  addi(src, src, 2);
-  stb(tmp, 0, dst);
-  addi(dst, dst, 1);
-  bdnz(Lloop);
-}
-
-// Inflate byte[] to char[] by inflating 16 bytes at once.
-void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
-                                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
-  const Register tmp0 = R0;
-  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
-  Label Lloop, Lslow;
-
-  // Check if cnt >= 8
-  srwi_(tmp2, cnt, 3);
-  beq(CCR0, Lslow);
-  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
-  ori(tmp1, tmp1, 0xFF);
-  mtctr(tmp2);
-
-  // 2x unrolled loop
-  bind(Lloop);
-  lwz(tmp2, 0, src);              // ____0123 (Big Endian)
-  lwz(tmp4, 4, src);              // ____4567
-  addi(src, src, 8);
-
-  rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
-  rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
-  rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
-  rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
-
-  andc(tmp0, tmp2, tmp1);         // ____0_1_
-  rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
-  andc(tmp3, tmp4, tmp1);         // ____4_5_
-  rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
-
-  rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
-  rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7
-
-  std(tmp2, 0, dst);
-  std(tmp4, 8, dst);
-  addi(dst, dst, 16);
-  bdnz(Lloop);
-
-  bind(Lslow);                    // Fallback to slow version
-}
-
-// Inflate byte[] to char[]. cnt must be positive int.
-void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
-  Label Lloop;
-  mtctr(cnt);
-
-  bind(Lloop);
-  lbz(tmp, 0, src);
-  addi(src, src, 1);
-  sth(tmp, 0, dst);
-  addi(dst, dst, 2);
-  bdnz(Lloop);
-}
-
-void MacroAssembler::string_compare(Register str1, Register str2,
-                                    Register cnt1, Register cnt2,
-                                    Register tmp1, Register result, int ae) {
-  const Register tmp0 = R0,
-                 diff = tmp1;
-
-  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
-  Label Ldone, Lslow, Lloop, Lreturn_diff;
-
-  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
-  // we interchange str1 and str2 in the UL case and negate the result.
-  // Like this, str1 is always latin1 encoded, except for the UU case.
-  // In addition, we need 0 (or sign which is 0) extend.
-
-  if (ae == StrIntrinsicNode::UU) {
-    srwi(cnt1, cnt1, 1);
-  } else {
-    clrldi(cnt1, cnt1, 32);
-  }
-
-  if (ae != StrIntrinsicNode::LL) {
-    srwi(cnt2, cnt2, 1);
-  } else {
-    clrldi(cnt2, cnt2, 32);
-  }
-
-  // See if the lengths are different, and calculate min in cnt1.
-  // Save diff in case we need it for a tie-breaker.
-  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
-  // if (diff > 0) { cnt1 = cnt2; }
-  if (VM_Version::has_isel()) {
-    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
-  } else {
-    Label Lskip;
-    blt(CCR0, Lskip);
-    mr(cnt1, cnt2);
-    bind(Lskip);
-  }
-
-  // Rename registers
-  Register chr1 = result;
-  Register chr2 = tmp0;
-
-  // Compare multiple characters in fast loop (only implemented for same encoding).
-  int stride1 = 8, stride2 = 8;
-  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
-    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
-    Label Lfastloop, Lskipfast;
-
-    srwi_(tmp0, cnt1, log2_chars_per_iter);
-    beq(CCR0, Lskipfast);
-    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
-    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
-    mtctr(tmp0);
-
-    bind(Lfastloop);
-    ld(chr1, 0, str1);
-    ld(chr2, 0, str2);
-    cmpd(CCR0, chr1, chr2);
-    bne(CCR0, Lslow);
-    addi(str1, str1, stride1);
-    addi(str2, str2, stride2);
-    bdnz(Lfastloop);
-    mr(cnt1, cnt2); // Remaining characters.
-    bind(Lskipfast);
-  }
-
-  // Loop which searches the first difference character by character.
-  cmpwi(CCR0, cnt1, 0);
-  beq(CCR0, Lreturn_diff);
-  bind(Lslow);
-  mtctr(cnt1);
-
-  switch (ae) {
-    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
-    case StrIntrinsicNode::UL: // fallthru (see comment above)
-    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
-    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
-    default: ShouldNotReachHere(); break;
-  }
-
-  bind(Lloop);
-  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
-  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
-  subf_(result, chr2, chr1); // result = chr1 - chr2
-  bne(CCR0, Ldone);
-  addi(str1, str1, stride1);
-  addi(str2, str2, stride2);
-  bdnz(Lloop);
-
-  // If strings are equal up to min length, return the length difference.
-  bind(Lreturn_diff);
-  mr(result, diff);
-
-  // Otherwise, return the difference between the first mismatched chars.
-  bind(Ldone);
-  if (ae == StrIntrinsicNode::UL) {
-    neg(result, result); // Negate result (see note above).
-  }
-}
-
-void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
-                                  Register limit, Register tmp1, Register result, bool is_byte) {
-  const Register tmp0 = R0;
-  assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
-  Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
-  bool limit_needs_shift = false;
-
-  if (is_array_equ) {
-    const int length_offset = arrayOopDesc::length_offset_in_bytes();
-    const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
-
-    // Return true if the same array.
-    cmpd(CCR0, ary1, ary2);
-    beq(CCR0, Lskiploop);
-
-    // Return false if one of them is NULL.
-    cmpdi(CCR0, ary1, 0);
-    cmpdi(CCR1, ary2, 0);
-    li(result, 0);
-    cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
-    beq(CCR0, Ldone);
-
-    // Load the lengths of arrays.
-    lwz(limit, length_offset, ary1);
-    lwz(tmp0, length_offset, ary2);
-
-    // Return false if the two arrays are not equal length.
-    cmpw(CCR0, limit, tmp0);
-    bne(CCR0, Ldone);
-
-    // Load array addresses.
-    addi(ary1, ary1, base_offset);
-    addi(ary2, ary2, base_offset);
-  } else {
-    limit_needs_shift = !is_byte;
-    li(result, 0); // Assume not equal.
-  }
-
-  // Rename registers
-  Register chr1 = tmp0;
-  Register chr2 = tmp1;
-
-  // Compare 8 bytes per iteration in fast loop.
-  const int log2_chars_per_iter = is_byte ? 3 : 2;
-
-  srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
-  beq(CCR0, Lskipfast);
-  mtctr(tmp0);
-
-  bind(Lfastloop);
-  ld(chr1, 0, ary1);
-  ld(chr2, 0, ary2);
-  addi(ary1, ary1, 8);
-  addi(ary2, ary2, 8);
-  cmpd(CCR0, chr1, chr2);
-  bne(CCR0, Ldone);
-  bdnz(Lfastloop);
-
-  bind(Lskipfast);
-  rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
-  beq(CCR0, Lskiploop);
-  mtctr(limit);
-
-  // Character by character.
-  bind(Lloop);
-  if (is_byte) {
-    lbz(chr1, 0, ary1);
-    lbz(chr2, 0, ary2);
-    addi(ary1, ary1, 1);
-    addi(ary2, ary2, 1);
-  } else {
-    lhz(chr1, 0, ary1);
-    lhz(chr2, 0, ary2);
-    addi(ary1, ary1, 2);
-    addi(ary2, ary2, 2);
-  }
-  cmpw(CCR0, chr1, chr2);
-  bne(CCR0, Ldone);
-  bdnz(Lloop);
-
-  bind(Lskiploop);
-  li(result, 1); // All characters are equal.
-  bind(Ldone);
-}
-
-void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
-                                    Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
-                                    Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
-
-  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
-  Label L_TooShort, L_Found, L_NotFound, L_End;
-  Register last_addr = haycnt, // Kill haycnt at the beginning.
-  addr      = tmp1,
-  n_start   = tmp2,
-  ch1       = tmp3,
-  ch2       = R0;
-
-  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
-  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
-  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
-
-  // **************************************************************************************************
-  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
-  // **************************************************************************************************
-
-  // Compute last haystack addr to use if no match gets found.
-  clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
-  addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
-  if (needlecntval == 0) { // variable needlecnt
-   cmpwi(CCR6, needlecnt, 2);
-   clrldi(needlecnt, needlecnt, 32);  // Ensure positive int is valid as 64 bit value.
-   blt(CCR6, L_TooShort);             // Variable needlecnt: handle short needle separately.
-  }
-
-  if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
-
-  if (needlecntval == 0) { // variable needlecnt
-   subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
-   addi(needlecnt, needlecnt, -2);    // Rest of needle.
-  } else { // constant needlecnt
-  guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
-  assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
-   addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
-   if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
-  }
-
-  if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
-
-  if (ae ==StrIntrinsicNode::UL) {
-   srwi(tmp4, n_start, 1*8);          // ___0
-   rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
-  }
-
-  add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
-
-  // Main Loop (now we have at least 2 characters).
-  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
-  bind(L_OuterLoop); // Search for 1st 2 characters.
-  Register addr_diff = tmp4;
-   subf(addr_diff, addr, last_addr);  // Difference between already checked address and last address to check.
-   addi(addr, addr, h_csize);         // This is the new address we want to use for comparing.
-   srdi_(ch2, addr_diff, h_csize);
-   beq(CCR0, L_FinalCheck);           // 2 characters left?
-   mtctr(ch2);                        // num of characters / 2
-  bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
-   if (h_csize == 2) {                // Load 2 characters of haystack (ignore alignment).
-    lwz(ch1, 0, addr);
-    lwz(ch2, 2, addr);
-   } else {
-    lhz(ch1, 0, addr);
-    lhz(ch2, 1, addr);
-   }
-   cmpw(CCR0, ch1, n_start);          // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
-   cmpw(CCR1, ch2, n_start);
-   beq(CCR0, L_Comp1);                // Did we find the needle start?
-   beq(CCR1, L_Comp2);
-   addi(addr, addr, 2 * h_csize);
-   bdnz(L_InnerLoop);
-  bind(L_FinalCheck);
-   andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
-   beq(CCR0, L_NotFound);
-   if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
-   cmpw(CCR1, ch1, n_start);
-   beq(CCR1, L_Comp1);
-  bind(L_NotFound);
-   li(result, -1);                    // not found
-   b(L_End);
-
-   // **************************************************************************************************
-   // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
-   // **************************************************************************************************
-  if (needlecntval == 0) {           // We have to handle these cases separately.
-  Label L_OneCharLoop;
-  bind(L_TooShort);
-   mtctr(haycnt);
-   if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
-  bind(L_OneCharLoop);
-   if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
-   cmpw(CCR1, ch1, n_start);
-   beq(CCR1, L_Found);               // Did we find the one character needle?
-   bdnz(L_OneCharLoop);
-   li(result, -1);                   // Not found.
-   b(L_End);
-  }
-
-  // **************************************************************************************************
-  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
-  // **************************************************************************************************
-
-  // Compare the rest
-  bind(L_Comp2);
-   addi(addr, addr, h_csize);        // First comparison has failed, 2nd one hit.
-  bind(L_Comp1);                     // Addr points to possible needle start.
-  if (needlecntval != 2) {           // Const needlecnt==2?
-   if (needlecntval != 3) {
-    if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
-    Register n_ind = tmp4,
-             h_ind = n_ind;
-    li(n_ind, 2 * n_csize);          // First 2 characters are already compared, use index 2.
-    mtctr(needlecnt);                // Decremented by 2, still > 0.
-   Label L_CompLoop;
-   bind(L_CompLoop);
-    if (ae ==StrIntrinsicNode::UL) {
-      h_ind = ch1;
-      sldi(h_ind, n_ind, 1);
-    }
-    if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
-    if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
-    cmpw(CCR1, ch1, ch2);
-    bne(CCR1, L_OuterLoop);
-    addi(n_ind, n_ind, n_csize);
-    bdnz(L_CompLoop);
-   } else { // No loop required if there's only one needle character left.
-    if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
-    if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
-    cmpw(CCR1, ch1, ch2);
-    bne(CCR1, L_OuterLoop);
-   }
-  }
-  // Return index ...
-  bind(L_Found);
-   subf(result, haystack, addr);     // relative to haystack, ...
-   if (h_csize == 2) { srdi(result, result, 1); } // in characters.
-  bind(L_End);
-} // string_indexof
-
-void MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
-                                         Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
-  assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
-
-  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
-  Register addr = tmp1,
-           ch1 = tmp2,
-           ch2 = R0;
-
-  const int h_csize = is_byte ? 1 : 2;
-
-//4:
-   srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
-   mr(addr, haystack);
-   beq(CCR0, L_FinalCheck);
-   mtctr(tmp2);              // Move to count register.
-//8:
-  bind(L_InnerLoop);         // Main work horse (2x unrolled search loop).
-   if (!is_byte) {
-    lhz(ch1, 0, addr);
-    lhz(ch2, 2, addr);
-   } else {
-    lbz(ch1, 0, addr);
-    lbz(ch2, 1, addr);
-   }
-   (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
-   (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
-   beq(CCR0, L_Found1);      // Did we find the needle?
-   beq(CCR1, L_Found2);
-   addi(addr, addr, 2 * h_csize);
-   bdnz(L_InnerLoop);
-//16:
-  bind(L_FinalCheck);
-   andi_(R0, haycnt, 1);
-   beq(CCR0, L_NotFound);
-   if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
-   (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
-   beq(CCR1, L_Found1);
-//21:
-  bind(L_NotFound);
-   li(result, -1);           // Not found.
-   b(L_End);
-
-  bind(L_Found2);
-   addi(addr, addr, h_csize);
-//24:
-  bind(L_Found1);            // Return index ...
-   subf(result, haystack, addr); // relative to haystack, ...
-   if (!is_byte) { srdi(result, result, 1); } // in characters.
-  bind(L_End);
-} // string_indexof_char
-
-
-void MacroAssembler::has_negatives(Register src, Register cnt, Register result,
-                                   Register tmp1, Register tmp2) {
-  const Register tmp0 = R0;
-  assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
-  Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
-
-  // Check if cnt >= 8 (= 16 bytes)
-  lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080
-  srwi_(tmp2, cnt, 4);
-  li(result, 1);                  // Assume there's a negative byte.
-  beq(CCR0, Lslow);
-  ori(tmp1, tmp1, 0x8080);
-  rldimi(tmp1, tmp1, 32, 0);
-  mtctr(tmp2);
-
-  // 2x unrolled loop
-  bind(Lfastloop);
-  ld(tmp2, 0, src);
-  ld(tmp0, 8, src);
-
-  orr(tmp0, tmp2, tmp0);
-
-  and_(tmp0, tmp0, tmp1);
-  bne(CCR0, Ldone);               // Found negative byte.
-  addi(src, src, 16);
-
-  bdnz(Lfastloop);
-
-  bind(Lslow);                    // Fallback to slow version
-  rldicl_(tmp0, cnt, 0, 64-4);
-  beq(CCR0, Lnoneg);
-  mtctr(tmp0);
-  bind(Lloop);
-  lbz(tmp0, 0, src);
-  addi(src, src, 1);
-  andi_(tmp0, tmp0, 0x80);
-  bne(CCR0, Ldone);               // Found negative byte.
-  bdnz(Lloop);
-  bind(Lnoneg);
-  li(result, 0);
-
-  bind(Ldone);
-}
-
-#endif // Compiler2
-
 // Helpers for Intrinsic Emitters
 //
 // Revert the byte order of a 32bit value in a register
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@ -761,39 +761,6 @@ class MacroAssembler: public Assembler {
  void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0);
  void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1);

-#ifdef COMPILER2
-  // Intrinsics for CompactStrings
-  // Compress char[] to byte[] by compressing 16 bytes at once.
-  void string_compress_16(Register src, Register dst, Register cnt,
-                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
-                          Label& Lfailure);
-
-  // Compress char[] to byte[]. cnt must be positive int.
-  void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
-
-  // Inflate byte[] to char[] by inflating 16 bytes at once.
-  void string_inflate_16(Register src, Register dst, Register cnt,
-                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
-
-  // Inflate byte[] to char[]. cnt must be positive int.
-  void string_inflate(Register src, Register dst, Register cnt, Register tmp);
-
-  void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
-                      Register tmp1, Register result, int ae);
-
-  void array_equals(bool is_array_equ, Register ary1, Register ary2,
-                    Register limit, Register tmp1, Register result, bool is_byte);
-
-  void string_indexof(Register result, Register haystack, Register haycnt,
-                      Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
-                      Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
-
-  void string_indexof_char(Register result, Register haystack, Register haycnt,
-                           Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
-
-  void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
-#endif
-
  // Emitters for BigInteger.multiplyToLen intrinsic.
  inline void multiply64(Register dest_hi, Register dest_lo,
                         Register x, Register y);
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@ -1144,7 +1144,7 @@ bool SafePointNode::needs_polling_address_input() {

 // Emit an interrupt that is caught by the debugger (for debugging compiler).
 void emit_break(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ illtrap();
 }

@ -1165,7 +1165,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 //=============================================================================

 void emit_nop(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ nop();
 }

@ -1184,12 +1184,14 @@ source_hpp %{ // Header information of the source block.
 //---<  Used for optimization in Compile::Shorten_branches  >---
 //--------------------------------------------------------------

+class C2_MacroAssembler;
+
 class CallStubImpl {

 public:

  // Emit call stub, compiled java to interpreter.
-  static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
+  static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);

  // Size of call trampoline stub.
  // This doesn't need to be accurate to the byte, but it
@ -1220,7 +1222,7 @@ source %{
 //   load the call target from the constant pool
 //   branch via CTR (LR/link still points to the call-site above)

-void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
+void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
  address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
  if (stub == NULL) {
    ciEnv::current()->record_out_of_memory_failure();
@ -1251,7 +1253,7 @@ typedef struct {
 // - Add a relocation at the branch-and-link instruction.
 // - Emit a branch-and-link.
 // - Remember the return pc offset.
-EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
+EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
  EmitCallOffsets offsets = { -1, -1 };
  const int start_offset = __ offset();
  offsets.insts_call_instruction_offset = __ offset();
@ -1379,7 +1381,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {

 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  const long framesize = C->output()->frame_size_in_bytes();
  assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
@ -1571,7 +1573,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {

 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
  assert(framesize >= 0, "negative frame-size?");
@ -1637,7 +1639,7 @@ int MachEpilogNode::safepoint_offset() const {

 #if 0 // TODO: PPC port
 void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  if (LoadPollAddressFromThread) {
    _masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread);
  } else {
@ -1754,7 +1756,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
      int src_offset = ra_->reg2offset(src_lo);
      int dst_offset = ra_->reg2offset(dst_lo);
      if (cbuf) {
-        MacroAssembler _masm(cbuf);
+        C2_MacroAssembler _masm(cbuf);
        __ ld(R0, src_offset, R1_SP);
        __ std(R0, dst_offset, R1_SP);
        __ ld(R0, src_offset+8, R1_SP);
@ -1767,7 +1769,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
      VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
      int dst_offset = ra_->reg2offset(dst_lo);
      if (cbuf) {
-        MacroAssembler _masm(cbuf);
+        C2_MacroAssembler _masm(cbuf);
        __ addi(R0, R1_SP, dst_offset);
        __ stxvd2x(Rsrc, R0);
      }
@ -1778,7 +1780,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
      VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
      int src_offset = ra_->reg2offset(src_lo);
      if (cbuf) {
-        MacroAssembler _masm(cbuf);
+        C2_MacroAssembler _masm(cbuf);
        __ addi(R0, R1_SP, src_offset);
        __ lxvd2x(Rdst, R0);
      }
@ -1789,7 +1791,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
      VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
      VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
      if (cbuf) {
-        MacroAssembler _masm(cbuf);
+        C2_MacroAssembler _masm(cbuf);
        __ xxlor(Rdst, Rsrc, Rsrc);
      }
      size += 4;
@ -1833,7 +1835,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
      size = (Rsrc != Rdst) ? 4 : 0;

      if (cbuf) {
-        MacroAssembler _masm(cbuf);
+        C2_MacroAssembler _masm(cbuf);
        if (size) {
          __ mr(Rdst, Rsrc);
        }
@ -1879,7 +1881,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
  // Check for float reg-reg copy.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
    if (cbuf) {
-      MacroAssembler _masm(cbuf);
+      C2_MacroAssembler _masm(cbuf);
      FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
      FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
      __ fmr(Rdst, Rsrc);
@ -2049,7 +2051,7 @@ void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 #endif

 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  // _count contains the number of nops needed for padding.
  for (int i = 0; i < _count; i++) {
    __ nop();
@ -2070,7 +2072,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 #endif

 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);
@ -2096,7 +2098,7 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {

 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // This is the unverified entry point.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  // Inline_cache contains a klass.
  Register ic_klass       = as_Register(Matcher::inline_cache_reg_encode());
@ -2179,7 +2181,7 @@ class HandlerImpl {
 source %{

 int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
@ -2196,7 +2198,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 // The deopt_handler is like the exception handler, but it calls to
 // the deoptimization blob instead of jumping to the exception blob.
 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
@ -2660,14 +2662,14 @@ const bool Matcher::convi2l_type_required = true;
 encode %{
  enc_class enc_unimplemented %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
  %}

  enc_class enc_untested %{
 #ifdef ASSERT
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ untested("Untested mach node encoding in AD file.");
 #else
    // TODO: PPC port $archOpcode(ppc64Opcode_none);
@ -2676,7 +2678,7 @@ encode %{

  enc_class enc_lbz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lbz);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
  %}
@ -2684,7 +2686,7 @@ encode %{
  // Load acquire.
  enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
@ -2694,7 +2696,7 @@ encode %{
  enc_class enc_lhz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lhz);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
  %}
@ -2703,7 +2705,7 @@ encode %{
  enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
@ -2713,7 +2715,7 @@ encode %{
  enc_class enc_lwz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwz);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
  %}
@ -2722,7 +2724,7 @@ encode %{
  enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
@ -2731,7 +2733,7 @@ encode %{

  enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
@ -2741,7 +2743,7 @@ encode %{
  // Load acquire.
  enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
@ -2752,7 +2754,7 @@ encode %{

  enc_class enc_lfd(RegF dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
  %}
@ -2760,7 +2762,7 @@ encode %{
  enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int toc_offset = 0;

    address const_toc_addr;
@ -2784,7 +2786,7 @@ encode %{
  enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    if (!ra_->C->output()->in_scratch_emit_size()) {
      address const_toc_addr;
@ -3019,7 +3021,7 @@ encode %{
  enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int toc_offset = 0;

    intptr_t val = $src$$constant;
@ -3052,7 +3054,7 @@ encode %{
  enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    if (!ra_->C->output()->in_scratch_emit_size()) {
      intptr_t val = $src$$constant;
      relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
@ -3186,14 +3188,14 @@ encode %{

  enc_class enc_stw(iRegIsrc src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stw);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stw($src$$Register, Idisp, $mem$$base$$Register);
  %}

  enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_std);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
@ -3202,14 +3204,14 @@ encode %{

  enc_class enc_stfs(RegF src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stfs);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  enc_class enc_stfd(RegF src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stfd);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
  %}
@ -3230,7 +3232,7 @@ encode %{
    // __ bind(skip_release);
    // __ stb(card mark);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label skip_storestore;

 #if 0 // TODO: PPC port
@ -3451,7 +3453,7 @@ encode %{
  enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int cc        = $cmp$$cmpcode;
    int flags_reg = $crx$$reg;
    Label done;
@ -3466,7 +3468,7 @@ encode %{
  enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
@ -3480,14 +3482,14 @@ encode %{
  // input mapping for latency computation.
  enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andc);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Label done;
    __ cmpwi($crx$$CondRegister, $src$$Register, 0);
@ -3500,7 +3502,7 @@ encode %{
  enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Label done;
    __ cmpdi($crx$$CondRegister, $src$$Register, 0);
@ -3513,7 +3515,7 @@ encode %{
  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Label done;
    __ bso($crx$$CondRegister, done);
@ -3525,7 +3527,7 @@ encode %{
  enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label done;
    __ bso($crx$$CondRegister, done);
    __ mffprd($dst$$Register, $src$$FloatRegister);
@ -3536,7 +3538,7 @@ encode %{
  enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bc);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label d;   // dummy
    __ bind(d);
    Label* p = ($lbl$$label);
@ -3566,7 +3568,7 @@ encode %{
    // to ppc64Opcode_bc in order to hide this detail from the scheduler.
    // TODO: PPC port $archOpcode(ppc64Opcode_bc);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label d;    // dummy
    __ bind(d);
    Label* p = ($lbl$$label);
@ -3598,7 +3600,7 @@ encode %{
    // to ppc64Opcode_bc in order to hide this detail from the scheduler.
    // TODO: PPC port $archOpcode(ppc64Opcode_bc);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label d;   // dummy
    __ bind(d);
    Label* p = ($lbl$$label);
@ -3683,7 +3685,7 @@ encode %{
    // Fake operand dst needed for PPC scheduler.
    assert($dst$$constant == 0x0, "dst must be 0x0");

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    // Mark the code position where the load from the safepoint
    // polling page was emitted as relocInfo::poll_type.
    __ relocate(relocInfo::poll_type);
@ -3739,7 +3741,7 @@ encode %{
  enc_class enc_java_static_call(method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bl);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    address entry_point = (address)$meth$$method;

    if (!_method) {
@ -3789,7 +3791,7 @@ encode %{
  enc_class enc_java_dynamic_call_sched(method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bl);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    if (!ra_->C->output()->in_scratch_emit_size()) {
      // Create a call trampoline stub for the given method.
@ -3892,7 +3894,7 @@ encode %{
  // In the code we have to use $constanttablebase.
  enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int start_offset = __ offset();

    Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
@ -3951,7 +3953,7 @@ encode %{
  enc_class enc_java_to_runtime_call (method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    const address start_pc = __ pc();

 #if defined(ABI_ELFv2)
@ -3984,7 +3986,7 @@ encode %{
  // input mapping for latency computation.
  enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mtctr);
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mtctr($src$$Register);
  %}

--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_C2_MACROASSEMBLER_S390_HPP
+#define CPU_S390_C2_MACROASSEMBLER_S390_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+ public:
+  //-------------------------------------------
+  // Special String Intrinsics Implementation.
+  //-------------------------------------------
+  // Intrinsics for CompactStrings
+  //   Restores: src, dst
+  //   Uses:     cnt
+  //   Kills:    tmp, Z_R0, Z_R1.
+  //   Early clobber: result.
+  //   Boolean precise controls accuracy of result value.
+  unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
+                               Register tmp,    bool precise);
+
+  // Inflate byte[] to char[].
+  unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
+
+  // Inflate byte[] to char[].
+  //   Restores: src, dst
+  //   Uses:     cnt
+  //   Kills:    tmp, Z_R0, Z_R1.
+  unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp);
+
+  // Inflate byte[] to char[], length known at compile time.
+  //   Restores: src, dst
+  //   Kills:    tmp, Z_R0, Z_R1.
+  // Note:
+  //   len is signed int. Counts # characters, not bytes.
+  unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
+
+  // Kills src.
+  unsigned int has_negatives(Register result, Register src, Register cnt,
+                             Register odd_reg, Register even_reg, Register tmp);
+
+  unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
+                              Register odd_reg, Register even_reg, Register result, int ae);
+
+  unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
+                            Register odd_reg, Register even_reg, Register result, bool is_byte);
+
+  unsigned int string_indexof(Register result, Register haystack, Register haycnt,
+                              Register needle, Register needlecnt, int needlecntval,
+                              Register odd_reg, Register even_reg, int ae);
+
+  unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
+                                   Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
+
+#endif // CPU_S390_C2_MACROASSEMBLER_S390_HPP
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@ -841,52 +841,6 @@ class MacroAssembler: public Assembler {
                                             Register cnt_reg,
                                             Register tmp1_reg, Register tmp2_reg);

-  //-------------------------------------------
-  // Special String Intrinsics Implementation.
-  //-------------------------------------------
-  // Intrinsics for CompactStrings
-  //   Restores: src, dst
-  //   Uses:     cnt
-  //   Kills:    tmp, Z_R0, Z_R1.
-  //   Early clobber: result.
-  //   Boolean precise controls accuracy of result value.
-#ifdef COMPILER2
-  unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
-                               Register tmp,    bool precise);
-
-  // Inflate byte[] to char[].
-  unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
-
-  // Inflate byte[] to char[].
-  //   Restores: src, dst
-  //   Uses:     cnt
-  //   Kills:    tmp, Z_R0, Z_R1.
-  unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp);
-
-  // Inflate byte[] to char[], length known at compile time.
-  //   Restores: src, dst
-  //   Kills:    tmp, Z_R0, Z_R1.
-  // Note:
-  //   len is signed int. Counts # characters, not bytes.
-  unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
-
-  // Kills src.
-  unsigned int has_negatives(Register result, Register src, Register cnt,
-                             Register odd_reg, Register even_reg, Register tmp);
-
-  unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
-                              Register odd_reg, Register even_reg, Register result, int ae);
-
-  unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
-                            Register odd_reg, Register even_reg, Register result, bool is_byte);
-
-  unsigned int string_indexof(Register result, Register haystack, Register haycnt,
-                              Register needle, Register needlecnt, int needlecntval,
-                              Register odd_reg, Register even_reg, int ae);
-
-  unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
-                                   Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
-#endif

  // Emit an oop const to the constant pool and set a relocation info
  // with address current_pc. Return the TOC offset of the constant.
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@ -664,13 +664,13 @@ bool SafePointNode::needs_polling_address_input() {
 }

 void emit_nop(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ z_nop();
 }

 // Emit an interrupt that is caught by the debugger (for debugging compiler).
 void emit_break(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ z_illtrap();
 }

@ -735,7 +735,7 @@ static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {

 // Check effective address (at runtime) for required alignment.
 static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  __ z_lay(Z_R0, disp, index, base);
  __ z_nill(Z_R0, alignment-1);
@ -743,7 +743,7 @@ static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index,
  __ z_illtrap();
 }

-int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
+int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
                    PhaseRegAlloc* ra_, bool is_native_call = false) {
  __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
  address old_mark = __ inst_mark();
@ -774,7 +774,7 @@ int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relo
  return (ret_off - start_off);
 }

-static int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
+static int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
  __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
  address old_mark = __ inst_mark();
  unsigned int start_off = __ offset();
@ -807,7 +807,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
 // Even with PC-relative TOC addressing, we still need this node.
 // Float loads/stores do not support PC-relative addresses.
 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Register Rtoc = as_Register(ra_->get_encode(this));
  __ load_toc(Rtoc);
 }
@ -858,7 +858,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {

 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  __ verify_thread();

@ -932,7 +932,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
 #endif

 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Compile* C = ra_->C;
  __ verify_thread();

@ -1034,7 +1034,7 @@ static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigne

 static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+    C2_MacroAssembler _masm(cbuf);
    __ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP);
  }

@ -1108,7 +1108,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
  // Check for integer reg-reg copy.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
    if (cbuf) {
-      MacroAssembler _masm(cbuf);
+      C2_MacroAssembler _masm(cbuf);
      Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
      Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
      __ z_lgr(Rdst, Rsrc);
@ -1155,7 +1155,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
  // Check for float reg-reg copy.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
    if (cbuf) {
-      MacroAssembler _masm(cbuf);
+      C2_MacroAssembler _masm(cbuf);
      FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
      FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
      __ z_ldr(Rdst, Rsrc);
@ -1254,7 +1254,7 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const {
 #endif

 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  int rem_space = 0;
  if (!(ra_->C->output()->in_scratch_emit_size())) {
@ -1294,7 +1294,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const {

 // Take care of the size function, if you make changes here!
 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
@ -1360,7 +1360,7 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
 #endif

 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  const int ic_miss_offset = 2;

  // Inline_cache contains a klass.
@ -1455,7 +1455,7 @@ source %{
 //   aware of the exception.
 int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
  Register temp_reg = Z_R1;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
@ -1476,7 +1476,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {

 // Emit deopt handler code.
 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  address        base = __ start_a_stub(size_deopt_handler());

  if (base == NULL) {
@ -1841,13 +1841,13 @@ void Compile::reshape_address(AddPNode* addp) {
 // needs for encoding need to be specified.
 encode %{
  enc_class enc_unimplemented %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
  %}

  enc_class enc_untested %{
 #ifdef ASSERT
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ untested("Untested mach node encoding in AD file.");
 #endif
  %}
@ -2033,7 +2033,7 @@ encode %{
                  Assembler::reg(Ridx, 12, 48) |
                  Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
    } else {
-        MacroAssembler _masm(&cbuf);
+        C2_MacroAssembler _masm(&cbuf);
        __ load_const_optimized(Z_R1_scratch, $mem$$disp);
        if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); }
        z_emit_inst(cbuf, $secondary |
@ -2045,7 +2045,7 @@ encode %{
  %}

  enc_class z_enc_brul(Label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2058,7 +2058,7 @@ encode %{
  %}

  enc_class z_enc_bru(Label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2071,7 +2071,7 @@ encode %{
  %}

  enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2084,7 +2084,7 @@ encode %{
  %}

  enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2097,7 +2097,7 @@ encode %{
  %}

  enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2121,7 +2121,7 @@ encode %{
  %}

  enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2147,7 +2147,7 @@ encode %{
  %}

  enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2172,7 +2172,7 @@ encode %{
  %}

  enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* p = $lbl$$label;

    // 'p' is `NULL' when this encoding class is used only to
@ -2199,7 +2199,7 @@ encode %{

  // Call from Java to runtime.
  enc_class z_enc_java_to_runtime_call(method meth) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    // Save return pc before call to the place where we need it, since
    // callee doesn't.
@ -2227,7 +2227,7 @@ encode %{
  enc_class z_enc_java_static_call(method meth) %{
    // Call to fixup routine. Fixup routine uses ScopeDesc info to determine
    // whom we intended to call.
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    int ret_offset = 0;

    if (!_method) {
@ -2256,7 +2256,7 @@ encode %{

  // Java dynamic call
  enc_class z_enc_java_dynamic_call(method meth) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    unsigned int start_off = __ offset();

    int vtable_index = this->_vtable_index;
@ -2311,7 +2311,7 @@ encode %{
  %}

  enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdst = reg_to_register_object($dst$$reg);
    Register Rsrc = reg_to_register_object($src$$reg);

@ -2332,7 +2332,7 @@ encode %{
  %}

  enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdst = reg_to_register_object($dst$$reg);
    int      Csrc = $src$$constant;
    Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
@ -2349,7 +2349,7 @@ encode %{
  %}

  enc_class z_enc_cctobool(iRegI res) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rres = reg_to_register_object($res$$reg);

    if (VM_Version::has_LoadStoreConditional()) {
@ -2366,7 +2366,7 @@ encode %{
  %}

  enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rcomp = reg_to_register_object($compare_value$$reg);
    Register Rnew  = reg_to_register_object($exchange_value$$reg);
    Register Raddr = reg_to_register_object($addr_ptr$$reg);
@ -2375,7 +2375,7 @@ encode %{
  %}

  enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rcomp = reg_to_register_object($compare_value$$reg);
    Register Rnew  = reg_to_register_object($exchange_value$$reg);
    Register Raddr = reg_to_register_object($addr_ptr$$reg);
@ -2384,7 +2384,7 @@ encode %{
  %}

  enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdst = reg_to_register_object($dst$$reg);
    Register Rtmp = reg_to_register_object($tmp$$reg);
    guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
@ -2400,7 +2400,7 @@ encode %{
  %}

  enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdst = reg_to_register_object($dst$$reg);
    Register Rtmp = reg_to_register_object($tmp$$reg);
    guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
--- a/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp
+++ b/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp
@ -0,0 +1,526 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "oops/arrayOop.hpp"
+#include "opto/c2_MacroAssembler.hpp"
+#include "opto/intrinsicnode.hpp"
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+// Compress char[] to byte[] by compressing 16 bytes (8 chars) at once. Return 0 in 'result' on failure (branches to Ldone).
+void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
+                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                                           FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
+  Label Lloop, Lslow;
+  assert(UseVIS >= 3, "VIS3 is required");
+  assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
+  assert_different_registers(ftmp1, ftmp2, ftmp3);
+
+  // Check if cnt >= 8 (= 16 bytes)
+  cmp(cnt, 8);
+  br(Assembler::less, false, Assembler::pn, Lslow);
+  delayed()->mov(cnt, result); // copy count (branch is not annulled, so this runs on both paths)
+
+  // Check for 8-byte alignment of src and dst
+  or3(src, dst, tmp1);
+  andcc(tmp1, 7, G0);
+  br(Assembler::notZero, false, Assembler::pn, Lslow);
+  delayed()->nop();
+
+  // Set mask for bshuffle instruction
+  Register mask = tmp4;
+  set(0x13579bdf, mask);
+  bmask(mask, G0, G0);
+
+  // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters (tmp4 is reused as 'mask' from here on)
+  Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
+  add(mask, 0x300, mask);             // mask = 0x0000 0000 ff00 ff00
+  sllx(mask, 32, tmp1);               // tmp1 = 0xff00 ff00 0000 0000
+  or3(mask, tmp1, mask);              // mask = 0xff00 ff00 ff00 ff00
+
+  // Load first 8 bytes
+  ldx(src, 0, tmp1);
+
+  bind(Lloop);
+  // Load next 8 bytes
+  ldx(src, 8, tmp2);
+
+  // Check for non-latin1 character by testing if the most significant byte of a char is set.
+  // Although we have to move the data between integer and floating point registers, this is
+  // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
+  or3(tmp1, tmp2, tmp3);
+  btst(tmp3, mask);
+  // annul zeroing if branch is not taken to preserve original count
+  brx(Assembler::notZero, true, Assembler::pn, Ldone);
+  delayed()->mov(G0, result); // 0 - failed
+
+  // Move bytes into float register
+  movxtod(tmp1, ftmp1);
+  movxtod(tmp2, ftmp2);
+
+  // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
+  bshuffle(ftmp1, ftmp2, ftmp3);
+  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
+
+  // Increment addresses and decrement count
+  inc(src, 16);
+  inc(dst, 8);
+  dec(cnt, 8);
+
+  cmp(cnt, 8);
+  // annul LDX if branch is not taken to prevent access past end of string
+  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
+  delayed()->ldx(src, 0, tmp1);
+
+  // Fallback to slow version: Lslow is bound last, so execution continues with whatever is emitted after this macro
+  bind(Lslow);
+}
+
+// Compress char[] to byte[], one char per iteration. Return 0 in 'result' on failure. NOTE(review): first char is loaded before cnt is tested - confirm callers pass cnt != 0.
+void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
+  Label Lloop;
+  assert_different_registers(src, dst, cnt, tmp, result);
+
+  lduh(src, 0, tmp);
+
+  bind(Lloop);
+  inc(src, sizeof(jchar));
+  cmp(tmp, 0xff);
+  // annul zeroing if branch is not taken to preserve original count
+  br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
+  delayed()->mov(G0, result); // 0 - failed
+  deccc(cnt);
+  stb(tmp, dst, 0);
+  inc(dst);
+  // annul LDUH if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->lduh(src, 0, tmp); // hoisted
+}
+
+// Inflate byte[] to char[] by inflating 8 source bytes into 16 destination bytes at once. Ldone is not referenced in this body.
+void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
+                                          FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
+  Label Lloop, Lslow;
+  assert(UseVIS >= 3, "VIS3 is required");
+  assert_different_registers(src, dst, cnt, tmp);
+  assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
+
+  // Check if cnt >= 8 (= 16 bytes)
+  cmp(cnt, 8);
+  br(Assembler::less, false, Assembler::pn, Lslow);
+  delayed()->nop();
+
+  // Check for 8-byte alignment of src and dst
+  or3(src, dst, tmp);
+  andcc(tmp, 7, G0);
+  br(Assembler::notZero, false, Assembler::pn, Lslow);
+  // Initialize float register to zero (done in the delay slot of the non-annulled branch above)
+  FloatRegister zerof = ftmp4;
+  delayed()->fzero(FloatRegisterImpl::D, zerof);
+
+  // Load first 8 bytes
+  ldf(FloatRegisterImpl::D, src, 0, ftmp1);
+
+  bind(Lloop);
+  inc(src, 8);
+  dec(cnt, 8);
+
+  // Inflate the string by interleaving each byte from the source array
+  // with a zero byte and storing the result in the destination array.
+  fpmerge(zerof, ftmp1->successor(), ftmp2);
+  stf(FloatRegisterImpl::D, ftmp2, dst, 8);
+  fpmerge(zerof, ftmp1, ftmp3);
+  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
+
+  inc(dst, 16);
+
+  cmp(cnt, 8);
+  // annul LDF if branch is not taken to prevent access past end of string
+  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
+  delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
+
+  // Fallback to slow version: Lslow is bound last, so execution continues with whatever is emitted after this macro
+  bind(Lslow);
+}
+
+// Inflate byte[] to char[], one byte per iteration. Ldone is not referenced here. NOTE(review): first byte is loaded before cnt is tested - confirm callers pass cnt != 0.
+void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
+  Label Loop;
+  assert_different_registers(src, dst, cnt, tmp);
+
+  ldub(src, 0, tmp);
+  bind(Loop);
+  inc(src);
+  deccc(cnt);
+  sth(tmp, dst, 0);
+  inc(dst, sizeof(jchar));
+  // annul LDUB if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Loop);
+  delayed()->ldub(src, 0, tmp); // hoisted
+}
+
+void C2_MacroAssembler::string_compare(Register str1, Register str2,
+                                       Register cnt1, Register cnt2,
+                                       Register tmp1, Register tmp2,
+                                       Register result, int ae) { // ae: StrIntrinsicNode::LL, UU, LU or UL
+  Label Ldone, Lloop;
+  assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
+  int stride1, stride2;
+
+  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+  // we interchange str1 and str2 in the UL case and negate the result.
+  // Like this, str1 is always latin1 encoded, except for the UU case.
+
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    srl(cnt2, 1, cnt2);
+  }
+
+  // See if the lengths are different, and calculate min in cnt1.
+  // Save diff in case we need it for a tie-breaker.
+  Label Lskip;
+  Register diff = tmp1;
+  subcc(cnt1, cnt2, diff);
+  br(Assembler::greater, true, Assembler::pt, Lskip);
+  // cnt2 is shorter, so use its count:
+  delayed()->mov(cnt2, cnt1);
+  bind(Lskip);
+
+  // Rename registers
+  Register limit1 = cnt1;
+  Register limit2 = limit1;
+  Register chr1   = result;
+  Register chr2   = cnt2;
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    // We need an additional register to keep track of two limits (tmp2 is only used in these mixed-encoding cases)
+    assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
+    limit2 = tmp2;
+  }
+
+  // Is the minimum length zero?
+  cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
+  br(Assembler::equal, true, Assembler::pn, Ldone);
+  // result is difference in lengths
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
+  } else {
+    delayed()->mov(diff, result);
+  }
+
+  // Load first characters
+  if (ae == StrIntrinsicNode::LL) {
+    stride1 = stride2 = sizeof(jbyte);
+    ldub(str1, 0, chr1);
+    ldub(str2, 0, chr2);
+  } else if (ae == StrIntrinsicNode::UU) {
+    stride1 = stride2 = sizeof(jchar);
+    lduh(str1, 0, chr1);
+    lduh(str2, 0, chr2);
+  } else {
+    stride1 = sizeof(jbyte);
+    stride2 = sizeof(jchar);
+    ldub(str1, 0, chr1);
+    lduh(str2, 0, chr2);
+  }
+
+  // Compare first characters
+  subcc(chr1, chr2, chr1);
+  br(Assembler::notZero, false, Assembler::pt, Ldone);
+  assert(chr1 == result, "result must be pre-placed");
+  delayed()->nop();
+
+  // Check if the strings start at same location
+  cmp(str1, str2);
+  brx(Assembler::equal, true, Assembler::pn, Ldone);
+  delayed()->mov(G0, result);  // result is zero
+
+  // We have no guarantee that on 64 bit the higher half of limit is 0
+  signx(limit1);
+
+  // Get limit
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    sll(limit1, 1, limit2);
+    subcc(limit2, stride2, chr2);
+  }
+  subcc(limit1, stride1, chr1);
+  br(Assembler::zero, true, Assembler::pn, Ldone);
+  // result is difference in lengths
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
+  } else {
+    delayed()->mov(diff, result);
+  }
+
+  // Shift str1 and str2 to the end of the arrays, negate limit
+  add(str1, limit1, str1);
+  add(str2, limit2, str2);
+  neg(chr1, limit1);  // limit1 = -(limit1-stride1)
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    neg(chr2, limit2);  // limit2 = -(limit2-stride2)
+  }
+
+  // Compare the rest of the characters
+  load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
+
+  bind(Lloop);
+  load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
+
+  subcc(chr1, chr2, chr1);
+  br(Assembler::notZero, false, Assembler::pt, Ldone);
+  assert(chr1 == result, "result must be pre-placed");
+  delayed()->inccc(limit1, stride1);
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    inccc(limit2, stride2);
+  }
+
+  // annul the str1 load if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
+
+  // If strings are equal up to min length, return the length difference.
+  if (ae == StrIntrinsicNode::UU) {
+    // Divide by 2 to get number of chars
+    sra(diff, 1, result);
+  } else {
+    mov(diff, result);
+  }
+
+  // Otherwise, return the difference between the first mismatched chars.
+  bind(Ldone);
+  if(ae == StrIntrinsicNode::UL) {
+    // Negate result (see note above)
+    neg(result);
+  }
+}
+
+void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
+                                     Register limit, Register tmp, Register result, bool is_byte) { // result: 1 = equal, 0 = not equal
+  Label Ldone, Lloop, Lremaining;
+  assert_different_registers(ary1, ary2, limit, tmp, result);
+
+  int length_offset  = arrayOopDesc::length_offset_in_bytes();
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+  assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
+
+  if (is_array_equ) {
+    // return true if the same array
+    cmp(ary1, ary2);
+    brx(Assembler::equal, true, Assembler::pn, Ldone);
+    delayed()->mov(1, result);  // equal
+
+    br_null(ary1, true, Assembler::pn, Ldone);
+    delayed()->clr(result);     // not equal
+
+    br_null(ary2, true, Assembler::pn, Ldone);
+    delayed()->clr(result);     // not equal
+
+    // load the lengths of arrays
+    ld(Address(ary1, length_offset), limit);
+    ld(Address(ary2, length_offset), tmp);
+
+    // return false if the two arrays are not equal length
+    cmp(limit, tmp);
+    br(Assembler::notEqual, true, Assembler::pn, Ldone);
+    delayed()->clr(result);     // not equal
+  }
+
+  cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
+  delayed()->mov(1, result); // zero-length arrays are equal
+
+  if (is_array_equ) {
+    // load array addresses
+    add(ary1, base_offset, ary1);
+    add(ary2, base_offset, ary2);
+    // set byte count
+    if (!is_byte) {
+      sll(limit, exact_log2(sizeof(jchar)), limit);
+    }
+  } else {
+    // We have no guarantee that on 64 bit the higher half of limit is 0
+    signx(limit);
+  }
+
+#ifdef ASSERT
+  // Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
+  // Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
+  Label Laligned;
+  or3(ary1, ary2, tmp);
+  andcc(tmp, 7, tmp);
+  br_null_short(tmp, Assembler::pn, Laligned);
+  STOP("First array element is not 8-byte aligned.");
+  should_not_reach_here();
+  bind(Laligned);
+#endif
+
+  // Shift ary1 and ary2 to the end of the arrays, negate limit
+  add(ary1, limit, ary1);
+  add(ary2, limit, ary2);
+  neg(limit, limit);
+
+  // MAIN LOOP
+  // Load and compare 8 bytes (one ldx per array) at a time until the loaded words are not
+  // equal or we reached the end of the arrays. If the size of the arrays is not a
+  // multiple of 8 bytes, we simply read over the end of the array, bail out and
+  // compare the remaining bytes below by skipping the garbage bytes.
+  ldx(ary1, limit, result);
+  bind(Lloop);
+  ldx(ary2, limit, tmp);
+  inccc(limit, 8);
+  // Bail out if we reached the end (but still do the comparison)
+  br(Assembler::positive, false, Assembler::pn, Lremaining);
+  delayed()->cmp(result, tmp);
+  // Check equality of elements
+  brx(Assembler::equal, false, Assembler::pt, target(Lloop));
+  delayed()->ldx(ary1, limit, result);
+
+  ba(Ldone);
+  delayed()->clr(result); // not equal
+
+  // TAIL COMPARISON
+  // We got here because we reached the end of the arrays. 'limit' is the number of
+  // garbage bytes we may have compared by reading over the end of the arrays. Shift
+  // out the garbage and compare the remaining elements.
+  bind(Lremaining);
+  // Optimistic shortcut: elements potentially including garbage are equal
+  brx(Assembler::equal, true, Assembler::pt, target(Ldone));
+  delayed()->mov(1, result); // equal
+  // Shift 'limit' bytes to the right and compare
+  sll(limit, 3, limit); // bytes to bits
+  srlx(result, limit, result);
+  srlx(tmp, limit, tmp);
+  cmp(result, tmp);
+  clr(result);
+  movcc(Assembler::equal, false, xcc, 1, result);
+
+  bind(Ldone);
+}
+
+void C2_MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
+
+  // test for negative bytes in input string of a given size
+  // result 1 if found, 0 otherwise.
+
+  Label Lcore, Ltail, Lreturn, Lcore_rpt;
+
+  assert_different_registers(inp, size, t2, t3, t4, t5, result);
+
+  Register i     = result;  // result used as integer index i until very end
+  Register lmask = t2;      // t2 is aliased to lmask
+
+  // INITIALIZATION
+  // ===========================================================
+  // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
+  // compute unaligned offset -> i
+  // compute core end index   -> t5
+  Assembler::sethi(0x80808000, t2);   //! sethi macro fails to emit optimal code, so the raw instruction is used
+  add(t2, 0x80, t2);
+  sllx(t2, 32, t3);
+  or3(t3, t2, lmask);                 // 0x8080808080808080 -> lmask
+  sra(size,0,size);
+  andcc(inp, 0x7, i);                 // unaligned offset -> i
+  br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
+  delayed()->add(size, -8, t5);       // (annulled) core end index -> t5
+
+  // ===========================================================
+
+  // UNALIGNED HEAD
+  // ===========================================================
+  // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
+  // * obliterate (ignore) bytes outside string by shifting off reg ends
+  // * compare with bitmask, short circuit return true if one or more high
+  //   bits set.
+  cmp(size, 0);
+  br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
+  delayed()->mov(0,result);      // annulled so i not clobbered for following
+  neg(i, t4);
+  add(i, size, t5);
+  ldx(inp, t4, t3);  // raw aligned 8B containing unaligned head -> t3
+  mov(8, t4);
+  sub(t4, t5, t4);
+  sra(t4, 31, t5);
+  andn(t4, t5, t5);
+  add(i, t5, t4);
+  sll(t5, 3, t5);
+  sll(t4, 3, t4);   // # bits to shift right, left -> t5,t4
+  srlx(t3, t5, t3);
+  sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
+  andcc(lmask, t3, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
+  delayed()->mov(1,result);      // annulled so i not clobbered for following
+  add(size, -8, t5);             // core end index -> t5
+  mov(8, t4);
+  sub(t4, i, i);                 // # bytes examined in unaligned head (<8) -> i
+  // ===========================================================
+
+  // ALIGNED CORE
+  // ===========================================================
+  // * iterate index i over aligned 8B sections of core, comparing with
+  //   bitmask, short circuit return true if one or more high bits set
+  // t5 contains core end index/loop limit which is the index
+  //     of the MSB of last (unaligned) 8B fully contained in the string.
+  // inp   contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Lcore);
+  cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
+  bind(Lcore_rpt);
+  ldx(inp, i, t3);
+  andcc(t3, lmask, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn);
+  delayed()->mov(1, result);    // annulled so i not clobbered for following
+  add(i, 8, i);
+  cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
+  // ===========================================================
+
+  // ALIGNED TAIL (<8B)
+  // ===========================================================
+  // handle aligned tail of 7B or less as complete 8B, obliterating end of
+  // string bytes by shifting them off end, compare what's left with bitmask
+  // inp   contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Ltail);
+  subcc(size, i, t4);   // # of remaining bytes in string -> t4
+  // return 0 if no more remaining bytes
+  br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
+  delayed()->mov(0, result); // annulled so i not clobbered for following
+  ldx(inp, i, t3);       // load final 8B (aligned) containing tail -> t3
+  mov(8, t5);
+  sub(t5, t4, t4);
+  mov(0, result);        // ** i clobbered at this point
+  sll(t4, 3, t4);        // bits beyond end of string          -> t4
+  srlx(t3, t4, t3);      // bytes beyond end now obliterated   -> t3
+  andcc(lmask, t3, G0);
+  movcc(Assembler::notZero, false, xcc,  1, result);
+  bind(Lreturn);
+}
+
--- a/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp
+++ b/src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp
@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
+#define CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+ public:
+  // Compress char[] to byte[] by compressing 16 bytes (8 chars) at once. Return 0 in 'result' on failure (branches to Ldone).
+  void string_compress_16(Register src, Register dst, Register cnt, Register result,
+                          Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                          FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
+
+  // Compress char[] to byte[]. Return 0 in 'result' on failure (branches to Ldone). Parameter names follow the definition in c2_MacroAssembler_sparc.cpp.
+  void string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone);
+
+  // Inflate byte[] to char[] by inflating 8 source bytes into 16 destination bytes at once. The definition does not reference Ldone.
+  void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
+                         FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
+
+  // Inflate byte[] to char[], one byte per iteration. The definition does not reference Ldone.
+  void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
+
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      Register tmp1, Register tmp2,   // tmp2 is only used when ae is LU or UL
+                      Register result, int ae);       // ae: StrIntrinsicNode::LL, UU, LU or UL
+
+  void array_equals(bool is_array_equ, Register ary1, Register ary2,
+                    Register limit, Register tmp, Register result, bool is_byte); // result: 1 = equal, 0 = not equal
+  // test for negative bytes in input string of a given size; result 1 if found, 0 if none
+  void has_negatives(Register inp, Register size, Register result,
+                     Register t2, Register t3, Register t4,
+                     Register t5);
+
+#endif // CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
@ -49,9 +49,6 @@
 #include "utilities/align.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/powerOfTwo.hpp"
-#ifdef COMPILER2
-#include "opto/intrinsicnode.hpp"
-#endif

 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@ -3436,498 +3433,6 @@ void MacroAssembler::reinit_heapbase() {
  }
 }

-#ifdef COMPILER2
-
-// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
-void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
-                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4,
-                                        FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
-  Label Lloop, Lslow;
-  assert(UseVIS >= 3, "VIS3 is required");
-  assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
-  assert_different_registers(ftmp1, ftmp2, ftmp3);
-
-  // Check if cnt >= 8 (= 16 bytes)
-  cmp(cnt, 8);
-  br(Assembler::less, false, Assembler::pn, Lslow);
-  delayed()->mov(cnt, result); // copy count
-
-  // Check for 8-byte alignment of src and dst
-  or3(src, dst, tmp1);
-  andcc(tmp1, 7, G0);
-  br(Assembler::notZero, false, Assembler::pn, Lslow);
-  delayed()->nop();
-
-  // Set mask for bshuffle instruction
-  Register mask = tmp4;
-  set(0x13579bdf, mask);
-  bmask(mask, G0, G0);
-
-  // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
-  Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
-  add(mask, 0x300, mask);             // mask = 0x0000 0000 ff00 ff00
-  sllx(mask, 32, tmp1);               // tmp1 = 0xff00 ff00 0000 0000
-  or3(mask, tmp1, mask);              // mask = 0xff00 ff00 ff00 ff00
-
-  // Load first 8 bytes
-  ldx(src, 0, tmp1);
-
-  bind(Lloop);
-  // Load next 8 bytes
-  ldx(src, 8, tmp2);
-
-  // Check for non-latin1 character by testing if the most significant byte of a char is set.
-  // Although we have to move the data between integer and floating point registers, this is
-  // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
-  or3(tmp1, tmp2, tmp3);
-  btst(tmp3, mask);
-  // annul zeroing if branch is not taken to preserve original count
-  brx(Assembler::notZero, true, Assembler::pn, Ldone);
-  delayed()->mov(G0, result); // 0 - failed
-
-  // Move bytes into float register
-  movxtod(tmp1, ftmp1);
-  movxtod(tmp2, ftmp2);
-
-  // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
-  bshuffle(ftmp1, ftmp2, ftmp3);
-  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
-
-  // Increment addresses and decrement count
-  inc(src, 16);
-  inc(dst, 8);
-  dec(cnt, 8);
-
-  cmp(cnt, 8);
-  // annul LDX if branch is not taken to prevent access past end of string
-  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
-  delayed()->ldx(src, 0, tmp1);
-
-  // Fallback to slow version
-  bind(Lslow);
-}
-
-// Compress char[] to byte[]. Return 0 on failure.
-void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
-  Label Lloop;
-  assert_different_registers(src, dst, cnt, tmp, result);
-
-  lduh(src, 0, tmp);
-
-  bind(Lloop);
-  inc(src, sizeof(jchar));
-  cmp(tmp, 0xff);
-  // annul zeroing if branch is not taken to preserve original count
-  br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
-  delayed()->mov(G0, result); // 0 - failed
-  deccc(cnt);
-  stb(tmp, dst, 0);
-  inc(dst);
-  // annul LDUH if branch is not taken to prevent access past end of string
-  br(Assembler::notZero, true, Assembler::pt, Lloop);
-  delayed()->lduh(src, 0, tmp); // hoisted
-}
-
-// Inflate byte[] to char[] by inflating 16 bytes at once.
-void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
-                                       FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
-  Label Lloop, Lslow;
-  assert(UseVIS >= 3, "VIS3 is required");
-  assert_different_registers(src, dst, cnt, tmp);
-  assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
-
-  // Check if cnt >= 8 (= 16 bytes)
-  cmp(cnt, 8);
-  br(Assembler::less, false, Assembler::pn, Lslow);
-  delayed()->nop();
-
-  // Check for 8-byte alignment of src and dst
-  or3(src, dst, tmp);
-  andcc(tmp, 7, G0);
-  br(Assembler::notZero, false, Assembler::pn, Lslow);
-  // Initialize float register to zero
-  FloatRegister zerof = ftmp4;
-  delayed()->fzero(FloatRegisterImpl::D, zerof);
-
-  // Load first 8 bytes
-  ldf(FloatRegisterImpl::D, src, 0, ftmp1);
-
-  bind(Lloop);
-  inc(src, 8);
-  dec(cnt, 8);
-
-  // Inflate the string by interleaving each byte from the source array
-  // with a zero byte and storing the result in the destination array.
-  fpmerge(zerof, ftmp1->successor(), ftmp2);
-  stf(FloatRegisterImpl::D, ftmp2, dst, 8);
-  fpmerge(zerof, ftmp1, ftmp3);
-  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
-
-  inc(dst, 16);
-
-  cmp(cnt, 8);
-  // annul LDX if branch is not taken to prevent access past end of string
-  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
-  delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
-
-  // Fallback to slow version
-  bind(Lslow);
-}
-
-// Inflate byte[] to char[].
-void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
-  Label Loop;
-  assert_different_registers(src, dst, cnt, tmp);
-
-  ldub(src, 0, tmp);
-  bind(Loop);
-  inc(src);
-  deccc(cnt);
-  sth(tmp, dst, 0);
-  inc(dst, sizeof(jchar));
-  // annul LDUB if branch is not taken to prevent access past end of string
-  br(Assembler::notZero, true, Assembler::pt, Loop);
-  delayed()->ldub(src, 0, tmp); // hoisted
-}
-
-void MacroAssembler::string_compare(Register str1, Register str2,
-                                    Register cnt1, Register cnt2,
-                                    Register tmp1, Register tmp2,
-                                    Register result, int ae) {
-  Label Ldone, Lloop;
-  assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
-  int stride1, stride2;
-
-  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
-  // we interchange str1 and str2 in the UL case and negate the result.
-  // Like this, str1 is always latin1 encoded, expect for the UU case.
-
-  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
-    srl(cnt2, 1, cnt2);
-  }
-
-  // See if the lengths are different, and calculate min in cnt1.
-  // Save diff in case we need it for a tie-breaker.
-  Label Lskip;
-  Register diff = tmp1;
-  subcc(cnt1, cnt2, diff);
-  br(Assembler::greater, true, Assembler::pt, Lskip);
-  // cnt2 is shorter, so use its count:
-  delayed()->mov(cnt2, cnt1);
-  bind(Lskip);
-
-  // Rename registers
-  Register limit1 = cnt1;
-  Register limit2 = limit1;
-  Register chr1   = result;
-  Register chr2   = cnt2;
-  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
-    // We need an additional register to keep track of two limits
-    assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
-    limit2 = tmp2;
-  }
-
-  // Is the minimum length zero?
-  cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
-  br(Assembler::equal, true, Assembler::pn, Ldone);
-  // result is difference in lengths
-  if (ae == StrIntrinsicNode::UU) {
-    delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
-  } else {
-    delayed()->mov(diff, result);
-  }
-
-  // Load first characters
-  if (ae == StrIntrinsicNode::LL) {
-    stride1 = stride2 = sizeof(jbyte);
-    ldub(str1, 0, chr1);
-    ldub(str2, 0, chr2);
-  } else if (ae == StrIntrinsicNode::UU) {
-    stride1 = stride2 = sizeof(jchar);
-    lduh(str1, 0, chr1);
-    lduh(str2, 0, chr2);
-  } else {
-    stride1 = sizeof(jbyte);
-    stride2 = sizeof(jchar);
-    ldub(str1, 0, chr1);
-    lduh(str2, 0, chr2);
-  }
-
-  // Compare first characters
-  subcc(chr1, chr2, chr1);
-  br(Assembler::notZero, false, Assembler::pt, Ldone);
-  assert(chr1 == result, "result must be pre-placed");
-  delayed()->nop();
-
-  // Check if the strings start at same location
-  cmp(str1, str2);
-  brx(Assembler::equal, true, Assembler::pn, Ldone);
-  delayed()->mov(G0, result);  // result is zero
-
-  // We have no guarantee that on 64 bit the higher half of limit is 0
-  signx(limit1);
-
-  // Get limit
-  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
-    sll(limit1, 1, limit2);
-    subcc(limit2, stride2, chr2);
-  }
-  subcc(limit1, stride1, chr1);
-  br(Assembler::zero, true, Assembler::pn, Ldone);
-  // result is difference in lengths
-  if (ae == StrIntrinsicNode::UU) {
-    delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
-  } else {
-    delayed()->mov(diff, result);
-  }
-
-  // Shift str1 and str2 to the end of the arrays, negate limit
-  add(str1, limit1, str1);
-  add(str2, limit2, str2);
-  neg(chr1, limit1);  // limit1 = -(limit1-stride1)
-  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
-    neg(chr2, limit2);  // limit2 = -(limit2-stride2)
-  }
-
-  // Compare the rest of the characters
-  load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
-
-  bind(Lloop);
-  load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
-
-  subcc(chr1, chr2, chr1);
-  br(Assembler::notZero, false, Assembler::pt, Ldone);
-  assert(chr1 == result, "result must be pre-placed");
-  delayed()->inccc(limit1, stride1);
-  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
-    inccc(limit2, stride2);
-  }
-
-  // annul LDUB if branch is not taken to prevent access past end of string
-  br(Assembler::notZero, true, Assembler::pt, Lloop);
-  delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
-
-  // If strings are equal up to min length, return the length difference.
-  if (ae == StrIntrinsicNode::UU) {
-    // Divide by 2 to get number of chars
-    sra(diff, 1, result);
-  } else {
-    mov(diff, result);
-  }
-
-  // Otherwise, return the difference between the first mismatched chars.
-  bind(Ldone);
-  if(ae == StrIntrinsicNode::UL) {
-    // Negate result (see note above)
-    neg(result);
-  }
-}
-
-void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
-                                  Register limit, Register tmp, Register result, bool is_byte) {
-  Label Ldone, Lloop, Lremaining;
-  assert_different_registers(ary1, ary2, limit, tmp, result);
-
-  int length_offset  = arrayOopDesc::length_offset_in_bytes();
-  int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
-  assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
-
-  if (is_array_equ) {
-    // return true if the same array
-    cmp(ary1, ary2);
-    brx(Assembler::equal, true, Assembler::pn, Ldone);
-    delayed()->mov(1, result);  // equal
-
-    br_null(ary1, true, Assembler::pn, Ldone);
-    delayed()->clr(result);     // not equal
-
-    br_null(ary2, true, Assembler::pn, Ldone);
-    delayed()->clr(result);     // not equal
-
-    // load the lengths of arrays
-    ld(Address(ary1, length_offset), limit);
-    ld(Address(ary2, length_offset), tmp);
-
-    // return false if the two arrays are not equal length
-    cmp(limit, tmp);
-    br(Assembler::notEqual, true, Assembler::pn, Ldone);
-    delayed()->clr(result);     // not equal
-  }
-
-  cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
-  delayed()->mov(1, result); // zero-length arrays are equal
-
-  if (is_array_equ) {
-    // load array addresses
-    add(ary1, base_offset, ary1);
-    add(ary2, base_offset, ary2);
-    // set byte count
-    if (!is_byte) {
-      sll(limit, exact_log2(sizeof(jchar)), limit);
-    }
-  } else {
-    // We have no guarantee that on 64 bit the higher half of limit is 0
-    signx(limit);
-  }
-
-#ifdef ASSERT
-  // Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
-  // Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
-  Label Laligned;
-  or3(ary1, ary2, tmp);
-  andcc(tmp, 7, tmp);
-  br_null_short(tmp, Assembler::pn, Laligned);
-  STOP("First array element is not 8-byte aligned.");
-  should_not_reach_here();
-  bind(Laligned);
-#endif
-
-  // Shift ary1 and ary2 to the end of the arrays, negate limit
-  add(ary1, limit, ary1);
-  add(ary2, limit, ary2);
-  neg(limit, limit);
-
-  // MAIN LOOP
-  // Load and compare array elements of size 'byte_width' until the elements are not
-  // equal or we reached the end of the arrays. If the size of the arrays is not a
-  // multiple of 'byte_width', we simply read over the end of the array, bail out and
-  // compare the remaining bytes below by skipping the garbage bytes.
-  ldx(ary1, limit, result);
-  bind(Lloop);
-  ldx(ary2, limit, tmp);
-  inccc(limit, 8);
-  // Bail out if we reached the end (but still do the comparison)
-  br(Assembler::positive, false, Assembler::pn, Lremaining);
-  delayed()->cmp(result, tmp);
-  // Check equality of elements
-  brx(Assembler::equal, false, Assembler::pt, target(Lloop));
-  delayed()->ldx(ary1, limit, result);
-
-  ba(Ldone);
-  delayed()->clr(result); // not equal
-
-  // TAIL COMPARISON
-  // We got here because we reached the end of the arrays. 'limit' is the number of
-  // garbage bytes we may have compared by reading over the end of the arrays. Shift
-  // out the garbage and compare the remaining elements.
-  bind(Lremaining);
-  // Optimistic shortcut: elements potentially including garbage are equal
-  brx(Assembler::equal, true, Assembler::pt, target(Ldone));
-  delayed()->mov(1, result); // equal
-  // Shift 'limit' bytes to the right and compare
-  sll(limit, 3, limit); // bytes to bits
-  srlx(result, limit, result);
-  srlx(tmp, limit, tmp);
-  cmp(result, tmp);
-  clr(result);
-  movcc(Assembler::equal, false, xcc, 1, result);
-
-  bind(Ldone);
-}
-
-void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
-
-  // test for negative bytes in input string of a given size
-  // result 1 if found, 0 otherwise.
-
-  Label Lcore, Ltail, Lreturn, Lcore_rpt;
-
-  assert_different_registers(inp, size, t2, t3, t4, t5, result);
-
-  Register i     = result;  // result used as integer index i until very end
-  Register lmask = t2;      // t2 is aliased to lmask
-
-  // INITIALIZATION
-  // ===========================================================
-  // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
-  // compute unaligned offset -> i
-  // compute core end index   -> t5
-  Assembler::sethi(0x80808000, t2);   //! sethi macro fails to emit optimal
-  add(t2, 0x80, t2);
-  sllx(t2, 32, t3);
-  or3(t3, t2, lmask);                 // 0x8080808080808080 -> lmask
-  sra(size,0,size);
-  andcc(inp, 0x7, i);                 // unaligned offset -> i
-  br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
-  delayed()->add(size, -8, t5);       // (annuled) core end index -> t5
-
-  // ===========================================================
-
-  // UNALIGNED HEAD
-  // ===========================================================
-  // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
-  // * obliterate (ignore) bytes outside string by shifting off reg ends
-  // * compare with bitmask, short circuit return true if one or more high
-  //   bits set.
-  cmp(size, 0);
-  br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
-  delayed()->mov(0,result);      // annuled so i not clobbered for following
-  neg(i, t4);
-  add(i, size, t5);
-  ldx(inp, t4, t3);  // raw aligned 8B containing unaligned head -> t3
-  mov(8, t4);
-  sub(t4, t5, t4);
-  sra(t4, 31, t5);
-  andn(t4, t5, t5);
-  add(i, t5, t4);
-  sll(t5, 3, t5);
-  sll(t4, 3, t4);   // # bits to shift right, left -> t5,t4
-  srlx(t3, t5, t3);
-  sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
-  andcc(lmask, t3, G0);
-  brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
-  delayed()->mov(1,result);      // annuled so i not clobbered for following
-  add(size, -8, t5);             // core end index -> t5
-  mov(8, t4);
-  sub(t4, i, i);                 // # bytes examined in unalgn head (<8) -> i
-  // ===========================================================
-
-  // ALIGNED CORE
-  // ===========================================================
-  // * iterate index i over aligned 8B sections of core, comparing with
-  //   bitmask, short circuit return true if one or more high bits set
-  // t5 contains core end index/loop limit which is the index
-  //     of the MSB of last (unaligned) 8B fully contained in the string.
-  // inp   contains address of first byte in string/array
-  // lmask contains 8B high bit mask for comparison
-  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
-  bind(Lcore);
-  cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
-  bind(Lcore_rpt);
-  ldx(inp, i, t3);
-  andcc(t3, lmask, G0);
-  brx(Assembler::notZero, true, Assembler::pn, Lreturn);
-  delayed()->mov(1, result);    // annuled so i not clobbered for following
-  add(i, 8, i);
-  cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
-  // ===========================================================
-
-  // ALIGNED TAIL (<8B)
-  // ===========================================================
-  // handle aligned tail of 7B or less as complete 8B, obliterating end of
-  // string bytes by shifting them off end, compare what's left with bitmask
-  // inp   contains address of first byte in string/array
-  // lmask contains 8B high bit mask for comparison
-  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
-  bind(Ltail);
-  subcc(size, i, t4);   // # of remaining bytes in string -> t4
-  // return 0 if no more remaining bytes
-  br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
-  delayed()->mov(0, result); // annuled so i not clobbered for following
-  ldx(inp, i, t3);       // load final 8B (aligned) containing tail -> t3
-  mov(8, t5);
-  sub(t5, t4, t4);
-  mov(0, result);        // ** i clobbered at this point
-  sll(t4, 3, t4);        // bits beyond end of string          -> t4
-  srlx(t3, t4, t3);      // bytes beyond end now obliterated   -> t3
-  andcc(lmask, t3, G0);
-  movcc(Assembler::notZero, false, xcc,  1, result);
-  bind(Lreturn);
-}
-
-#endif
-
-
 // Use BIS for zeroing (count is in bytes).
 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
  assert(UseBlockZeroing && VM_Version::has_blk_zeroing(), "only works with BIS zeroing");
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
@ -1301,36 +1301,6 @@ public:
  void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
  void inc_counter(int*    counter_addr, Register Rtmp1, Register Rtmp2);

-#ifdef COMPILER2
-  // Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
-  void string_compress_16(Register src, Register dst, Register cnt, Register result,
-                          Register tmp1, Register tmp2, Register tmp3, Register tmp4,
-                          FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
-
-  // Compress char[] to byte[]. Return 0 on failure.
-  void string_compress(Register src, Register dst, Register cnt, Register tmp, Register result, Label& Ldone);
-
-  // Inflate byte[] to char[] by inflating 16 bytes at once.
-  void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
-                         FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
-
-  // Inflate byte[] to char[].
-  void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
-
-  void string_compare(Register str1, Register str2,
-                      Register cnt1, Register cnt2,
-                      Register tmp1, Register tmp2,
-                      Register result, int ae);
-
-  void array_equals(bool is_array_equ, Register ary1, Register ary2,
-                    Register limit, Register tmp, Register result, bool is_byte);
-  // test for negative bytes in input string of a given size, result 0 if none
-  void has_negatives(Register inp, Register size, Register result,
-                     Register t2, Register t3, Register t4,
-                     Register t5);
-
-#endif
-
  // Use BIS for zeroing
  void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);

--- a/src/hotspot/cpu/sparc/sparc.ad
+++ b/src/hotspot/cpu/sparc/sparc.ad
@ -592,7 +592,7 @@ bool SafePointNode::needs_polling_address_input() {

 // emit an interrupt that is caught by the debugger (for debugging compiler)
 void emit_break(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ breakpoint_trap();
 }

@ -612,7 +612,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {

 // Traceable jump
 void  emit_jmpl(CodeBuffer &cbuf, int jump_target) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Register rdest = reg_to_register_object(jump_target);
  __ JMP(rdest, 0);
  __ delayed()->nop();
@ -620,19 +620,19 @@ void  emit_jmpl(CodeBuffer &cbuf, int jump_target) {

 // Traceable jump and set exception pc
 void  emit_jmpl_set_exception_pc(CodeBuffer &cbuf, int jump_target) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Register rdest = reg_to_register_object(jump_target);
  __ JMP(rdest, 0);
  __ delayed()->add(O7, frame::pc_return_offset, Oissuing_pc );
 }

 void emit_nop(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ nop();
 }

 void emit_illtrap(CodeBuffer &cbuf) {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ illtrap(0);
 }

@ -908,7 +908,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
    disp += STACK_BIAS;
    // Check that stack offset fits, load into O7 if not
    if (!Assembler::is_simm13(disp)) {
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      __ set(disp, O7);
      if (index != R_G0_enc) {
        __ add(O7, reg_to_register_object(index), O7);
@ -932,7 +932,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,

 #ifdef ASSERT
  if (VerifyOops) {
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    if (is_verified_oop_base) {
      __ verify_oop(reg_to_register_object(src1_enc));
    }
@ -960,7 +960,7 @@ void emit_call_reloc(CodeBuffer &cbuf, intptr_t entry_point, RelocationHolder co
  // putting the "mov" instruction in the delay slot but the problem
  // may bite us again at some other point and a cleaner/generic
  // solution using relocations would be needed.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  __ set_inst_mark();

  // We flush the current window just so that there is a valid stack copy
@ -1024,7 +1024,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  Compile* C = ra_->C;
  ConstantTable& constant_table = C->output()->constant_table();
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  Register r = as_Register(ra_->get_encode(this));
  CodeSection* consts_section = __ code()->consts();
@ -1153,7 +1153,7 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {

 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  for (int i = 0; i < OptoPrologueNops; i++) {
    __ nop();
@ -1226,7 +1226,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 #endif

 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Compile* C = ra_->C;

  __ verify_thread();
@ -1534,7 +1534,7 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const {
 #endif

 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  for (int i = 0; i < _count; i += 1) {
    __ nop();
  }
@ -1555,7 +1555,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 #endif

 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()) + STACK_BIAS;
  int reg = ra_->get_encode(this);

@ -1599,7 +1599,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 #endif

 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  Register G5_ic_reg  = reg_to_register_object(Matcher::inline_cache_reg_encode());
  Register temp_reg   = G3;
  assert( G5_ic_reg != temp_reg, "conflicting registers" );
@ -1624,7 +1624,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
  Register temp_reg = G3;
  AddressLiteral exception_blob(OptoRuntime::exception_blob()->entry_point());
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
@ -1649,7 +1649,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  // at a poll and everything (including G3) can be live.
  Register temp_reg = L0;
  AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack());
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
@ -2007,7 +2007,7 @@ void Compile::reshape_address(AddPNode* addp) {
 encode %{
  enc_class enc_untested %{
 #ifdef ASSERT
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ untested("encoding");
 #endif
  %}
@ -2142,7 +2142,7 @@ encode %{

  /* %%% merge with enc_to_bool */
  enc_class enc_convP2B( iRegI dst, iRegP src ) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register   src_reg = reg_to_register_object($src$$reg);
    Register   dst_reg = reg_to_register_object($dst$$reg);
@ -2151,7 +2151,7 @@ encode %{

  enc_class enc_cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp ) %{
    // (Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)))
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register   p_reg = reg_to_register_object($p$$reg);
    Register   q_reg = reg_to_register_object($q$$reg);
@ -2284,13 +2284,13 @@ encode %{
  %}

  enc_class enc_PartialSubtypeCheck() %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type);
    __ delayed()->nop();
  %}

  enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* L = $labl$$label;
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
@ -2300,7 +2300,7 @@ encode %{
  %}

  enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Label* L = $labl$$label;
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
@ -2419,13 +2419,13 @@ encode %{
  %}

  enc_class Set32( immI src, iRegI rd ) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ set($src$$constant, reg_to_register_object($rd$$reg));
  %}

  enc_class call_epilog %{
    if( VerifyStackAtCalls ) {
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      int framesize = ra_->C->output()->frame_size_in_bytes();
      Register temp_reg = G3;
      __ add(SP, framesize, temp_reg);
@ -2447,12 +2447,12 @@ encode %{
  %}

  enc_class preserve_SP %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(SP, L7_mh_SP_save);
  %}

  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(L7_mh_SP_save, SP);
  %}

@ -2477,7 +2477,7 @@ encode %{
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ set_inst_mark();
    int vtable_index = this->_vtable_index;
    // MachCallDynamicJavaNode::ret_addr_offset uses this same test
@ -2526,7 +2526,7 @@ encode %{
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
    Register temp_reg = G3;   // caller must kill G3!  We cannot reuse G5_ic_reg here because
@ -2543,7 +2543,7 @@ encode %{
  %}

 enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdividend = reg_to_register_object($src1$$reg);
    Register Rdivisor = reg_to_register_object($src2$$reg);
    Register Rresult = reg_to_register_object($dst$$reg);
@ -2554,7 +2554,7 @@ enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
 %}

 enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register Rdividend = reg_to_register_object($src1$$reg);
    int divisor = $imm$$constant;
@ -2565,7 +2565,7 @@ enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
 %}

 enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rsrc1 = reg_to_register_object($src1$$reg);
    Register Rsrc2 = reg_to_register_object($src2$$reg);
    Register Rdst  = reg_to_register_object($dst$$reg);
@ -2577,7 +2577,7 @@ enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
 %}

 enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdividend = reg_to_register_object($src1$$reg);
    Register Rdivisor = reg_to_register_object($src2$$reg);
    Register Rresult = reg_to_register_object($dst$$reg);
@ -2594,7 +2594,7 @@ enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch
 %}

 enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register Rdividend = reg_to_register_object($src1$$reg);
    int divisor = $imm$$constant;
@ -2610,7 +2610,7 @@ enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
 %}

 enc_class fabss (sflt_reg dst, sflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
@ -2619,7 +2619,7 @@ enc_class fabss (sflt_reg dst, sflt_reg src) %{
 %}

 enc_class fabsd (dflt_reg dst, dflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2628,7 +2628,7 @@ enc_class fabsd (dflt_reg dst, dflt_reg src) %{
 %}

 enc_class fnegd (dflt_reg dst, dflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2637,7 +2637,7 @@ enc_class fnegd (dflt_reg dst, dflt_reg src) %{
 %}

 enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
@ -2646,7 +2646,7 @@ enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
 %}

 enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2656,7 +2656,7 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{


 enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2667,7 +2667,7 @@ enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
 %}

 enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2678,7 +2678,7 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
 %}

 enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2689,7 +2689,7 @@ enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
 %}

 enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2700,7 +2700,7 @@ enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
 %}

 enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2711,7 +2711,7 @@ enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
 %}

 enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2722,7 +2722,7 @@ enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
 %}

 enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2733,7 +2733,7 @@ enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
 %}

 enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2745,7 +2745,7 @@ enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{


 enc_class fmovs (dflt_reg dst, dflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
@ -2754,7 +2754,7 @@ enc_class fmovs (dflt_reg dst, dflt_reg src) %{
 %}

 enc_class fmovd (dflt_reg dst, dflt_reg src) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
    FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2763,7 +2763,7 @@ enc_class fmovd (dflt_reg dst, dflt_reg src) %{
 %}

 enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register Roop  = reg_to_register_object($oop$$reg);
    Register Rbox  = reg_to_register_object($box$$reg);
@ -2779,7 +2779,7 @@ enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
 %}

 enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);

    Register Roop  = reg_to_register_object($oop$$reg);
    Register Rbox  = reg_to_register_object($box$$reg);
@ -2795,7 +2795,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
  %}

  enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rmem = reg_to_register_object($mem$$reg);
    Register Rold = reg_to_register_object($old$$reg);
    Register Rnew = reg_to_register_object($new$$reg);
@ -2809,7 +2809,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    Register Rold = reg_to_register_object($old$$reg);
    Register Rnew = reg_to_register_object($new$$reg);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(Rnew, O7);
    __ casx(Rmem, Rold, O7);
    __ cmp( Rold, O7 );
@ -2821,7 +2821,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    Register Rold = reg_to_register_object($old$$reg);
    Register Rnew = reg_to_register_object($new$$reg);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(Rnew, O7);
    __ cas(Rmem, Rold, O7);
    __ cmp( Rold, O7 );
@ -2833,7 +2833,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    Register Rold = reg_to_register_object($old$$reg);
    Register Rnew = reg_to_register_object($new$$reg);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ cas(Rmem, Rold, Rnew);
  %}

@ -2843,14 +2843,14 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    Register Rold = reg_to_register_object($old$$reg);
    Register Rnew = reg_to_register_object($new$$reg);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ casx(Rmem, Rold, Rnew);
  %}

  enc_class enc_lflags_ne_to_boolean( iRegI res ) %{
    Register Rres = reg_to_register_object($res$$reg);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(1, Rres);
    __ movcc( Assembler::notEqual, false, Assembler::xcc, G0, Rres );
  %}
@ -2858,13 +2858,13 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
  enc_class enc_iflags_ne_to_boolean( iRegI res ) %{
    Register Rres = reg_to_register_object($res$$reg);

-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ mov(1, Rres);
    __ movcc( Assembler::notEqual, false, Assembler::icc, G0, Rres );
  %}

  enc_class floating_cmp ( iRegP dst, regF src1, regF src2 ) %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    Register Rdst = reg_to_register_object($dst$$reg);
    FloatRegister Fsrc1 = $primary ? reg_to_SingleFloatRegister_object($src1$$reg)
                                     : reg_to_DoubleFloatRegister_object($src1$$reg);
@ -2880,7 +2880,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    Register temp_reg = G3;
    AddressLiteral rethrow_stub(OptoRuntime::rethrow_stub());
    assert(temp_reg != reg_to_register_object(R_I0_num), "temp must not break oop_reg");
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
 #ifdef ASSERT
    __ save_frame(0);
    AddressLiteral last_rethrow_addrlit(&last_rethrow);
@ -2911,17 +2911,17 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
  %}

  enc_class enc_membar_acquire %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::LoadLoad) );
  %}

  enc_class enc_membar_release %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore) );
  %}

  enc_class enc_membar_volatile %{
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
  %}

--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
+#define CPU_X86_C2_MACROASSEMBLER_X86_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+public:
+  // special instructions for EVEX
+  void setvectmask(Register dst, Register src);
+  void restorevectmask();
+
+  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+  // See full description in macroAssembler_x86.cpp.
+  void fast_lock(Register obj, Register box, Register tmp,
+                 Register scr, Register cx1, Register cx2,
+                 BiasedLockingCounters* counters,
+                 RTMLockingCounters* rtm_counters,
+                 RTMLockingCounters* stack_rtm_counters,
+                 Metadata* method_data,
+                 bool use_rtm, bool profile_rtm);
+  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
+
+#if INCLUDE_RTM_OPT
+  void rtm_counters_update(Register abort_status, Register rtm_counters);
+  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
+  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data);
+  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
+                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
+  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
+  void rtm_stack_locking(Register obj, Register tmp, Register scr,
+                         Register retry_on_abort_count,
+                         RTMLockingCounters* stack_rtm_counters,
+                         Metadata* method_data, bool profile_rtm,
+                         Label& DONE_LABEL, Label& IsInflated);
+  void rtm_inflated_locking(Register obj, Register box, Register tmp,
+                            Register scr, Register retry_on_busy_count,
+                            Register retry_on_abort_count,
+                            RTMLockingCounters* rtm_counters,
+                            Metadata* method_data, bool profile_rtm,
+                            Label& DONE_LABEL);
+#endif
+
+  // Generic instructions support for use in .ad files C2 code generation
+  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
+  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
+  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
+  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
+  void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
+  void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
+  void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
+  void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
+  void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
+  void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  // Reductions for vectors of ints, longs, floats, and doubles.
+
+  // dst = src1 + reduce(op, src2) using vtmp as temps
+  void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+#ifdef _LP64
+  void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+#endif // _LP64
+
+  // dst = reduce(op, src) using vtmp as temps
+  void reduce_fp(int opcode, int vlen,
+                 XMMRegister dst, XMMRegister src,
+                 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
+ private:
+  void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+
+  void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+
+#ifdef _LP64
+  void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+#endif // _LP64
+
+  void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
+  void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
+  void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+
+  void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
+  void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+  void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+
+  void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
+  void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
+
+ public:
+
+  void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
+
+  // IndexOf strings.
+  // Small strings are loaded through stack if they cross page boundary.
+  void string_indexof(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      int int_cnt2,  Register result,
+                      XMMRegister vec, Register tmp,
+                      int ae);
+
+  // IndexOf for constant substrings with size >= 8 elements
+  // which don't need to be loaded through stack.
+  void string_indexofC8(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      int int_cnt2,  Register result,
+                      XMMRegister vec, Register tmp,
+                      int ae);
+
+    // Smallest code: we don't need to load through stack,
+    // check string tail.
+
+  // helper function for string_compare
+  void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
+                          Address::ScaleFactor scale, Address::ScaleFactor scale1,
+                          Address::ScaleFactor scale2, Register index, int ae);
+  // Compare strings.
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      XMMRegister vec1, int ae);
+
+  // Search for Non-ASCII character (Negative byte value) in a byte array,
+  // return true if it has any and false otherwise.
+  void has_negatives(Register ary1, Register len,
+                     Register result, Register tmp1,
+                     XMMRegister vec1, XMMRegister vec2);
+
+  // Compare char[] or byte[] arrays.
+  void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                     Register limit, Register result, Register chr,
+                     XMMRegister vec1, XMMRegister vec2, bool is_char);
+
+#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@ -158,12 +158,6 @@ class MacroAssembler: public Assembler {
  void incrementq(Register reg, int value = 1);
  void incrementq(Address dst, int value = 1);

-#ifdef COMPILER2
-  // special instructions for EVEX
-  void setvectmask(Register dst, Register src);
-  void restorevectmask();
-#endif
-
  // Support optimal SSE move instructions.
  void movflt(XMMRegister dst, XMMRegister src) {
    if (dst-> encoding() == src->encoding()) return;
@ -681,40 +675,6 @@ class MacroAssembler: public Assembler {
                           Label& done, Label* slow_case = NULL,
                           BiasedLockingCounters* counters = NULL);
  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-#ifdef COMPILER2
-  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
-  // See full desription in macroAssembler_x86.cpp.
-  void fast_lock(Register obj, Register box, Register tmp,
-                 Register scr, Register cx1, Register cx2,
-                 BiasedLockingCounters* counters,
-                 RTMLockingCounters* rtm_counters,
-                 RTMLockingCounters* stack_rtm_counters,
-                 Metadata* method_data,
-                 bool use_rtm, bool profile_rtm);
-  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
-#if INCLUDE_RTM_OPT
-  void rtm_counters_update(Register abort_status, Register rtm_counters);
-  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
-  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
-                                   RTMLockingCounters* rtm_counters,
-                                   Metadata* method_data);
-  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
-                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
-  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
-  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
-  void rtm_stack_locking(Register obj, Register tmp, Register scr,
-                         Register retry_on_abort_count,
-                         RTMLockingCounters* stack_rtm_counters,
-                         Metadata* method_data, bool profile_rtm,
-                         Label& DONE_LABEL, Label& IsInflated);
-  void rtm_inflated_locking(Register obj, Register box, Register tmp,
-                            Register scr, Register retry_on_busy_count,
-                            Register retry_on_abort_count,
-                            RTMLockingCounters* rtm_counters,
-                            Metadata* method_data, bool profile_rtm,
-                            Label& DONE_LABEL);
-#endif
-#endif

  Condition negate_condition(Condition cond);

@ -1635,60 +1595,6 @@ public:
  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }

-#ifdef COMPILER2
-  // Generic instructions support for use in .ad files C2 code generation
-  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
-  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
-  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
-  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
-  void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
-  void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
-  void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
-  void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
-  void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
-  void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
-  void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
-  void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
-
-  // Reductions for vectors of ints, longs, floats, and doubles.
-
-  // dst = src1 + reduce(op, src2) using vtmp as temps
-  void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-#ifdef _LP64
-  void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-#endif // _LP64
-
-  // dst = reduce(op, src2) using vtmp as temps
-  void reduce_fp(int opcode, int vlen,
-                 XMMRegister dst, XMMRegister src,
-                 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
- private:
-  void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
-
-  void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-
-#ifdef _LP64
-  void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
-#endif // _LP64
-
-  void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
-  void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
-  void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
-
-  void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
-  void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
-  void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
-
-  void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
-  void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
-#endif

 public:
  // C2 compiled method's prolog code.
@ -1701,51 +1607,6 @@ public:
  // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
  void xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp);

-#ifdef COMPILER2
-  void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
-                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
-
-  // IndexOf strings.
-  // Small strings are loaded through stack if they cross page boundary.
-  void string_indexof(Register str1, Register str2,
-                      Register cnt1, Register cnt2,
-                      int int_cnt2,  Register result,
-                      XMMRegister vec, Register tmp,
-                      int ae);
-
-  // IndexOf for constant substrings with size >= 8 elements
-  // which don't need to be loaded through stack.
-  void string_indexofC8(Register str1, Register str2,
-                      Register cnt1, Register cnt2,
-                      int int_cnt2,  Register result,
-                      XMMRegister vec, Register tmp,
-                      int ae);
-
-    // Smallest code: we don't need to load through stack,
-    // check string tail.
-
-  // helper function for string_compare
-  void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
-                          Address::ScaleFactor scale, Address::ScaleFactor scale1,
-                          Address::ScaleFactor scale2, Register index, int ae);
-  // Compare strings.
-  void string_compare(Register str1, Register str2,
-                      Register cnt1, Register cnt2, Register result,
-                      XMMRegister vec1, int ae);
-
-  // Search for Non-ASCII character (Negative byte value) in a byte array,
-  // return true if it has any and false otherwise.
-  void has_negatives(Register ary1, Register len,
-                     Register result, Register tmp1,
-                     XMMRegister vec1, XMMRegister vec2);
-
-  // Compare char[] or byte[] arrays.
-  void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
-                     Register limit, Register result, Register chr,
-                     XMMRegister vec1, XMMRegister vec2, bool is_char);
-
-#endif
-
  // Fill primitive arrays
  void generate_fill(BasicType t, bool aligned,
                     Register to, Register value, Register count,
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@ -1177,7 +1177,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
@ -1195,7 +1195,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
+  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
@ -1716,7 +1716,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+    C2_MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
@ -1782,7 +1782,7 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+    C2_MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
@ -1985,7 +1985,7 @@ static inline jlong replicate8_imm(int con, int width) {
 #endif

  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

@ -2000,7 +2000,7 @@ static inline jlong replicate8_imm(int con, int width) {
 #endif

  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
-    MacroAssembler _masm(&cbuf);
+    C2_MacroAssembler _masm(&cbuf);
    __ int3();
  }

@ -2016,7 +2016,7 @@ encode %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
-      MacroAssembler _masm(&cbuf);
+      C2_MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
--- a/src/hotspot/share/adlc/adlparse.cpp
+++ b/src/hotspot/share/adlc/adlparse.cpp
@ -2881,7 +2881,7 @@ void ADLParser::ins_encode_parse_block(InstructForm& inst) {
    // name is chosen to match the __ idiom used for assembly in other
    // parts of hotspot and assumes the existence of the standard
    // #define __ _masm.
-    encoding->add_code("    MacroAssembler _masm(&cbuf);\n");
+    encoding->add_code("    C2_MacroAssembler _masm(&cbuf);\n");
  }

  // Parse the following %{ }% block
@ -3004,9 +3004,9 @@ void ADLParser::ins_encode_parse_block_impl(InstructForm& inst, EncClass* encodi
 // which synthesizes a new encoding class taking the same arguments as
 // the InstructForm, and automatically prefixes the definition with:
 //
-//    MacroAssembler masm(&cbuf);\n");
+//    C2_MacroAssembler _masm(&cbuf);
 //
-//  making it more compact to take advantage of the MacroAssembler and
+//  making it more compact to take advantage of the C2_MacroAssembler and
 //  placing the assembly closer to it's use by instructions.
 void ADLParser::ins_encode_parse(InstructForm& inst) {

--- a/src/hotspot/share/adlc/main.cpp
+++ b/src/hotspot/share/adlc/main.cpp
@ -211,7 +211,6 @@ int main(int argc, char *argv[])
  AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._VM_file._name));
  AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._HPP_file._name));
  AD.addInclude(AD._CPP_file, "memory/allocation.inline.hpp");
-  AD.addInclude(AD._CPP_file, "asm/macroAssembler.inline.hpp");
  AD.addInclude(AD._CPP_file, "code/compiledIC.hpp");
  AD.addInclude(AD._CPP_file, "code/nativeInst.hpp");
  AD.addInclude(AD._CPP_file, "code/vmreg.inline.hpp");
@ -221,6 +220,7 @@ int main(int argc, char *argv[])
  AD.addInclude(AD._CPP_file, "oops/markWord.hpp");
  AD.addInclude(AD._CPP_file, "oops/method.hpp");
  AD.addInclude(AD._CPP_file, "oops/oop.inline.hpp");
+  AD.addInclude(AD._CPP_file, "opto/c2_MacroAssembler.hpp");
  AD.addInclude(AD._CPP_file, "opto/cfgnode.hpp");
  AD.addInclude(AD._CPP_file, "opto/intrinsicnode.hpp");
  AD.addInclude(AD._CPP_file, "opto/locknode.hpp");
--- a/src/hotspot/share/opto/c2_MacroAssembler.hpp
+++ b/src/hotspot/share/opto/c2_MacroAssembler.hpp
@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_OPTO_C2_MACROASSEMBLER_HPP
+#define SHARE_OPTO_C2_MACROASSEMBLER_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "utilities/macros.hpp"
+
+class C2_MacroAssembler: public MacroAssembler {  // home for the C2-only code factored out of MacroAssembler
+ public:
+  // Creation: hands the CodeBuffer straight to the MacroAssembler base; no extra state is set up here.
+  C2_MacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
+  // The include below sits inside the class body, so whatever that header declares becomes a member.
+#include CPU_HEADER(c2_MacroAssembler)
+  // NOTE(review): CPU_HEADER presumably selects the per-CPU header (macro defined elsewhere) -- confirm.
+};
+
+#endif // SHARE_OPTO_C2_MACROASSEMBLER_HPP