8241436: C2: Factor out C2-specific code from MacroAssembler

Reviewed-by: mdoerr, kvn, adinn
This commit is contained in:
Vladimir Ivanov 2020-03-27 13:42:57 +03:00
parent 1dd60a35d1
commit 536e062a56
33 changed files with 6696 additions and 6305 deletions

View File

@ -1545,7 +1545,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
#endif
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ brk(0);
}
@ -1562,7 +1562,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
#endif
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
for (int i = 0; i < _count; i++) {
__ nop();
}
@ -1622,7 +1622,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// n.b. frame size includes space for return pc and rfp
const long framesize = C->output()->frame_size_in_bytes();
@ -1704,7 +1704,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int framesize = C->output()->frame_slots() << LogBytesPerInt;
__ remove_frame(framesize);
@ -1806,7 +1806,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
uint ireg = ideal_reg();
assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
// stack->stack
@ -1834,7 +1834,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
}
}
} else if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
switch (src_lo_rc) {
case rc_int:
if (dst_lo_rc == rc_int) { // gpr --> gpr copy
@ -1842,7 +1842,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
__ mov(as_Register(Matcher::_regEncode[dst_lo]),
as_Register(Matcher::_regEncode[src_lo]));
} else {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
__ movw(as_Register(Matcher::_regEncode[dst_lo]),
as_Register(Matcher::_regEncode[src_lo]));
}
@ -1952,7 +1952,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
#endif
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
int reg = ra_->get_encode(this);
@ -1991,7 +1991,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
// This is the unverified entry point.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ cmp_klass(j_rarg0, rscratch2, rscratch1);
Label skip;
@ -2018,7 +2018,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
// br rscratch1
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a handler.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_exception_handler());
if (base == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
@ -2036,7 +2036,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a handler.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_deopt_handler());
if (base == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
@ -2403,7 +2403,7 @@ void Compile::reshape_address(AddPNode* addp) {
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \
MacroAssembler _masm(&cbuf); \
C2_MacroAssembler _masm(&cbuf); \
{ \
guarantee(INDEX == -1, "mode not permitted for volatile"); \
guarantee(DISP == 0, "mode not permitted for volatile"); \
@ -2448,7 +2448,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
// Used for all non-volatile memory accesses. The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
static void loadStore(C2_MacroAssembler masm, mem_insn insn,
Register reg, int opcode,
Register base, int index, int scale, int disp,
int size_in_memory)
@ -2467,7 +2467,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
(masm.*insn)(reg, addr);
}
static void loadStore(MacroAssembler masm, mem_float_insn insn,
static void loadStore(C2_MacroAssembler masm, mem_float_insn insn,
FloatRegister reg, int opcode,
Register base, int index, int size, int disp,
int size_in_memory)
@ -2498,7 +2498,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
}
}
static void loadStore(MacroAssembler masm, mem_vector_insn insn,
static void loadStore(C2_MacroAssembler masm, mem_vector_insn insn,
FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
int opcode, Register base, int index, int size, int disp)
{
@ -2551,7 +2551,7 @@ encode %{
// catch all for unimplemented encodings
enc_class enc_unimplemented %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ unimplemented("C2 catch all");
%}
@ -2561,7 +2561,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
@ -2569,7 +2569,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
@ -2577,7 +2577,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
@ -2585,7 +2585,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
@ -2593,7 +2593,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
@ -2601,7 +2601,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
@ -2609,7 +2609,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
@ -2617,7 +2617,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
@ -2625,7 +2625,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
@ -2633,7 +2633,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
@ -2641,7 +2641,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
@ -2649,7 +2649,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
%}
@ -2657,7 +2657,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
@ -2665,7 +2665,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
%}
@ -2673,14 +2673,14 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
Register src_reg = as_Register($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strb0(memory1 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
@ -2689,14 +2689,14 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
Register src_reg = as_Register($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strh0(memory2 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
@ -2705,14 +2705,14 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
Register src_reg = as_Register($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strw0(memory4 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
@ -2724,19 +2724,19 @@ encode %{
// we sometimes get asked to store the stack pointer into the
// current thread -- we cannot do that directly on AArch64
if (src_reg == r31_sp) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
__ mov(rscratch2, sp);
src_reg = rscratch2;
}
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_str0(memory8 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
%}
@ -2745,7 +2745,7 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
@ -2753,14 +2753,14 @@ encode %{
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strw_immn(immN src, memory1 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address con = (address)$src$$constant;
// need to do this the hard way until we can manage relocs
// for 32 bit constants
@ -2773,7 +2773,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strw_immnk(immN src, memory4 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address con = (address)$src$$constant;
// need to do this the hard way until we can manage relocs
// for 32 bit constants
@ -2786,7 +2786,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ membar(Assembler::StoreStore);
loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@ -2797,37 +2797,37 @@ encode %{
// Vector loads and stores
enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_strvS(vecD src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_strvD(vecD src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
@ -2929,7 +2929,7 @@ encode %{
// we sometimes get asked to store the stack pointer into the
// current thread -- we cannot do that directly on AArch64
if (src_reg == r31_sp) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
__ mov(rscratch2, sp);
src_reg = rscratch2;
@ -2940,7 +2940,7 @@ encode %{
enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister src_reg = as_FloatRegister($src$$reg);
__ fmovs(rscratch2, src_reg);
}
@ -2950,7 +2950,7 @@ encode %{
enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister src_reg = as_FloatRegister($src$$reg);
__ fmovd(rscratch2, src_reg);
}
@ -2961,7 +2961,7 @@ encode %{
// synchronized read/update encodings
enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register base = as_Register($mem$$base);
int index = $mem$$index;
@ -2990,7 +2990,7 @@ encode %{
%}
enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register src_reg = as_Register($src$$reg);
Register base = as_Register($mem$$base);
int index = $mem$$index;
@ -3020,7 +3020,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
@ -3028,7 +3028,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
@ -3036,7 +3036,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ false, /*release*/ true,
@ -3044,7 +3044,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ false, /*release*/ true,
@ -3057,7 +3057,7 @@ encode %{
// CompareAndSwap sequence to serve as a barrier on acquiring a
// lock.
enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
@ -3065,7 +3065,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
@ -3073,7 +3073,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ true, /*release*/ true,
@ -3081,7 +3081,7 @@ encode %{
%}
enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ true, /*release*/ true,
@ -3090,7 +3090,7 @@ encode %{
// auxiliary used for CompareAndSwapX to set result register
enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register res_reg = as_Register($res$$reg);
__ cset(res_reg, Assembler::EQ);
%}
@ -3098,7 +3098,7 @@ encode %{
// prefetch encodings
enc_class aarch64_enc_prefetchw(memory mem) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register base = as_Register($mem$$base);
int index = $mem$$index;
int scale = $mem$$scale;
@ -3119,7 +3119,7 @@ encode %{
/// mov envcodings
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
u_int32_t con = (u_int32_t)$src$$constant;
Register dst_reg = as_Register($dst$$reg);
if (con == 0) {
@ -3130,7 +3130,7 @@ encode %{
%}
enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
u_int64_t con = (u_int64_t)$src$$constant;
if (con == 0) {
@ -3141,7 +3141,7 @@ encode %{
%}
enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
address con = (address)$src$$constant;
if (con == NULL || con == (address)1) {
@ -3166,19 +3166,19 @@ encode %{
%}
enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
__ mov(dst_reg, zr);
%}
enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
__ mov(dst_reg, (u_int64_t)1);
%}
enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address page = (address)$src$$constant;
Register dst_reg = as_Register($dst$$reg);
unsigned long off;
@ -3187,12 +3187,12 @@ encode %{
%}
enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ load_byte_map_base($dst$$Register);
%}
enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
address con = (address)$src$$constant;
if (con == NULL) {
@ -3205,13 +3205,13 @@ encode %{
%}
enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
__ mov(dst_reg, zr);
%}
enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
address con = (address)$src$$constant;
if (con == NULL) {
@ -3226,7 +3226,7 @@ encode %{
// arithmetic encodings
enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register src_reg = as_Register($src1$$reg);
int32_t con = (int32_t)$src2$$constant;
@ -3240,7 +3240,7 @@ encode %{
%}
enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register src_reg = as_Register($src1$$reg);
int32_t con = (int32_t)$src2$$constant;
@ -3254,7 +3254,7 @@ encode %{
%}
enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register src1_reg = as_Register($src1$$reg);
Register src2_reg = as_Register($src2$$reg);
@ -3262,7 +3262,7 @@ encode %{
%}
enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register src1_reg = as_Register($src1$$reg);
Register src2_reg = as_Register($src2$$reg);
@ -3270,7 +3270,7 @@ encode %{
%}
enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register src1_reg = as_Register($src1$$reg);
Register src2_reg = as_Register($src2$$reg);
@ -3278,7 +3278,7 @@ encode %{
%}
enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register dst_reg = as_Register($dst$$reg);
Register src1_reg = as_Register($src1$$reg);
Register src2_reg = as_Register($src2$$reg);
@ -3288,14 +3288,14 @@ encode %{
// compare instruction encodings
enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg1 = as_Register($src1$$reg);
Register reg2 = as_Register($src2$$reg);
__ cmpw(reg1, reg2);
%}
enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg = as_Register($src1$$reg);
int32_t val = $src2$$constant;
if (val >= 0) {
@ -3306,7 +3306,7 @@ encode %{
%}
enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg1 = as_Register($src1$$reg);
u_int32_t val = (u_int32_t)$src2$$constant;
__ movw(rscratch1, val);
@ -3314,14 +3314,14 @@ encode %{
%}
enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg1 = as_Register($src1$$reg);
Register reg2 = as_Register($src2$$reg);
__ cmp(reg1, reg2);
%}
enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg = as_Register($src1$$reg);
int64_t val = $src2$$constant;
if (val >= 0) {
@ -3336,7 +3336,7 @@ encode %{
%}
enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg1 = as_Register($src1$$reg);
u_int64_t val = (u_int64_t)$src2$$constant;
__ mov(rscratch1, val);
@ -3344,45 +3344,45 @@ encode %{
%}
enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg1 = as_Register($src1$$reg);
Register reg2 = as_Register($src2$$reg);
__ cmp(reg1, reg2);
%}
enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg1 = as_Register($src1$$reg);
Register reg2 = as_Register($src2$$reg);
__ cmpw(reg1, reg2);
%}
enc_class aarch64_enc_testp(iRegP src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg = as_Register($src$$reg);
__ cmp(reg, zr);
%}
enc_class aarch64_enc_testn(iRegN src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register reg = as_Register($src$$reg);
__ cmpw(reg, zr);
%}
enc_class aarch64_enc_b(label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label *L = $lbl$$label;
__ b(*L);
%}
enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label *L = $lbl$$label;
__ br ((Assembler::Condition)$cmp$$cmpcode, *L);
%}
enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label *L = $lbl$$label;
__ br ((Assembler::Condition)$cmp$$cmpcode, *L);
%}
@ -3395,7 +3395,7 @@ encode %{
Register result_reg = as_Register($result$$reg);
Label miss;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
NULL, &miss,
/*set_cond_codes:*/ true);
@ -3406,7 +3406,7 @@ encode %{
%}
enc_class aarch64_enc_java_static_call(method meth) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address addr = (address)$meth$$method;
address call;
@ -3433,7 +3433,7 @@ encode %{
%}
enc_class aarch64_enc_java_dynamic_call(method meth) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int method_index = resolved_method_index(cbuf);
address call = __ ic_call((address)$meth$$method, method_index);
if (call == NULL) {
@ -3443,7 +3443,7 @@ encode %{
%}
enc_class aarch64_enc_call_epilog() %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find majik cookie on stack
__ call_Unimplemented();
@ -3451,7 +3451,7 @@ encode %{
%}
enc_class aarch64_enc_java_to_runtime(method meth) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// some calls to generated routines (arraycopy code) are scheduled
// by C2 as runtime calls. if so we can call them using a br (they
@ -3478,23 +3478,23 @@ encode %{
%}
enc_class aarch64_enc_rethrow() %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
%}
enc_class aarch64_enc_ret() %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ ret(lr);
%}
enc_class aarch64_enc_tail_call(iRegP jump_target) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register target_reg = as_Register($jump_target$$reg);
__ br(target_reg);
%}
enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register target_reg = as_Register($jump_target$$reg);
// exception oop should be in r0
// ret addr has been popped into lr
@ -3504,7 +3504,7 @@ encode %{
%}
enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register oop = as_Register($object$$reg);
Register box = as_Register($box$$reg);
Register disp_hdr = as_Register($tmp$$reg);
@ -3582,7 +3582,7 @@ encode %{
%}
enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register oop = as_Register($object$$reg);
Register box = as_Register($box$$reg);
Register disp_hdr = as_Register($tmp$$reg);
@ -8075,7 +8075,7 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
__ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
@ -8118,7 +8118,7 @@ instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
__ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);

View File

@ -0,0 +1,769 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
// Search for str1 in str2 and return index or -1
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
Register cnt2, Register cnt1,
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
Register tmp5, Register tmp6,
int icnt1, Register result, int ae) {
// NOTE: tmp5, tmp6 can be zr depending on specific method version
Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
Register ch1 = rscratch1;
Register ch2 = rscratch2;
Register cnt1tmp = tmp1;
Register cnt2tmp = tmp2;
Register cnt1_neg = cnt1;
Register cnt2_neg = cnt2;
Register result_tmp = tmp4;
bool isL = ae == StrIntrinsicNode::LL;
bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
int str1_chr_shift = str1_isL ? 0:1;
int str2_chr_shift = str2_isL ? 0:1;
int str1_chr_size = str1_isL ? 1:2;
int str2_chr_size = str2_isL ? 1:2;
chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
// Note, inline_string_indexOf() generates checks:
// if (substr.count > string.count) return -1;
// if (substr.count == 0) return 0;
// We have two strings, a source string in str2, cnt2 and a pattern string
// in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
// For larger pattern and source we use a simplified Boyer Moore algorithm.
// With a small pattern and source we use linear scan.
if (icnt1 == -1) {
sub(result_tmp, cnt2, cnt1);
cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
br(LT, LINEARSEARCH);
dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
subs(zr, cnt1, 256);
lsr(tmp1, cnt2, 2);
ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
br(GE, LINEARSTUB);
}
// The Boyer Moore alogorithm is based on the description here:-
//
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
//
// This describes and algorithm with 2 shift rules. The 'Bad Character' rule
// and the 'Good Suffix' rule.
//
// These rules are essentially heuristics for how far we can shift the
// pattern along the search string.
//
// The implementation here uses the 'Bad Character' rule only because of the
// complexity of initialisation for the 'Good Suffix' rule.
//
// This is also known as the Boyer-Moore-Horspool algorithm:-
//
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
//
// This particular implementation has few java-specific optimizations.
//
// #define ASIZE 256
//
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
// int i, j;
// unsigned c;
// unsigned char bc[ASIZE];
//
// /* Preprocessing */
// for (i = 0; i < ASIZE; ++i)
// bc[i] = m;
// for (i = 0; i < m - 1; ) {
// c = x[i];
// ++i;
// // c < 256 for Latin1 string, so, no need for branch
// #ifdef PATTERN_STRING_IS_LATIN1
// bc[c] = m - i;
// #else
// if (c < ASIZE) bc[c] = m - i;
// #endif
// }
//
// /* Searching */
// j = 0;
// while (j <= n - m) {
// c = y[i+j];
// if (x[m-1] == c)
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
// if (i < 0) return j;
// // c < 256 for Latin1 string, so, no need for branch
// #ifdef SOURCE_STRING_IS_LATIN1
// // LL case: (c< 256) always true. Remove branch
// j += bc[y[j+m-1]];
// #endif
// #ifndef PATTERN_STRING_IS_UTF
// // UU case: need if (c<ASIZE) check. Skip 1 character if not.
// if (c < ASIZE)
// j += bc[y[j+m-1]];
// else
// j += 1
// #endif
// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
// // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
// if (c < ASIZE)
// j += bc[y[j+m-1]];
// else
// j += m
// #endif
// }
// }
if (icnt1 == -1) {
Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
Register cnt1end = tmp2;
Register str2end = cnt2;
Register skipch = tmp2;
// str1 length is >=8, so, we can read at least 1 register for cases when
// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
// UL case. We'll re-read last character in inner pre-loop code to have
// single outer pre-loop load
const int firstStep = isL ? 7 : 3;
const int ASIZE = 256;
const int STORED_BYTES = 32; // amount of bytes stored per instruction
sub(sp, sp, ASIZE);
mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
mov(ch1, sp);
BIND(BM_INIT_LOOP);
stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
subs(tmp5, tmp5, 1);
br(GT, BM_INIT_LOOP);
sub(cnt1tmp, cnt1, 1);
mov(tmp5, str2);
add(str2end, str2, result_tmp, LSL, str2_chr_shift);
sub(ch2, cnt1, 1);
mov(tmp3, str1);
BIND(BCLOOP);
(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
if (!str1_isL) {
subs(zr, ch1, ASIZE);
br(HS, BCSKIP);
}
strb(ch2, Address(sp, ch1));
BIND(BCSKIP);
subs(ch2, ch2, 1);
br(GT, BCLOOP);
add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
if (str1_isL == str2_isL) {
// load last 8 bytes (8LL/4UU symbols)
ldr(tmp6, Address(tmp6, -wordSize));
} else {
ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
// convert Latin1 to UTF. We'll have to wait until load completed, but
// it's still faster than per-character loads+checks
lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
ubfx(ch1, tmp6, 8, 8); // str1[N-2]
ubfx(ch2, tmp6, 16, 8); // str1[N-3]
andr(tmp6, tmp6, 0xFF); // str1[N-4]
orr(ch2, ch1, ch2, LSL, 16);
orr(tmp6, tmp6, tmp3, LSL, 48);
orr(tmp6, tmp6, ch2, LSL, 16);
}
BIND(BMLOOPSTR2);
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
if (str1_isL == str2_isL) {
// re-init tmp3. It's for free because it's executed in parallel with
// load above. Alternative is to initialize it before loop, but it'll
// affect performance on in-order systems with 2 or more ld/st pipelines
lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
}
if (!isL) { // UU/UL case
lsl(ch2, cnt1tmp, 1); // offset in bytes
}
cmp(tmp3, skipch);
br(NE, BMSKIP);
ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
mov(ch1, tmp6);
if (isL) {
b(BMLOOPSTR1_AFTER_LOAD);
} else {
sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
b(BMLOOPSTR1_CMP);
}
BIND(BMLOOPSTR1);
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
BIND(BMLOOPSTR1_AFTER_LOAD);
subs(cnt1tmp, cnt1tmp, 1);
br(LT, BMLOOPSTR1_LASTCMP);
BIND(BMLOOPSTR1_CMP);
cmp(ch1, ch2);
br(EQ, BMLOOPSTR1);
BIND(BMSKIP);
if (!isL) {
// if we've met UTF symbol while searching Latin1 pattern, then we can
// skip cnt1 symbols
if (str1_isL != str2_isL) {
mov(result_tmp, cnt1);
} else {
mov(result_tmp, 1);
}
subs(zr, skipch, ASIZE);
br(HS, BMADV);
}
ldrb(result_tmp, Address(sp, skipch)); // load skip distance
BIND(BMADV);
sub(cnt1tmp, cnt1, 1);
add(str2, str2, result_tmp, LSL, str2_chr_shift);
cmp(str2, str2end);
br(LE, BMLOOPSTR2);
add(sp, sp, ASIZE);
b(NOMATCH);
BIND(BMLOOPSTR1_LASTCMP);
cmp(ch1, ch2);
br(NE, BMSKIP);
BIND(BMMATCH);
sub(result, str2, tmp5);
if (!str2_isL) lsr(result, result, 1);
add(sp, sp, ASIZE);
b(DONE);
BIND(LINEARSTUB);
cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
br(LT, LINEAR_MEDIUM);
mov(result, zr);
RuntimeAddress stub = NULL;
if (isL) {
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
} else if (str1_isL) {
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
} else {
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
}
trampoline_call(stub);
b(DONE);
}
BIND(LINEARSEARCH);
{
Label DO1, DO2, DO3;
Register str2tmp = tmp2;
Register first = tmp3;
if (icnt1 == -1)
{
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
br(LT, DOSHORT);
BIND(LINEAR_MEDIUM);
(this->*str1_load_1chr)(first, Address(str1));
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(FIRST_LOOP);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
cmp(first, ch2);
br(EQ, STR1_LOOP);
BIND(STR2_NEXT);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, FIRST_LOOP);
b(NOMATCH);
BIND(STR1_LOOP);
adds(cnt1tmp, cnt1_neg, str1_chr_size);
add(cnt2tmp, cnt2_neg, str2_chr_size);
br(GE, MATCH);
BIND(STR1_NEXT);
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
cmp(ch1, ch2);
br(NE, STR2_NEXT);
adds(cnt1tmp, cnt1tmp, str1_chr_size);
add(cnt2tmp, cnt2tmp, str2_chr_size);
br(LT, STR1_NEXT);
b(MATCH);
BIND(DOSHORT);
if (str1_isL == str2_isL) {
cmp(cnt1, (u1)2);
br(LT, DO1);
br(GT, DO3);
}
}
if (icnt1 == 4) {
Label CH1_LOOP;
(this->*load_4chr)(ch1, str1);
sub(result_tmp, cnt2, 4);
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, CH1_LOOP);
b(NOMATCH);
}
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
Label CH1_LOOP;
BIND(DO2);
(this->*load_2chr)(ch1, str1);
if (icnt1 == 2) {
sub(result_tmp, cnt2, 2);
}
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, CH1_LOOP);
b(NOMATCH);
}
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
BIND(DO3);
(this->*load_2chr)(first, str1);
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
if (icnt1 == 3) {
sub(result_tmp, cnt2, 3);
}
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(FIRST_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmpw(first, ch2);
br(EQ, STR1_LOOP);
BIND(STR2_NEXT);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, FIRST_LOOP);
b(NOMATCH);
BIND(STR1_LOOP);
add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
cmp(ch1, ch2);
br(NE, STR2_NEXT);
b(MATCH);
}
if (icnt1 == -1 || icnt1 == 1) {
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
BIND(DO1);
(this->*str1_load_1chr)(ch1, str1);
cmp(cnt2, (u1)8);
br(LT, DO1_SHORT);
sub(result_tmp, cnt2, 8/str2_chr_size);
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
if (str2_isL) {
orr(ch1, ch1, ch1, LSL, 8);
}
orr(ch1, ch1, ch1, LSL, 16);
orr(ch1, ch1, ch1, LSL, 32);
BIND(CH1_LOOP);
ldr(ch2, Address(str2, cnt2_neg));
eor(ch2, ch1, ch2);
sub(tmp1, ch2, tmp3);
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
bics(tmp1, tmp1, tmp2);
br(NE, HAS_ZERO);
adds(cnt2_neg, cnt2_neg, 8);
br(LT, CH1_LOOP);
cmp(cnt2_neg, (u1)8);
mov(cnt2_neg, 0);
br(LT, CH1_LOOP);
b(NOMATCH);
BIND(HAS_ZERO);
rev(tmp1, tmp1);
clz(tmp1, tmp1);
add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
b(MATCH);
BIND(DO1_SHORT);
mov(result_tmp, cnt2);
lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
BIND(DO1_LOOP);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
cmpw(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LT, DO1_LOOP);
}
}
BIND(NOMATCH);
mov(result, -1);
b(DONE);
BIND(MATCH);
add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
BIND(DONE);
}
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3)
{
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
Register cnt1_neg = cnt1;
Register ch1 = rscratch1;
Register result_tmp = rscratch2;
cbz(cnt1, NOMATCH);
cmp(cnt1, (u1)4);
br(LT, DO1_SHORT);
orr(ch, ch, ch, LSL, 16);
orr(ch, ch, ch, LSL, 32);
sub(cnt1, cnt1, 4);
mov(result_tmp, cnt1);
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
sub(cnt1_neg, zr, cnt1, LSL, 1);
mov(tmp3, 0x0001000100010001);
BIND(CH1_LOOP);
ldr(ch1, Address(str1, cnt1_neg));
eor(ch1, ch, ch1);
sub(tmp1, ch1, tmp3);
orr(tmp2, ch1, 0x7fff7fff7fff7fff);
bics(tmp1, tmp1, tmp2);
br(NE, HAS_ZERO);
adds(cnt1_neg, cnt1_neg, 8);
br(LT, CH1_LOOP);
cmp(cnt1_neg, (u1)8);
mov(cnt1_neg, 0);
br(LT, CH1_LOOP);
b(NOMATCH);
BIND(HAS_ZERO);
rev(tmp1, tmp1);
clz(tmp1, tmp1);
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
b(MATCH);
BIND(DO1_SHORT);
mov(result_tmp, cnt1);
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
sub(cnt1_neg, zr, cnt1, LSL, 1);
BIND(DO1_LOOP);
ldrh(ch1, Address(str1, cnt1_neg));
cmpw(ch, ch1);
br(EQ, MATCH);
adds(cnt1_neg, cnt1_neg, 2);
br(LT, DO1_LOOP);
BIND(NOMATCH);
mov(result, -1);
b(DONE);
BIND(MATCH);
add(result, result_tmp, cnt1_neg, ASR, 1);
BIND(DONE);
}
// Compare strings.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, TAIL_CHECK;
bool isLL = ae == StrIntrinsicNode::LL;
bool isLU = ae == StrIntrinsicNode::LU;
bool isUL = ae == StrIntrinsicNode::UL;
// The stub threshold for LL strings is: 72 (64 + 8) chars
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
bool str1_isL = isLL || isLU;
bool str2_isL = isLL || isUL;
int str1_chr_shift = str1_isL ? 0 : 1;
int str2_chr_shift = str2_isL ? 0 : 1;
int str1_chr_size = str1_isL ? 1 : 2;
int str2_chr_size = str2_isL ? 1 : 2;
int minCharsInWord = isLL ? wordSize : wordSize/2;
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
(uxt_insn)&MacroAssembler::uxthw;
BLOCK_COMMENT("string_compare {");
// Bizzarely, the counts are passed in bytes, regardless of whether they
// are L or U strings, however the result is always in characters.
if (!str1_isL) asrw(cnt1, cnt1, 1);
if (!str2_isL) asrw(cnt2, cnt2, 1);
// Compute the minimum of the string lengths and save the difference.
subsw(result, cnt1, cnt2);
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
// A very short string
cmpw(cnt2, minCharsInWord);
br(Assembler::LE, SHORT_STRING);
// Compare longwords
// load first parts of strings and finish initialization while loading
{
if (str1_isL == str2_isL) { // LL or UU
ldr(tmp1, Address(str1));
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldr(tmp2, Address(str2));
cmp(cnt2, stub_threshold);
br(GE, STUB);
subsw(cnt2, cnt2, minCharsInWord);
br(EQ, TAIL_CHECK);
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
} else if (isLU) {
ldrs(vtmp, Address(str1));
ldr(tmp2, Address(str2));
cmp(cnt2, stub_threshold);
br(GE, STUB);
subw(cnt2, cnt2, 4);
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
zip1(vtmp, T8B, vtmp, vtmpZ);
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
add(cnt1, cnt1, 4);
fmovd(tmp1, vtmp);
} else { // UL case
ldr(tmp1, Address(str1));
ldrs(vtmp, Address(str2));
cmp(cnt2, stub_threshold);
br(GE, STUB);
subw(cnt2, cnt2, 4);
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
zip1(vtmp, T8B, vtmp, vtmpZ);
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
add(cnt1, cnt1, 8);
fmovd(tmp2, vtmp);
}
adds(cnt2, cnt2, isUL ? 4 : 8);
br(GE, TAIL);
eor(rscratch2, tmp1, tmp2);
cbnz(rscratch2, DIFFERENCE);
// main loop
bind(NEXT_WORD);
if (str1_isL == str2_isL) {
ldr(tmp1, Address(str1, cnt2));
ldr(tmp2, Address(str2, cnt2));
adds(cnt2, cnt2, 8);
} else if (isLU) {
ldrs(vtmp, Address(str1, cnt1));
ldr(tmp2, Address(str2, cnt2));
add(cnt1, cnt1, 4);
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp1, vtmp);
adds(cnt2, cnt2, 8);
} else { // UL
ldrs(vtmp, Address(str2, cnt2));
ldr(tmp1, Address(str1, cnt1));
zip1(vtmp, T8B, vtmp, vtmpZ);
add(cnt1, cnt1, 8);
fmovd(tmp2, vtmp);
adds(cnt2, cnt2, 4);
}
br(GE, TAIL);
eor(rscratch2, tmp1, tmp2);
cbz(rscratch2, NEXT_WORD);
b(DIFFERENCE);
bind(TAIL);
eor(rscratch2, tmp1, tmp2);
cbnz(rscratch2, DIFFERENCE);
// Last longword. In the case where length == 4 we compare the
// same longword twice, but that's still faster than another
// conditional branch.
if (str1_isL == str2_isL) {
ldr(tmp1, Address(str1));
ldr(tmp2, Address(str2));
} else if (isLU) {
ldrs(vtmp, Address(str1));
ldr(tmp2, Address(str2));
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp1, vtmp);
} else { // UL
ldrs(vtmp, Address(str2));
ldr(tmp1, Address(str1));
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp2, vtmp);
}
bind(TAIL_CHECK);
eor(rscratch2, tmp1, tmp2);
cbz(rscratch2, DONE);
// Find the first different characters in the longwords and
// compute their difference.
bind(DIFFERENCE);
rev(rscratch2, rscratch2);
clz(rscratch2, rscratch2);
andr(rscratch2, rscratch2, isLL ? -8 : -16);
lsrv(tmp1, tmp1, rscratch2);
(this->*ext_chr)(tmp1, tmp1);
lsrv(tmp2, tmp2, rscratch2);
(this->*ext_chr)(tmp2, tmp2);
subw(result, tmp1, tmp2);
b(DONE);
}
bind(STUB);
RuntimeAddress stub = NULL;
switch(ae) {
case StrIntrinsicNode::LL:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
break;
case StrIntrinsicNode::UU:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
break;
case StrIntrinsicNode::LU:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
break;
case StrIntrinsicNode::UL:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
break;
default:
ShouldNotReachHere();
}
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
trampoline_call(stub);
b(DONE);
bind(SHORT_STRING);
// Is the minimum length zero?
cbz(cnt2, DONE);
// arrange code to do most branches while loading and loading next characters
// while comparing previous
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST_INIT);
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
b(SHORT_LOOP_START);
bind(SHORT_LOOP);
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST);
bind(SHORT_LOOP_START);
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
cmp(tmp1, cnt1);
br(NE, SHORT_LOOP_TAIL);
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST2);
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
cmp(tmp2, rscratch1);
br(EQ, SHORT_LOOP);
sub(result, tmp2, rscratch1);
b(DONE);
bind(SHORT_LOOP_TAIL);
sub(result, tmp1, cnt1);
b(DONE);
bind(SHORT_LAST2);
cmp(tmp2, rscratch1);
br(EQ, DONE);
sub(result, tmp2, rscratch1);
b(DONE);
bind(SHORT_LAST_INIT);
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
bind(SHORT_LAST);
cmp(tmp1, cnt1);
br(EQ, DONE);
sub(result, tmp1, cnt1);
bind(DONE);
BLOCK_COMMENT("} string_compare");
}

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
// C2_MacroAssembler contains high-level macros for C2
public:
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1, Register tmp2, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
void string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
Register tmp5, Register tmp6,
int int_cnt1, Register result, int ae);
void string_indexof_char(Register str1, Register cnt1,
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3);
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP

View File

@ -54,7 +54,6 @@
#ifdef COMPILER2
#include "oops/oop.hpp"
#include "opto/compile.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#endif
@ -4414,737 +4413,6 @@ void MacroAssembler::remove_frame(int framesize) {
}
}
#ifdef COMPILER2
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
// Search for str1 in str2 and return index or -1
void MacroAssembler::string_indexof(Register str2, Register str1,
Register cnt2, Register cnt1,
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
Register tmp5, Register tmp6,
int icnt1, Register result, int ae) {
// NOTE: tmp5, tmp6 can be zr depending on specific method version
Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
Register ch1 = rscratch1;
Register ch2 = rscratch2;
Register cnt1tmp = tmp1;
Register cnt2tmp = tmp2;
Register cnt1_neg = cnt1;
Register cnt2_neg = cnt2;
Register result_tmp = tmp4;
bool isL = ae == StrIntrinsicNode::LL;
bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
int str1_chr_shift = str1_isL ? 0:1;
int str2_chr_shift = str2_isL ? 0:1;
int str1_chr_size = str1_isL ? 1:2;
int str2_chr_size = str2_isL ? 1:2;
chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
// Note, inline_string_indexOf() generates checks:
// if (substr.count > string.count) return -1;
// if (substr.count == 0) return 0;
// We have two strings, a source string in str2, cnt2 and a pattern string
// in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
// For larger pattern and source we use a simplified Boyer Moore algorithm.
// With a small pattern and source we use linear scan.
if (icnt1 == -1) {
sub(result_tmp, cnt2, cnt1);
cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
br(LT, LINEARSEARCH);
dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
subs(zr, cnt1, 256);
lsr(tmp1, cnt2, 2);
ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
br(GE, LINEARSTUB);
}
// The Boyer Moore alogorithm is based on the description here:-
//
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
//
// This describes and algorithm with 2 shift rules. The 'Bad Character' rule
// and the 'Good Suffix' rule.
//
// These rules are essentially heuristics for how far we can shift the
// pattern along the search string.
//
// The implementation here uses the 'Bad Character' rule only because of the
// complexity of initialisation for the 'Good Suffix' rule.
//
// This is also known as the Boyer-Moore-Horspool algorithm:-
//
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
//
// This particular implementation has few java-specific optimizations.
//
// #define ASIZE 256
//
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
// int i, j;
// unsigned c;
// unsigned char bc[ASIZE];
//
// /* Preprocessing */
// for (i = 0; i < ASIZE; ++i)
// bc[i] = m;
// for (i = 0; i < m - 1; ) {
// c = x[i];
// ++i;
// // c < 256 for Latin1 string, so, no need for branch
// #ifdef PATTERN_STRING_IS_LATIN1
// bc[c] = m - i;
// #else
// if (c < ASIZE) bc[c] = m - i;
// #endif
// }
//
// /* Searching */
// j = 0;
// while (j <= n - m) {
// c = y[i+j];
// if (x[m-1] == c)
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
// if (i < 0) return j;
// // c < 256 for Latin1 string, so, no need for branch
// #ifdef SOURCE_STRING_IS_LATIN1
// // LL case: (c< 256) always true. Remove branch
// j += bc[y[j+m-1]];
// #endif
// #ifndef PATTERN_STRING_IS_UTF
// // UU case: need if (c<ASIZE) check. Skip 1 character if not.
// if (c < ASIZE)
// j += bc[y[j+m-1]];
// else
// j += 1
// #endif
// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
// // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
// if (c < ASIZE)
// j += bc[y[j+m-1]];
// else
// j += m
// #endif
// }
// }
if (icnt1 == -1) {
Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
Register cnt1end = tmp2;
Register str2end = cnt2;
Register skipch = tmp2;
// str1 length is >=8, so, we can read at least 1 register for cases when
// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
// UL case. We'll re-read last character in inner pre-loop code to have
// single outer pre-loop load
const int firstStep = isL ? 7 : 3;
const int ASIZE = 256;
const int STORED_BYTES = 32; // amount of bytes stored per instruction
sub(sp, sp, ASIZE);
mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
mov(ch1, sp);
BIND(BM_INIT_LOOP);
stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
subs(tmp5, tmp5, 1);
br(GT, BM_INIT_LOOP);
sub(cnt1tmp, cnt1, 1);
mov(tmp5, str2);
add(str2end, str2, result_tmp, LSL, str2_chr_shift);
sub(ch2, cnt1, 1);
mov(tmp3, str1);
BIND(BCLOOP);
(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
if (!str1_isL) {
subs(zr, ch1, ASIZE);
br(HS, BCSKIP);
}
strb(ch2, Address(sp, ch1));
BIND(BCSKIP);
subs(ch2, ch2, 1);
br(GT, BCLOOP);
add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
if (str1_isL == str2_isL) {
// load last 8 bytes (8LL/4UU symbols)
ldr(tmp6, Address(tmp6, -wordSize));
} else {
ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
// convert Latin1 to UTF. We'll have to wait until load completed, but
// it's still faster than per-character loads+checks
lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
ubfx(ch1, tmp6, 8, 8); // str1[N-2]
ubfx(ch2, tmp6, 16, 8); // str1[N-3]
andr(tmp6, tmp6, 0xFF); // str1[N-4]
orr(ch2, ch1, ch2, LSL, 16);
orr(tmp6, tmp6, tmp3, LSL, 48);
orr(tmp6, tmp6, ch2, LSL, 16);
}
BIND(BMLOOPSTR2);
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
if (str1_isL == str2_isL) {
// re-init tmp3. It's for free because it's executed in parallel with
// load above. Alternative is to initialize it before loop, but it'll
// affect performance on in-order systems with 2 or more ld/st pipelines
lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
}
if (!isL) { // UU/UL case
lsl(ch2, cnt1tmp, 1); // offset in bytes
}
cmp(tmp3, skipch);
br(NE, BMSKIP);
ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
mov(ch1, tmp6);
if (isL) {
b(BMLOOPSTR1_AFTER_LOAD);
} else {
sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
b(BMLOOPSTR1_CMP);
}
BIND(BMLOOPSTR1);
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
BIND(BMLOOPSTR1_AFTER_LOAD);
subs(cnt1tmp, cnt1tmp, 1);
br(LT, BMLOOPSTR1_LASTCMP);
BIND(BMLOOPSTR1_CMP);
cmp(ch1, ch2);
br(EQ, BMLOOPSTR1);
BIND(BMSKIP);
if (!isL) {
// if we've met UTF symbol while searching Latin1 pattern, then we can
// skip cnt1 symbols
if (str1_isL != str2_isL) {
mov(result_tmp, cnt1);
} else {
mov(result_tmp, 1);
}
subs(zr, skipch, ASIZE);
br(HS, BMADV);
}
ldrb(result_tmp, Address(sp, skipch)); // load skip distance
BIND(BMADV);
sub(cnt1tmp, cnt1, 1);
add(str2, str2, result_tmp, LSL, str2_chr_shift);
cmp(str2, str2end);
br(LE, BMLOOPSTR2);
add(sp, sp, ASIZE);
b(NOMATCH);
BIND(BMLOOPSTR1_LASTCMP);
cmp(ch1, ch2);
br(NE, BMSKIP);
BIND(BMMATCH);
sub(result, str2, tmp5);
if (!str2_isL) lsr(result, result, 1);
add(sp, sp, ASIZE);
b(DONE);
BIND(LINEARSTUB);
cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
br(LT, LINEAR_MEDIUM);
mov(result, zr);
RuntimeAddress stub = NULL;
if (isL) {
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
} else if (str1_isL) {
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
} else {
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
}
trampoline_call(stub);
b(DONE);
}
BIND(LINEARSEARCH);
{
Label DO1, DO2, DO3;
Register str2tmp = tmp2;
Register first = tmp3;
if (icnt1 == -1)
{
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
br(LT, DOSHORT);
BIND(LINEAR_MEDIUM);
(this->*str1_load_1chr)(first, Address(str1));
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(FIRST_LOOP);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
cmp(first, ch2);
br(EQ, STR1_LOOP);
BIND(STR2_NEXT);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, FIRST_LOOP);
b(NOMATCH);
BIND(STR1_LOOP);
adds(cnt1tmp, cnt1_neg, str1_chr_size);
add(cnt2tmp, cnt2_neg, str2_chr_size);
br(GE, MATCH);
BIND(STR1_NEXT);
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
cmp(ch1, ch2);
br(NE, STR2_NEXT);
adds(cnt1tmp, cnt1tmp, str1_chr_size);
add(cnt2tmp, cnt2tmp, str2_chr_size);
br(LT, STR1_NEXT);
b(MATCH);
BIND(DOSHORT);
if (str1_isL == str2_isL) {
cmp(cnt1, (u1)2);
br(LT, DO1);
br(GT, DO3);
}
}
if (icnt1 == 4) {
Label CH1_LOOP;
(this->*load_4chr)(ch1, str1);
sub(result_tmp, cnt2, 4);
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, CH1_LOOP);
b(NOMATCH);
}
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
Label CH1_LOOP;
BIND(DO2);
(this->*load_2chr)(ch1, str1);
if (icnt1 == 2) {
sub(result_tmp, cnt2, 2);
}
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(CH1_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmp(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, CH1_LOOP);
b(NOMATCH);
}
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
BIND(DO3);
(this->*load_2chr)(first, str1);
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
if (icnt1 == 3) {
sub(result_tmp, cnt2, 3);
}
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
BIND(FIRST_LOOP);
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
cmpw(first, ch2);
br(EQ, STR1_LOOP);
BIND(STR2_NEXT);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LE, FIRST_LOOP);
b(NOMATCH);
BIND(STR1_LOOP);
add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
cmp(ch1, ch2);
br(NE, STR2_NEXT);
b(MATCH);
}
if (icnt1 == -1 || icnt1 == 1) {
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
BIND(DO1);
(this->*str1_load_1chr)(ch1, str1);
cmp(cnt2, (u1)8);
br(LT, DO1_SHORT);
sub(result_tmp, cnt2, 8/str2_chr_size);
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
if (str2_isL) {
orr(ch1, ch1, ch1, LSL, 8);
}
orr(ch1, ch1, ch1, LSL, 16);
orr(ch1, ch1, ch1, LSL, 32);
BIND(CH1_LOOP);
ldr(ch2, Address(str2, cnt2_neg));
eor(ch2, ch1, ch2);
sub(tmp1, ch2, tmp3);
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
bics(tmp1, tmp1, tmp2);
br(NE, HAS_ZERO);
adds(cnt2_neg, cnt2_neg, 8);
br(LT, CH1_LOOP);
cmp(cnt2_neg, (u1)8);
mov(cnt2_neg, 0);
br(LT, CH1_LOOP);
b(NOMATCH);
BIND(HAS_ZERO);
rev(tmp1, tmp1);
clz(tmp1, tmp1);
add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
b(MATCH);
BIND(DO1_SHORT);
mov(result_tmp, cnt2);
lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
BIND(DO1_LOOP);
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
cmpw(ch1, ch2);
br(EQ, MATCH);
adds(cnt2_neg, cnt2_neg, str2_chr_size);
br(LT, DO1_LOOP);
}
}
BIND(NOMATCH);
mov(result, -1);
b(DONE);
BIND(MATCH);
add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
BIND(DONE);
}
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3)
{
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
Register cnt1_neg = cnt1;
Register ch1 = rscratch1;
Register result_tmp = rscratch2;
cbz(cnt1, NOMATCH);
cmp(cnt1, (u1)4);
br(LT, DO1_SHORT);
orr(ch, ch, ch, LSL, 16);
orr(ch, ch, ch, LSL, 32);
sub(cnt1, cnt1, 4);
mov(result_tmp, cnt1);
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
sub(cnt1_neg, zr, cnt1, LSL, 1);
mov(tmp3, 0x0001000100010001);
BIND(CH1_LOOP);
ldr(ch1, Address(str1, cnt1_neg));
eor(ch1, ch, ch1);
sub(tmp1, ch1, tmp3);
orr(tmp2, ch1, 0x7fff7fff7fff7fff);
bics(tmp1, tmp1, tmp2);
br(NE, HAS_ZERO);
adds(cnt1_neg, cnt1_neg, 8);
br(LT, CH1_LOOP);
cmp(cnt1_neg, (u1)8);
mov(cnt1_neg, 0);
br(LT, CH1_LOOP);
b(NOMATCH);
BIND(HAS_ZERO);
rev(tmp1, tmp1);
clz(tmp1, tmp1);
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
b(MATCH);
BIND(DO1_SHORT);
mov(result_tmp, cnt1);
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
sub(cnt1_neg, zr, cnt1, LSL, 1);
BIND(DO1_LOOP);
ldrh(ch1, Address(str1, cnt1_neg));
cmpw(ch, ch1);
br(EQ, MATCH);
adds(cnt1_neg, cnt1_neg, 2);
br(LT, DO1_LOOP);
BIND(NOMATCH);
mov(result, -1);
b(DONE);
BIND(MATCH);
add(result, result_tmp, cnt1_neg, ASR, 1);
BIND(DONE);
}
// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, TAIL_CHECK;
bool isLL = ae == StrIntrinsicNode::LL;
bool isLU = ae == StrIntrinsicNode::LU;
bool isUL = ae == StrIntrinsicNode::UL;
// The stub threshold for LL strings is: 72 (64 + 8) chars
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
bool str1_isL = isLL || isLU;
bool str2_isL = isLL || isUL;
int str1_chr_shift = str1_isL ? 0 : 1;
int str2_chr_shift = str2_isL ? 0 : 1;
int str1_chr_size = str1_isL ? 1 : 2;
int str2_chr_size = str2_isL ? 1 : 2;
int minCharsInWord = isLL ? wordSize : wordSize/2;
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
(uxt_insn)&MacroAssembler::uxthw;
BLOCK_COMMENT("string_compare {");
// Bizzarely, the counts are passed in bytes, regardless of whether they
// are L or U strings, however the result is always in characters.
if (!str1_isL) asrw(cnt1, cnt1, 1);
if (!str2_isL) asrw(cnt2, cnt2, 1);
// Compute the minimum of the string lengths and save the difference.
subsw(result, cnt1, cnt2);
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
// A very short string
cmpw(cnt2, minCharsInWord);
br(Assembler::LE, SHORT_STRING);
// Compare longwords
// load first parts of strings and finish initialization while loading
{
if (str1_isL == str2_isL) { // LL or UU
ldr(tmp1, Address(str1));
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldr(tmp2, Address(str2));
cmp(cnt2, stub_threshold);
br(GE, STUB);
subsw(cnt2, cnt2, minCharsInWord);
br(EQ, TAIL_CHECK);
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
} else if (isLU) {
ldrs(vtmp, Address(str1));
ldr(tmp2, Address(str2));
cmp(cnt2, stub_threshold);
br(GE, STUB);
subw(cnt2, cnt2, 4);
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
zip1(vtmp, T8B, vtmp, vtmpZ);
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
add(cnt1, cnt1, 4);
fmovd(tmp1, vtmp);
} else { // UL case
ldr(tmp1, Address(str1));
ldrs(vtmp, Address(str2));
cmp(cnt2, stub_threshold);
br(GE, STUB);
subw(cnt2, cnt2, 4);
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
zip1(vtmp, T8B, vtmp, vtmpZ);
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
add(cnt1, cnt1, 8);
fmovd(tmp2, vtmp);
}
adds(cnt2, cnt2, isUL ? 4 : 8);
br(GE, TAIL);
eor(rscratch2, tmp1, tmp2);
cbnz(rscratch2, DIFFERENCE);
// main loop
bind(NEXT_WORD);
if (str1_isL == str2_isL) {
ldr(tmp1, Address(str1, cnt2));
ldr(tmp2, Address(str2, cnt2));
adds(cnt2, cnt2, 8);
} else if (isLU) {
ldrs(vtmp, Address(str1, cnt1));
ldr(tmp2, Address(str2, cnt2));
add(cnt1, cnt1, 4);
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp1, vtmp);
adds(cnt2, cnt2, 8);
} else { // UL
ldrs(vtmp, Address(str2, cnt2));
ldr(tmp1, Address(str1, cnt1));
zip1(vtmp, T8B, vtmp, vtmpZ);
add(cnt1, cnt1, 8);
fmovd(tmp2, vtmp);
adds(cnt2, cnt2, 4);
}
br(GE, TAIL);
eor(rscratch2, tmp1, tmp2);
cbz(rscratch2, NEXT_WORD);
b(DIFFERENCE);
bind(TAIL);
eor(rscratch2, tmp1, tmp2);
cbnz(rscratch2, DIFFERENCE);
// Last longword. In the case where length == 4 we compare the
// same longword twice, but that's still faster than another
// conditional branch.
if (str1_isL == str2_isL) {
ldr(tmp1, Address(str1));
ldr(tmp2, Address(str2));
} else if (isLU) {
ldrs(vtmp, Address(str1));
ldr(tmp2, Address(str2));
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp1, vtmp);
} else { // UL
ldrs(vtmp, Address(str2));
ldr(tmp1, Address(str1));
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp2, vtmp);
}
bind(TAIL_CHECK);
eor(rscratch2, tmp1, tmp2);
cbz(rscratch2, DONE);
// Find the first different characters in the longwords and
// compute their difference.
bind(DIFFERENCE);
rev(rscratch2, rscratch2);
clz(rscratch2, rscratch2);
andr(rscratch2, rscratch2, isLL ? -8 : -16);
lsrv(tmp1, tmp1, rscratch2);
(this->*ext_chr)(tmp1, tmp1);
lsrv(tmp2, tmp2, rscratch2);
(this->*ext_chr)(tmp2, tmp2);
subw(result, tmp1, tmp2);
b(DONE);
}
bind(STUB);
RuntimeAddress stub = NULL;
switch(ae) {
case StrIntrinsicNode::LL:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
break;
case StrIntrinsicNode::UU:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
break;
case StrIntrinsicNode::LU:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
break;
case StrIntrinsicNode::UL:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
break;
default:
ShouldNotReachHere();
}
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
trampoline_call(stub);
b(DONE);
bind(SHORT_STRING);
// Is the minimum length zero?
cbz(cnt2, DONE);
// arrange code to do most branches while loading and loading next characters
// while comparing previous
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST_INIT);
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
b(SHORT_LOOP_START);
bind(SHORT_LOOP);
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST);
bind(SHORT_LOOP_START);
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
cmp(tmp1, cnt1);
br(NE, SHORT_LOOP_TAIL);
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST2);
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
cmp(tmp2, rscratch1);
br(EQ, SHORT_LOOP);
sub(result, tmp2, rscratch1);
b(DONE);
bind(SHORT_LOOP_TAIL);
sub(result, tmp1, cnt1);
b(DONE);
bind(SHORT_LAST2);
cmp(tmp2, rscratch1);
br(EQ, DONE);
sub(result, tmp2, rscratch1);
b(DONE);
bind(SHORT_LAST_INIT);
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
bind(SHORT_LAST);
cmp(tmp1, cnt1);
br(EQ, DONE);
sub(result, tmp1, cnt1);
bind(DONE);
BLOCK_COMMENT("} string_compare");
}
#endif // COMPILER2
// This method checks if provided byte array contains byte with highest bit set.
void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {

View File

@ -1227,11 +1227,6 @@ public:
Register table0, Register table1, Register table2, Register table3,
bool upper = false);
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1, Register tmp2, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
void has_negatives(Register ary1, Register len, Register result);
void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
@ -1260,15 +1255,6 @@ public:
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,
FloatRegister Vtmp3, FloatRegister Vtmp4);
void string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
Register tmp5, Register tmp6,
int int_cnt1, Register result, int ae);
void string_indexof_char(Register str1, Register cnt1,
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3);
void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,

View File

@ -137,7 +137,7 @@ bool SafePointNode::needs_polling_address_input() {
// emit an interrupt that is caught by the debugger (for debugging compiler)
void emit_break(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ breakpoint();
}
@ -157,7 +157,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
void emit_nop(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ nop();
}
@ -165,7 +165,7 @@ void emit_nop(CodeBuffer &cbuf) {
void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
int call_site_offset = cbuf.insts()->mark_off();
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
address target = (address)m->method();
assert(n->as_MachCall()->entry_point() == target, "sanity");
@ -212,7 +212,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
Compile* C = ra_->C;
ConstantTable& constant_table = C->output()->constant_table();
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register r = as_Register(ra_->get_encode(this));
CodeSection* consts_section = __ code()->consts();
@ -269,7 +269,7 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
for (int i = 0; i < OptoPrologueNops; i++) {
__ nop();
@ -339,7 +339,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
#endif
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Compile* C = ra_->C;
size_t framesize = C->output()->frame_size_in_bytes();
@ -429,7 +429,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
// Bailout only for real instruction emit.
// This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
// --------------------------------------
// Check for mem-mem move. Load into unused float registers and fall into
@ -790,7 +790,7 @@ void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
#endif
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
for(int i = 0; i < _count; i += 1) {
__ nop();
}
@ -811,7 +811,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
#endif
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
int reg = ra_->get_encode(this);
Register dst = reg_to_register_object(reg);
@ -847,7 +847,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
#endif
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register iCache = reg_to_register_object(Matcher::inline_cache_reg_encode());
assert(iCache == Ricklass, "should be");
Register receiver = R0;
@ -866,7 +866,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
// Emit exception handler code.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_exception_handler());
if (base == NULL) {
@ -889,7 +889,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
// Can't use any of the current frame's registers as we may have deopted
// at a poll and everything can be live.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_deopt_handler());
if (base == NULL) {
@ -1280,7 +1280,7 @@ encode %{
// preserve mark
address mark = cbuf.insts()->mark();
debug_only(int off0 = cbuf.insts_size());
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int ret_addr_offset = as_MachCall()->ret_addr_offset();
__ adr(LR, mark + ret_addr_offset);
__ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
@ -1294,7 +1294,7 @@ encode %{
// preserve mark
address mark = cbuf.insts()->mark();
debug_only(int off0 = cbuf.insts_size());
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// FP is preserved across all calls, even compiled calls.
// Use it to preserve SP in places where the callee might change the SP.
__ mov(Rmh_SP_save, SP);
@ -1305,12 +1305,12 @@ encode %{
%}
enc_class restore_SP %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(SP, Rmh_SP_save);
%}
enc_class Java_Dynamic_Call (method meth) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
assert(R8_ic_reg == Ricklass, "should be");
__ set_inst_mark();
@ -1338,7 +1338,7 @@ encode %{
val |= (val << bit_width);
}
}
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (val == -1) {
__ mvn($tmp$$Register, 0);
@ -1355,7 +1355,7 @@ encode %{
// Replicate float con 2 times and pack into vector (8 bytes) in regD.
float fval = $src$$constant;
int val = *((int*)&fval);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (val == -1) {
__ mvn($tmp$$Register, 0);
@ -1370,7 +1370,7 @@ encode %{
enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
Label Ldone, Lloop;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register str1_reg = $str1$$Register;
Register str2_reg = $str2$$Register;
@ -1462,7 +1462,7 @@ encode %{
enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
Label Lchar, Lchar_loop, Ldone, Lequal;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register str1_reg = $str1$$Register;
Register str2_reg = $str2$$Register;
@ -1524,7 +1524,7 @@ encode %{
enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
Label Ldone, Lloop, Lequal;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register ary1_reg = $ary1$$Register;
Register ary2_reg = $ary2$$Register;

View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "runtime/basicLock.hpp"
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
void C2_MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
Register limit, Register result,
Register chr1, Register chr2, Label& Ldone) {
Label Lvector, Lloop;
// if (ary1 == ary2)
// return true;
cmpoop(ary1, ary2);
b(Ldone, eq);
// Note: limit contains number of bytes (2*char_elements) != 0.
tst(limit, 0x2); // trailing character ?
b(Lvector, eq);
// compare the trailing char
sub(limit, limit, sizeof(jchar));
ldrh(chr1, Address(ary1, limit));
ldrh(chr2, Address(ary2, limit));
cmp(chr1, chr2);
mov(result, 0, ne); // not equal
b(Ldone, ne);
// only one char ?
tst(limit, limit);
mov(result, 1, eq);
b(Ldone, eq);
// word by word compare, dont't need alignment check
bind(Lvector);
// Shift ary1 and ary2 to the end of the arrays, negate limit
add(ary1, limit, ary1);
add(ary2, limit, ary2);
neg(limit, limit);
bind(Lloop);
ldr_u32(chr1, Address(ary1, limit));
ldr_u32(chr2, Address(ary2, limit));
cmp_32(chr1, chr2);
mov(result, 0, ne); // not equal
b(Ldone, ne);
adds(limit, limit, 2*sizeof(jchar));
b(Lloop, ne);
// Caller should set it:
// mov(result_reg, 1); //equal
}
void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3) {
assert(VM_Version::supports_ldrex(), "unsupported, yet?");
Register Rmark = Rscratch2;
assert(Roop != Rscratch, "");
assert(Roop != Rmark, "");
assert(Rbox != Rscratch, "");
assert(Rbox != Rmark, "");
Label fast_lock, done;
if (UseBiasedLocking && !UseOptoBiasInlining) {
assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
// Fall through if lock not biased otherwise branch to done
}
// Invariant: Rmark loaded below does not contain biased lock pattern
ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
tst(Rmark, markWord::unlocked_value);
b(fast_lock, ne);
// Check for recursive lock
// See comments in InterpreterMacroAssembler::lock_object for
// explanations on the fast recursive locking check.
// -1- test low 2 bits
movs(Rscratch, AsmOperand(Rmark, lsl, 30));
// -2- test (hdr - SP) if the low two bits are 0
sub(Rscratch, Rmark, SP, eq);
movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
// If still 'eq' then recursive locking OK
// set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
b(done);
bind(fast_lock);
str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
bool allow_fallthrough_on_failure = true;
bool one_shot = true;
cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
bind(done);
// At this point flags are set as follows:
// EQ -> Success
// NE -> Failure, branch to slow path
}
void C2_MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) {
assert(VM_Version::supports_ldrex(), "unsupported, yet?");
Register Rmark = Rscratch2;
assert(Roop != Rscratch, "");
assert(Roop != Rmark, "");
assert(Rbox != Rscratch, "");
assert(Rbox != Rmark, "");
Label done;
if (UseBiasedLocking && !UseOptoBiasInlining) {
biased_locking_exit(Roop, Rscratch, done);
}
ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
// If hdr is NULL, we've got recursive locking and there's nothing more to do
cmp(Rmark, 0);
b(done, eq);
// Restore the object header
bool allow_fallthrough_on_failure = true;
bool one_shot = true;
cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
bind(done);
}

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
#define CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
// C2_MacroAssembler contains high-level macros for C2
public:
// Compare char[] arrays aligned to 4 bytes.
void char_arrays_equals(Register ary1, Register ary2,
Register limit, Register result,
Register chr1, Register chr2, Label& Ldone);
void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
#endif // CPU_ARM_C2_MACROASSEMBLER_ARM_HPP

View File

@ -1626,57 +1626,6 @@ void MacroAssembler::lookup_interface_method(Register Rklass,
}
}
#ifdef COMPILER2
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
Register limit, Register result,
Register chr1, Register chr2, Label& Ldone) {
Label Lvector, Lloop;
// if (ary1 == ary2)
// return true;
cmpoop(ary1, ary2);
b(Ldone, eq);
// Note: limit contains number of bytes (2*char_elements) != 0.
tst(limit, 0x2); // trailing character ?
b(Lvector, eq);
// compare the trailing char
sub(limit, limit, sizeof(jchar));
ldrh(chr1, Address(ary1, limit));
ldrh(chr2, Address(ary2, limit));
cmp(chr1, chr2);
mov(result, 0, ne); // not equal
b(Ldone, ne);
// only one char ?
tst(limit, limit);
mov(result, 1, eq);
b(Ldone, eq);
// word by word compare, dont't need alignment check
bind(Lvector);
// Shift ary1 and ary2 to the end of the arrays, negate limit
add(ary1, limit, ary1);
add(ary2, limit, ary2);
neg(limit, limit);
bind(Lloop);
ldr_u32(chr1, Address(ary1, limit));
ldr_u32(chr2, Address(ary2, limit));
cmp_32(chr1, chr2);
mov(result, 0, ne); // not equal
b(Ldone, ne);
adds(limit, limit, 2*sizeof(jchar));
b(Lloop, ne);
// Caller should set it:
// mov(result_reg, 1); //equal
}
#endif
void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
mov_slow(tmpreg1, counter_addr);
@ -1970,92 +1919,6 @@ void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
return bs->resolve(this, decorators, obj);
}
#ifdef COMPILER2
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3)
{
assert(VM_Version::supports_ldrex(), "unsupported, yet?");
Register Rmark = Rscratch2;
assert(Roop != Rscratch, "");
assert(Roop != Rmark, "");
assert(Rbox != Rscratch, "");
assert(Rbox != Rmark, "");
Label fast_lock, done;
if (UseBiasedLocking && !UseOptoBiasInlining) {
assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
// Fall through if lock not biased otherwise branch to done
}
// Invariant: Rmark loaded below does not contain biased lock pattern
ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
tst(Rmark, markWord::unlocked_value);
b(fast_lock, ne);
// Check for recursive lock
// See comments in InterpreterMacroAssembler::lock_object for
// explanations on the fast recursive locking check.
// -1- test low 2 bits
movs(Rscratch, AsmOperand(Rmark, lsl, 30));
// -2- test (hdr - SP) if the low two bits are 0
sub(Rscratch, Rmark, SP, eq);
movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
// If still 'eq' then recursive locking OK
// set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
b(done);
bind(fast_lock);
str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
bool allow_fallthrough_on_failure = true;
bool one_shot = true;
cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
bind(done);
// At this point flags are set as follows:
// EQ -> Success
// NE -> Failure, branch to slow path
}
void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2)
{
assert(VM_Version::supports_ldrex(), "unsupported, yet?");
Register Rmark = Rscratch2;
assert(Roop != Rscratch, "");
assert(Roop != Rmark, "");
assert(Rbox != Rscratch, "");
assert(Rbox != Rmark, "");
Label done;
if (UseBiasedLocking && !UseOptoBiasInlining) {
biased_locking_exit(Roop, Rscratch, done);
}
ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
// If hdr is NULL, we've got recursive locking and there's nothing more to do
cmp(Rmark, 0);
b(done, eq);
// Restore the object header
bool allow_fallthrough_on_failure = true;
bool one_shot = true;
cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
bind(done);
}
#endif // COMPILER2
void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
if (SafepointMechanism::uses_thread_local_poll()) {
ldr_u32(tmp1, Address(Rthread, Thread::polling_page_offset()));

View File

@ -1068,11 +1068,6 @@ public:
Register temp_reg2,
Label& L_no_such_interface);
// Compare char[] arrays aligned to 4 bytes.
void char_arrays_equals(Register ary1, Register ary2,
Register limit, Register result,
Register chr1, Register chr2, Label& Ldone);
void floating_cmp(Register dst);
@ -1090,11 +1085,6 @@ public:
void restore_default_fp_mode();
#ifdef COMPILER2
void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
#endif
void safepoint_poll(Register tmp1, Label& slow_path);
void get_polling_page(Register dest);
void read_polling_page(Register dest, relocInfo::relocType rtype);

View File

@ -0,0 +1,580 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Intrinsics for CompactStrings
// Compress char[] to byte[] by compressing 16 bytes at once.
void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
Label& Lfailure) {
const Register tmp0 = R0;
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
Label Lloop, Lslow;
// Check if cnt >= 8 (= 16 bytes)
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
srwi_(tmp2, cnt, 3);
beq(CCR0, Lslow);
ori(tmp1, tmp1, 0xFF);
rldimi(tmp1, tmp1, 32, 0);
mtctr(tmp2);
// 2x unrolled loop
bind(Lloop);
ld(tmp2, 0, src); // _0_1_2_3 (Big Endian)
ld(tmp4, 8, src); // _4_5_6_7
orr(tmp0, tmp2, tmp4);
rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3
rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7
andc_(tmp0, tmp0, tmp1);
bne(CCR0, Lfailure); // Not latin1.
addi(src, src, 16);
rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
srdi(tmp2, tmp2, 3*8); // ____0_2_
rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
srdi(tmp4, tmp4, 3*8); // ____4_6_
orr(tmp2, tmp2, tmp3); // ____0123
orr(tmp4, tmp4, tmp5); // ____4567
stw(tmp2, 0, dst);
stw(tmp4, 4, dst);
addi(dst, dst, 8);
bdnz(Lloop);
bind(Lslow); // Fallback to slow version
}
// Compress char[] to byte[]. cnt must be positive int.
void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
Label Lloop;
mtctr(cnt);
bind(Lloop);
lhz(tmp, 0, src);
cmplwi(CCR0, tmp, 0xff);
bgt(CCR0, Lfailure); // Not latin1.
addi(src, src, 2);
stb(tmp, 0, dst);
addi(dst, dst, 1);
bdnz(Lloop);
}
// Inflate byte[] to char[] by inflating 16 bytes at once.
void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
const Register tmp0 = R0;
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
Label Lloop, Lslow;
// Check if cnt >= 8
srwi_(tmp2, cnt, 3);
beq(CCR0, Lslow);
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF
ori(tmp1, tmp1, 0xFF);
mtctr(tmp2);
// 2x unrolled loop
bind(Lloop);
lwz(tmp2, 0, src); // ____0123 (Big Endian)
lwz(tmp4, 4, src); // ____4567
addi(src, src, 8);
rldicl(tmp3, tmp2, 7*8, 64-8); // _______2
rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
rldicl(tmp5, tmp4, 7*8, 64-8); // _______6
rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
andc(tmp0, tmp2, tmp1); // ____0_1_
rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
andc(tmp3, tmp4, tmp1); // ____4_5_
rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3
rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7
std(tmp2, 0, dst);
std(tmp4, 8, dst);
addi(dst, dst, 16);
bdnz(Lloop);
bind(Lslow); // Fallback to slow version
}
// Inflate byte[] to char[]. cnt must be positive int.
void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
Label Lloop;
mtctr(cnt);
bind(Lloop);
lbz(tmp, 0, src);
addi(src, src, 1);
sth(tmp, 0, dst);
addi(dst, dst, 2);
bdnz(Lloop);
}
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register result, int ae) {
const Register tmp0 = R0,
diff = tmp1;
assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
Label Ldone, Lslow, Lloop, Lreturn_diff;
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
// we interchange str1 and str2 in the UL case and negate the result.
// Like this, str1 is always latin1 encoded, except for the UU case.
// In addition, we need 0 (or sign which is 0) extend.
if (ae == StrIntrinsicNode::UU) {
srwi(cnt1, cnt1, 1);
} else {
clrldi(cnt1, cnt1, 32);
}
if (ae != StrIntrinsicNode::LL) {
srwi(cnt2, cnt2, 1);
} else {
clrldi(cnt2, cnt2, 32);
}
// See if the lengths are different, and calculate min in cnt1.
// Save diff in case we need it for a tie-breaker.
subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
// if (diff > 0) { cnt1 = cnt2; }
if (VM_Version::has_isel()) {
isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
} else {
Label Lskip;
blt(CCR0, Lskip);
mr(cnt1, cnt2);
bind(Lskip);
}
// Rename registers
Register chr1 = result;
Register chr2 = tmp0;
// Compare multiple characters in fast loop (only implemented for same encoding).
int stride1 = 8, stride2 = 8;
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
Label Lfastloop, Lskipfast;
srwi_(tmp0, cnt1, log2_chars_per_iter);
beq(CCR0, Lskipfast);
rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
mtctr(tmp0);
bind(Lfastloop);
ld(chr1, 0, str1);
ld(chr2, 0, str2);
cmpd(CCR0, chr1, chr2);
bne(CCR0, Lslow);
addi(str1, str1, stride1);
addi(str2, str2, stride2);
bdnz(Lfastloop);
mr(cnt1, cnt2); // Remaining characters.
bind(Lskipfast);
}
// Loop which searches the first difference character by character.
cmpwi(CCR0, cnt1, 0);
beq(CCR0, Lreturn_diff);
bind(Lslow);
mtctr(cnt1);
switch (ae) {
case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
case StrIntrinsicNode::UL: // fallthru (see comment above)
case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
default: ShouldNotReachHere(); break;
}
bind(Lloop);
if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
subf_(result, chr2, chr1); // result = chr1 - chr2
bne(CCR0, Ldone);
addi(str1, str1, stride1);
addi(str2, str2, stride2);
bdnz(Lloop);
// If strings are equal up to min length, return the length difference.
bind(Lreturn_diff);
mr(result, diff);
// Otherwise, return the difference between the first mismatched chars.
bind(Ldone);
if (ae == StrIntrinsicNode::UL) {
neg(result, result); // Negate result (see note above).
}
}
void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp1, Register result, bool is_byte) {
const Register tmp0 = R0;
assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
bool limit_needs_shift = false;
if (is_array_equ) {
const int length_offset = arrayOopDesc::length_offset_in_bytes();
const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
// Return true if the same array.
cmpd(CCR0, ary1, ary2);
beq(CCR0, Lskiploop);
// Return false if one of them is NULL.
cmpdi(CCR0, ary1, 0);
cmpdi(CCR1, ary2, 0);
li(result, 0);
cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
beq(CCR0, Ldone);
// Load the lengths of arrays.
lwz(limit, length_offset, ary1);
lwz(tmp0, length_offset, ary2);
// Return false if the two arrays are not equal length.
cmpw(CCR0, limit, tmp0);
bne(CCR0, Ldone);
// Load array addresses.
addi(ary1, ary1, base_offset);
addi(ary2, ary2, base_offset);
} else {
limit_needs_shift = !is_byte;
li(result, 0); // Assume not equal.
}
// Rename registers
Register chr1 = tmp0;
Register chr2 = tmp1;
// Compare 8 bytes per iteration in fast loop.
const int log2_chars_per_iter = is_byte ? 3 : 2;
srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
beq(CCR0, Lskipfast);
mtctr(tmp0);
bind(Lfastloop);
ld(chr1, 0, ary1);
ld(chr2, 0, ary2);
addi(ary1, ary1, 8);
addi(ary2, ary2, 8);
cmpd(CCR0, chr1, chr2);
bne(CCR0, Ldone);
bdnz(Lfastloop);
bind(Lskipfast);
rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
beq(CCR0, Lskiploop);
mtctr(limit);
// Character by character.
bind(Lloop);
if (is_byte) {
lbz(chr1, 0, ary1);
lbz(chr2, 0, ary2);
addi(ary1, ary1, 1);
addi(ary2, ary2, 1);
} else {
lhz(chr1, 0, ary1);
lhz(chr2, 0, ary2);
addi(ary1, ary1, 2);
addi(ary2, ary2, 2);
}
cmpw(CCR0, chr1, chr2);
bne(CCR0, Ldone);
bdnz(Lloop);
bind(Lskiploop);
li(result, 1); // All characters are equal.
bind(Ldone);
}
void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
// Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
Label L_TooShort, L_Found, L_NotFound, L_End;
Register last_addr = haycnt, // Kill haycnt at the beginning.
addr = tmp1,
n_start = tmp2,
ch1 = tmp3,
ch2 = R0;
assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
// **************************************************************************************************
// Prepare for main loop: optimized for needle count >=2, bail out otherwise.
// **************************************************************************************************
// Compute last haystack addr to use if no match gets found.
clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value.
addi(addr, haystack, -h_csize); // Accesses use pre-increment.
if (needlecntval == 0) { // variable needlecnt
cmpwi(CCR6, needlecnt, 2);
clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
}
if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
if (needlecntval == 0) { // variable needlecnt
subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
addi(needlecnt, needlecnt, -2); // Rest of needle.
} else { // constant needlecnt
guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
}
if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
if (ae ==StrIntrinsicNode::UL) {
srwi(tmp4, n_start, 1*8); // ___0
rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
}
add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
// Main Loop (now we have at least 2 characters).
Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
bind(L_OuterLoop); // Search for 1st 2 characters.
Register addr_diff = tmp4;
subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
addi(addr, addr, h_csize); // This is the new address we want to use for comparing.
srdi_(ch2, addr_diff, h_csize);
beq(CCR0, L_FinalCheck); // 2 characters left?
mtctr(ch2); // num of characters / 2
bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment).
lwz(ch1, 0, addr);
lwz(ch2, 2, addr);
} else {
lhz(ch1, 0, addr);
lhz(ch2, 1, addr);
}
cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
cmpw(CCR1, ch2, n_start);
beq(CCR0, L_Comp1); // Did we find the needle start?
beq(CCR1, L_Comp2);
addi(addr, addr, 2 * h_csize);
bdnz(L_InnerLoop);
bind(L_FinalCheck);
andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
beq(CCR0, L_NotFound);
if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
cmpw(CCR1, ch1, n_start);
beq(CCR1, L_Comp1);
bind(L_NotFound);
li(result, -1); // not found
b(L_End);
// **************************************************************************************************
// Special Case: unfortunately, the variable needle case can be called with needlecnt<2
// **************************************************************************************************
if (needlecntval == 0) { // We have to handle these cases separately.
Label L_OneCharLoop;
bind(L_TooShort);
mtctr(haycnt);
if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
bind(L_OneCharLoop);
if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
cmpw(CCR1, ch1, n_start);
beq(CCR1, L_Found); // Did we find the one character needle?
bdnz(L_OneCharLoop);
li(result, -1); // Not found.
b(L_End);
}
// **************************************************************************************************
// Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
// **************************************************************************************************
// Compare the rest
bind(L_Comp2);
addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit.
bind(L_Comp1); // Addr points to possible needle start.
if (needlecntval != 2) { // Const needlecnt==2?
if (needlecntval != 3) {
if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
Register n_ind = tmp4,
h_ind = n_ind;
li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2.
mtctr(needlecnt); // Decremented by 2, still > 0.
Label L_CompLoop;
bind(L_CompLoop);
if (ae ==StrIntrinsicNode::UL) {
h_ind = ch1;
sldi(h_ind, n_ind, 1);
}
if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
cmpw(CCR1, ch1, ch2);
bne(CCR1, L_OuterLoop);
addi(n_ind, n_ind, n_csize);
bdnz(L_CompLoop);
} else { // No loop required if there's only one needle character left.
if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
cmpw(CCR1, ch1, ch2);
bne(CCR1, L_OuterLoop);
}
}
// Return index ...
bind(L_Found);
subf(result, haystack, addr); // relative to haystack, ...
if (h_csize == 2) { srdi(result, result, 1); } // in characters.
bind(L_End);
} // string_indexof
void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
Register addr = tmp1,
ch1 = tmp2,
ch2 = R0;
const int h_csize = is_byte ? 1 : 2;
//4:
srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
mr(addr, haystack);
beq(CCR0, L_FinalCheck);
mtctr(tmp2); // Move to count register.
//8:
bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
if (!is_byte) {
lhz(ch1, 0, addr);
lhz(ch2, 2, addr);
} else {
lbz(ch1, 0, addr);
lbz(ch2, 1, addr);
}
(needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
(needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
beq(CCR0, L_Found1); // Did we find the needle?
beq(CCR1, L_Found2);
addi(addr, addr, 2 * h_csize);
bdnz(L_InnerLoop);
//16:
bind(L_FinalCheck);
andi_(R0, haycnt, 1);
beq(CCR0, L_NotFound);
if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
(needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
beq(CCR1, L_Found1);
//21:
bind(L_NotFound);
li(result, -1); // Not found.
b(L_End);
bind(L_Found2);
addi(addr, addr, h_csize);
//24:
bind(L_Found1); // Return index ...
subf(result, haystack, addr); // relative to haystack, ...
if (!is_byte) { srdi(result, result, 1); } // in characters.
bind(L_End);
} // string_indexof_char
void C2_MacroAssembler::has_negatives(Register src, Register cnt, Register result,
Register tmp1, Register tmp2) {
const Register tmp0 = R0;
assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
// Check if cnt >= 8 (= 16 bytes)
lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080
srwi_(tmp2, cnt, 4);
li(result, 1); // Assume there's a negative byte.
beq(CCR0, Lslow);
ori(tmp1, tmp1, 0x8080);
rldimi(tmp1, tmp1, 32, 0);
mtctr(tmp2);
// 2x unrolled loop
bind(Lfastloop);
ld(tmp2, 0, src);
ld(tmp0, 8, src);
orr(tmp0, tmp2, tmp0);
and_(tmp0, tmp0, tmp1);
bne(CCR0, Ldone); // Found negative byte.
addi(src, src, 16);
bdnz(Lfastloop);
bind(Lslow); // Fallback to slow version
rldicl_(tmp0, cnt, 0, 64-4);
beq(CCR0, Lnoneg);
mtctr(tmp0);
bind(Lloop);
lbz(tmp0, 0, src);
addi(src, src, 1);
andi_(tmp0, tmp0, 0x80);
bne(CCR0, Ldone); // Found negative byte.
bdnz(Lloop);
bind(Lnoneg);
li(result, 0);
bind(Ldone);
}

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
#define CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
// C2_MacroAssembler contains high-level macros for C2
public:
// Intrinsics for CompactStrings
// Compress char[] to byte[] by compressing 16 bytes at once.
void string_compress_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
Label& Lfailure);
// Compress char[] to byte[]. cnt must be positive int.
void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
// Inflate byte[] to char[] by inflating 16 bytes at once.
void string_inflate_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
// Inflate byte[] to char[]. cnt must be positive int.
void string_inflate(Register src, Register dst, Register cnt, Register tmp);
void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
Register tmp1, Register result, int ae);
void array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp1, Register result, bool is_byte);
void string_indexof(Register result, Register haystack, Register haycnt,
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
void string_indexof_char(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
#endif // CPU_PPC_C2_MACROASSEMBLER_PPC_HPP

View File

@ -33,6 +33,7 @@
#include "memory/resourceArea.hpp"
#include "nativeInst_ppc.hpp"
#include "oops/klass.inline.hpp"
#include "oops/methodData.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/icache.hpp"
@ -45,9 +46,6 @@
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/intrinsicnode.hpp"
#endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
@ -3311,552 +3309,6 @@ void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwo
/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
#ifdef COMPILER2
// Intrinsics for CompactStrings
// Compress char[] to byte[] by compressing 16 bytes at once.
void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
Label& Lfailure) {
const Register tmp0 = R0;
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
Label Lloop, Lslow;
// Check if cnt >= 8 (= 16 bytes)
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
srwi_(tmp2, cnt, 3);
beq(CCR0, Lslow);
ori(tmp1, tmp1, 0xFF);
rldimi(tmp1, tmp1, 32, 0);
mtctr(tmp2);
// 2x unrolled loop
bind(Lloop);
ld(tmp2, 0, src); // _0_1_2_3 (Big Endian)
ld(tmp4, 8, src); // _4_5_6_7
orr(tmp0, tmp2, tmp4);
rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3
rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7
andc_(tmp0, tmp0, tmp1);
bne(CCR0, Lfailure); // Not latin1.
addi(src, src, 16);
rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
srdi(tmp2, tmp2, 3*8); // ____0_2_
rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
srdi(tmp4, tmp4, 3*8); // ____4_6_
orr(tmp2, tmp2, tmp3); // ____0123
orr(tmp4, tmp4, tmp5); // ____4567
stw(tmp2, 0, dst);
stw(tmp4, 4, dst);
addi(dst, dst, 8);
bdnz(Lloop);
bind(Lslow); // Fallback to slow version
}
// Compress char[] to byte[]. cnt must be positive int.
void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
Label Lloop;
mtctr(cnt);
bind(Lloop);
lhz(tmp, 0, src);
cmplwi(CCR0, tmp, 0xff);
bgt(CCR0, Lfailure); // Not latin1.
addi(src, src, 2);
stb(tmp, 0, dst);
addi(dst, dst, 1);
bdnz(Lloop);
}
// Inflate byte[] to char[] by inflating 16 bytes at once.
void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
const Register tmp0 = R0;
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
Label Lloop, Lslow;
// Check if cnt >= 8
srwi_(tmp2, cnt, 3);
beq(CCR0, Lslow);
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF
ori(tmp1, tmp1, 0xFF);
mtctr(tmp2);
// 2x unrolled loop
bind(Lloop);
lwz(tmp2, 0, src); // ____0123 (Big Endian)
lwz(tmp4, 4, src); // ____4567
addi(src, src, 8);
rldicl(tmp3, tmp2, 7*8, 64-8); // _______2
rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
rldicl(tmp5, tmp4, 7*8, 64-8); // _______6
rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
andc(tmp0, tmp2, tmp1); // ____0_1_
rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
andc(tmp3, tmp4, tmp1); // ____4_5_
rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3
rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7
std(tmp2, 0, dst);
std(tmp4, 8, dst);
addi(dst, dst, 16);
bdnz(Lloop);
bind(Lslow); // Fallback to slow version
}
// Inflate byte[] to char[]. cnt must be positive int.
void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
Label Lloop;
mtctr(cnt);
bind(Lloop);
lbz(tmp, 0, src);
addi(src, src, 1);
sth(tmp, 0, dst);
addi(dst, dst, 2);
bdnz(Lloop);
}
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register result, int ae) {
const Register tmp0 = R0,
diff = tmp1;
assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
Label Ldone, Lslow, Lloop, Lreturn_diff;
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
// we interchange str1 and str2 in the UL case and negate the result.
// Like this, str1 is always latin1 encoded, except for the UU case.
// In addition, we need 0 (or sign which is 0) extend.
if (ae == StrIntrinsicNode::UU) {
srwi(cnt1, cnt1, 1);
} else {
clrldi(cnt1, cnt1, 32);
}
if (ae != StrIntrinsicNode::LL) {
srwi(cnt2, cnt2, 1);
} else {
clrldi(cnt2, cnt2, 32);
}
// See if the lengths are different, and calculate min in cnt1.
// Save diff in case we need it for a tie-breaker.
subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
// if (diff > 0) { cnt1 = cnt2; }
if (VM_Version::has_isel()) {
isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
} else {
Label Lskip;
blt(CCR0, Lskip);
mr(cnt1, cnt2);
bind(Lskip);
}
// Rename registers
Register chr1 = result;
Register chr2 = tmp0;
// Compare multiple characters in fast loop (only implemented for same encoding).
int stride1 = 8, stride2 = 8;
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
Label Lfastloop, Lskipfast;
srwi_(tmp0, cnt1, log2_chars_per_iter);
beq(CCR0, Lskipfast);
rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
mtctr(tmp0);
bind(Lfastloop);
ld(chr1, 0, str1);
ld(chr2, 0, str2);
cmpd(CCR0, chr1, chr2);
bne(CCR0, Lslow);
addi(str1, str1, stride1);
addi(str2, str2, stride2);
bdnz(Lfastloop);
mr(cnt1, cnt2); // Remaining characters.
bind(Lskipfast);
}
// Loop which searches the first difference character by character.
cmpwi(CCR0, cnt1, 0);
beq(CCR0, Lreturn_diff);
bind(Lslow);
mtctr(cnt1);
switch (ae) {
case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
case StrIntrinsicNode::UL: // fallthru (see comment above)
case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
default: ShouldNotReachHere(); break;
}
bind(Lloop);
if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
subf_(result, chr2, chr1); // result = chr1 - chr2
bne(CCR0, Ldone);
addi(str1, str1, stride1);
addi(str2, str2, stride2);
bdnz(Lloop);
// If strings are equal up to min length, return the length difference.
bind(Lreturn_diff);
mr(result, diff);
// Otherwise, return the difference between the first mismatched chars.
bind(Ldone);
if (ae == StrIntrinsicNode::UL) {
neg(result, result); // Negate result (see note above).
}
}
void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp1, Register result, bool is_byte) {
const Register tmp0 = R0;
assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
bool limit_needs_shift = false;
if (is_array_equ) {
const int length_offset = arrayOopDesc::length_offset_in_bytes();
const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
// Return true if the same array.
cmpd(CCR0, ary1, ary2);
beq(CCR0, Lskiploop);
// Return false if one of them is NULL.
cmpdi(CCR0, ary1, 0);
cmpdi(CCR1, ary2, 0);
li(result, 0);
cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
beq(CCR0, Ldone);
// Load the lengths of arrays.
lwz(limit, length_offset, ary1);
lwz(tmp0, length_offset, ary2);
// Return false if the two arrays are not equal length.
cmpw(CCR0, limit, tmp0);
bne(CCR0, Ldone);
// Load array addresses.
addi(ary1, ary1, base_offset);
addi(ary2, ary2, base_offset);
} else {
limit_needs_shift = !is_byte;
li(result, 0); // Assume not equal.
}
// Rename registers
Register chr1 = tmp0;
Register chr2 = tmp1;
// Compare 8 bytes per iteration in fast loop.
const int log2_chars_per_iter = is_byte ? 3 : 2;
srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
beq(CCR0, Lskipfast);
mtctr(tmp0);
bind(Lfastloop);
ld(chr1, 0, ary1);
ld(chr2, 0, ary2);
addi(ary1, ary1, 8);
addi(ary2, ary2, 8);
cmpd(CCR0, chr1, chr2);
bne(CCR0, Ldone);
bdnz(Lfastloop);
bind(Lskipfast);
rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
beq(CCR0, Lskiploop);
mtctr(limit);
// Character by character.
bind(Lloop);
if (is_byte) {
lbz(chr1, 0, ary1);
lbz(chr2, 0, ary2);
addi(ary1, ary1, 1);
addi(ary2, ary2, 1);
} else {
lhz(chr1, 0, ary1);
lhz(chr2, 0, ary2);
addi(ary1, ary1, 2);
addi(ary2, ary2, 2);
}
cmpw(CCR0, chr1, chr2);
bne(CCR0, Ldone);
bdnz(Lloop);
bind(Lskiploop);
li(result, 1); // All characters are equal.
bind(Ldone);
}
void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
// Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
Label L_TooShort, L_Found, L_NotFound, L_End;
Register last_addr = haycnt, // Kill haycnt at the beginning.
addr = tmp1,
n_start = tmp2,
ch1 = tmp3,
ch2 = R0;
assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
// **************************************************************************************************
// Prepare for main loop: optimized for needle count >=2, bail out otherwise.
// **************************************************************************************************
// Compute last haystack addr to use if no match gets found.
clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value.
addi(addr, haystack, -h_csize); // Accesses use pre-increment.
if (needlecntval == 0) { // variable needlecnt
cmpwi(CCR6, needlecnt, 2);
clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
}
if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
if (needlecntval == 0) { // variable needlecnt
subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
addi(needlecnt, needlecnt, -2); // Rest of needle.
} else { // constant needlecnt
guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
}
if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
if (ae ==StrIntrinsicNode::UL) {
srwi(tmp4, n_start, 1*8); // ___0
rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
}
add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
// Main Loop (now we have at least 2 characters).
Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
bind(L_OuterLoop); // Search for 1st 2 characters.
Register addr_diff = tmp4;
subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
addi(addr, addr, h_csize); // This is the new address we want to use for comparing.
srdi_(ch2, addr_diff, h_csize);
beq(CCR0, L_FinalCheck); // 2 characters left?
mtctr(ch2); // num of characters / 2
bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment).
lwz(ch1, 0, addr);
lwz(ch2, 2, addr);
} else {
lhz(ch1, 0, addr);
lhz(ch2, 1, addr);
}
cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
cmpw(CCR1, ch2, n_start);
beq(CCR0, L_Comp1); // Did we find the needle start?
beq(CCR1, L_Comp2);
addi(addr, addr, 2 * h_csize);
bdnz(L_InnerLoop);
bind(L_FinalCheck);
andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
beq(CCR0, L_NotFound);
if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
cmpw(CCR1, ch1, n_start);
beq(CCR1, L_Comp1);
bind(L_NotFound);
li(result, -1); // not found
b(L_End);
// **************************************************************************************************
// Special Case: unfortunately, the variable needle case can be called with needlecnt<2
// **************************************************************************************************
if (needlecntval == 0) { // We have to handle these cases separately.
Label L_OneCharLoop;
bind(L_TooShort);
mtctr(haycnt);
if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
bind(L_OneCharLoop);
if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
cmpw(CCR1, ch1, n_start);
beq(CCR1, L_Found); // Did we find the one character needle?
bdnz(L_OneCharLoop);
li(result, -1); // Not found.
b(L_End);
}
// **************************************************************************************************
// Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
// **************************************************************************************************
// Compare the rest
bind(L_Comp2);
addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit.
bind(L_Comp1); // Addr points to possible needle start.
if (needlecntval != 2) { // Const needlecnt==2?
if (needlecntval != 3) {
if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
Register n_ind = tmp4,
h_ind = n_ind;
li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2.
mtctr(needlecnt); // Decremented by 2, still > 0.
Label L_CompLoop;
bind(L_CompLoop);
if (ae ==StrIntrinsicNode::UL) {
h_ind = ch1;
sldi(h_ind, n_ind, 1);
}
if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
cmpw(CCR1, ch1, ch2);
bne(CCR1, L_OuterLoop);
addi(n_ind, n_ind, n_csize);
bdnz(L_CompLoop);
} else { // No loop required if there's only one needle character left.
if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
cmpw(CCR1, ch1, ch2);
bne(CCR1, L_OuterLoop);
}
}
// Return index ...
bind(L_Found);
subf(result, haystack, addr); // relative to haystack, ...
if (h_csize == 2) { srdi(result, result, 1); } // in characters.
bind(L_End);
} // string_indexof
void MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
Register addr = tmp1,
ch1 = tmp2,
ch2 = R0;
const int h_csize = is_byte ? 1 : 2;
//4:
srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
mr(addr, haystack);
beq(CCR0, L_FinalCheck);
mtctr(tmp2); // Move to count register.
//8:
bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
if (!is_byte) {
lhz(ch1, 0, addr);
lhz(ch2, 2, addr);
} else {
lbz(ch1, 0, addr);
lbz(ch2, 1, addr);
}
(needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
(needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
beq(CCR0, L_Found1); // Did we find the needle?
beq(CCR1, L_Found2);
addi(addr, addr, 2 * h_csize);
bdnz(L_InnerLoop);
//16:
bind(L_FinalCheck);
andi_(R0, haycnt, 1);
beq(CCR0, L_NotFound);
if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
(needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
beq(CCR1, L_Found1);
//21:
bind(L_NotFound);
li(result, -1); // Not found.
b(L_End);
bind(L_Found2);
addi(addr, addr, h_csize);
//24:
bind(L_Found1); // Return index ...
subf(result, haystack, addr); // relative to haystack, ...
if (!is_byte) { srdi(result, result, 1); } // in characters.
bind(L_End);
} // string_indexof_char
void MacroAssembler::has_negatives(Register src, Register cnt, Register result,
Register tmp1, Register tmp2) {
const Register tmp0 = R0;
assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
// Check if cnt >= 8 (= 16 bytes)
lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080
srwi_(tmp2, cnt, 4);
li(result, 1); // Assume there's a negative byte.
beq(CCR0, Lslow);
ori(tmp1, tmp1, 0x8080);
rldimi(tmp1, tmp1, 32, 0);
mtctr(tmp2);
// 2x unrolled loop
bind(Lfastloop);
ld(tmp2, 0, src);
ld(tmp0, 8, src);
orr(tmp0, tmp2, tmp0);
and_(tmp0, tmp0, tmp1);
bne(CCR0, Ldone); // Found negative byte.
addi(src, src, 16);
bdnz(Lfastloop);
bind(Lslow); // Fallback to slow version
rldicl_(tmp0, cnt, 0, 64-4);
beq(CCR0, Lnoneg);
mtctr(tmp0);
bind(Lloop);
lbz(tmp0, 0, src);
addi(src, src, 1);
andi_(tmp0, tmp0, 0x80);
bne(CCR0, Ldone); // Found negative byte.
bdnz(Lloop);
bind(Lnoneg);
li(result, 0);
bind(Ldone);
}
#endif // Compiler2
// Helpers for Intrinsic Emitters
//
// Revert the byte order of a 32bit value in a register

View File

@ -761,39 +761,6 @@ class MacroAssembler: public Assembler {
void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0);
void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1);
#ifdef COMPILER2
// Intrinsics for CompactStrings
// Compress char[] to byte[] by compressing 16 bytes at once.
void string_compress_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
Label& Lfailure);
// Compress char[] to byte[]. cnt must be positive int.
void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
// Inflate byte[] to char[] by inflating 16 bytes at once.
void string_inflate_16(Register src, Register dst, Register cnt,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
// Inflate byte[] to char[]. cnt must be positive int.
void string_inflate(Register src, Register dst, Register cnt, Register tmp);
void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
Register tmp1, Register result, int ae);
void array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp1, Register result, bool is_byte);
void string_indexof(Register result, Register haystack, Register haycnt,
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
void string_indexof_char(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
#endif
// Emitters for BigInteger.multiplyToLen intrinsic.
inline void multiply64(Register dest_hi, Register dest_lo,
Register x, Register y);

View File

@ -1144,7 +1144,7 @@ bool SafePointNode::needs_polling_address_input() {
// Emit an interrupt that is caught by the debugger (for debugging compiler).
void emit_break(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ illtrap();
}
@ -1165,7 +1165,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
//=============================================================================
void emit_nop(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ nop();
}
@ -1184,12 +1184,14 @@ source_hpp %{ // Header information of the source block.
//---< Used for optimization in Compile::Shorten_branches >---
//--------------------------------------------------------------
class C2_MacroAssembler;
class CallStubImpl {
public:
// Emit call stub, compiled java to interpreter.
static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
// Size of call trampoline stub.
// This doesn't need to be accurate to the byte, but it
@ -1220,7 +1222,7 @@ source %{
// load the call target from the constant pool
// branch via CTR (LR/link still points to the call-site above)
void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
if (stub == NULL) {
ciEnv::current()->record_out_of_memory_failure();
@ -1251,7 +1253,7 @@ typedef struct {
// - Add a relocation at the branch-and-link instruction.
// - Emit a branch-and-link.
// - Remember the return pc offset.
EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
EmitCallOffsets offsets = { -1, -1 };
const int start_offset = __ offset();
offsets.insts_call_instruction_offset = __ offset();
@ -1379,7 +1381,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
const long framesize = C->output()->frame_size_in_bytes();
assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
@ -1571,7 +1573,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
assert(framesize >= 0, "negative frame-size?");
@ -1637,7 +1639,7 @@ int MachEpilogNode::safepoint_offset() const {
#if 0 // TODO: PPC port
void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (LoadPollAddressFromThread) {
_masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread);
} else {
@ -1754,7 +1756,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
int src_offset = ra_->reg2offset(src_lo);
int dst_offset = ra_->reg2offset(dst_lo);
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
__ ld(R0, src_offset, R1_SP);
__ std(R0, dst_offset, R1_SP);
__ ld(R0, src_offset+8, R1_SP);
@ -1767,7 +1769,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
int dst_offset = ra_->reg2offset(dst_lo);
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
__ addi(R0, R1_SP, dst_offset);
__ stxvd2x(Rsrc, R0);
}
@ -1778,7 +1780,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
int src_offset = ra_->reg2offset(src_lo);
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
__ addi(R0, R1_SP, src_offset);
__ lxvd2x(Rdst, R0);
}
@ -1789,7 +1791,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
__ xxlor(Rdst, Rsrc, Rsrc);
}
size += 4;
@ -1833,7 +1835,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
size = (Rsrc != Rdst) ? 4 : 0;
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
if (size) {
__ mr(Rdst, Rsrc);
}
@ -1879,7 +1881,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
// Check for float reg-reg copy.
if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
__ fmr(Rdst, Rsrc);
@ -2049,7 +2051,7 @@ void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
#endif
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// _count contains the number of nops needed for padding.
for (int i = 0; i < _count; i++) {
__ nop();
@ -2070,7 +2072,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
#endif
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
int reg = ra_->get_encode(this);
@ -2096,7 +2098,7 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// This is the unverified entry point.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// Inline_cache contains a klass.
Register ic_klass = as_Register(Matcher::inline_cache_reg_encode());
@ -2179,7 +2181,7 @@ class HandlerImpl {
source %{
int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_exception_handler());
if (base == NULL) return 0; // CodeBuffer::expand failed
@ -2196,7 +2198,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
// The deopt_handler is like the exception handler, but it calls to
// the deoptimization blob instead of jumping to the exception blob.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_deopt_handler());
if (base == NULL) return 0; // CodeBuffer::expand failed
@ -2660,14 +2662,14 @@ const bool Matcher::convi2l_type_required = true;
encode %{
enc_class enc_unimplemented %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ unimplemented("Unimplemented mach node encoding in AD file.", 13);
%}
enc_class enc_untested %{
#ifdef ASSERT
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ untested("Untested mach node encoding in AD file.");
#else
// TODO: PPC port $archOpcode(ppc64Opcode_none);
@ -2676,7 +2678,7 @@ encode %{
enc_class enc_lbz(iRegIdst dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_lbz);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lbz($dst$$Register, Idisp, $mem$$base$$Register);
%}
@ -2684,7 +2686,7 @@ encode %{
// Load acquire.
enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lbz($dst$$Register, Idisp, $mem$$base$$Register);
__ twi_0($dst$$Register);
@ -2694,7 +2696,7 @@ encode %{
enc_class enc_lhz(iRegIdst dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_lhz);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lhz($dst$$Register, Idisp, $mem$$base$$Register);
%}
@ -2703,7 +2705,7 @@ encode %{
enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lhz($dst$$Register, Idisp, $mem$$base$$Register);
__ twi_0($dst$$Register);
@ -2713,7 +2715,7 @@ encode %{
enc_class enc_lwz(iRegIdst dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_lwz);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lwz($dst$$Register, Idisp, $mem$$base$$Register);
%}
@ -2722,7 +2724,7 @@ encode %{
enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lwz($dst$$Register, Idisp, $mem$$base$$Register);
__ twi_0($dst$$Register);
@ -2731,7 +2733,7 @@ encode %{
enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_ld);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
// Operand 'ds' requires 4-alignment.
assert((Idisp & 0x3) == 0, "unaligned offset");
@ -2741,7 +2743,7 @@ encode %{
// Load acquire.
enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
// Operand 'ds' requires 4-alignment.
assert((Idisp & 0x3) == 0, "unaligned offset");
@ -2752,7 +2754,7 @@ encode %{
enc_class enc_lfd(RegF dst, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_lfd);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
%}
@ -2760,7 +2762,7 @@ encode %{
enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
// TODO: PPC port $archOpcode(ppc64Opcode_ld);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int toc_offset = 0;
address const_toc_addr;
@ -2784,7 +2786,7 @@ encode %{
enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
// TODO: PPC port $archOpcode(ppc64Opcode_addis);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (!ra_->C->output()->in_scratch_emit_size()) {
address const_toc_addr;
@ -3019,7 +3021,7 @@ encode %{
enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
// TODO: PPC port $archOpcode(ppc64Opcode_ld);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int toc_offset = 0;
intptr_t val = $src$$constant;
@ -3052,7 +3054,7 @@ encode %{
enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
// TODO: PPC port $archOpcode(ppc64Opcode_addis);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (!ra_->C->output()->in_scratch_emit_size()) {
intptr_t val = $src$$constant;
relocInfo::relocType constant_reloc = $src->constant_reloc(); // src
@ -3186,14 +3188,14 @@ encode %{
enc_class enc_stw(iRegIsrc src, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_stw);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ stw($src$$Register, Idisp, $mem$$base$$Register);
%}
enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_std);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
// Operand 'ds' requires 4-alignment.
assert((Idisp & 0x3) == 0, "unaligned offset");
@ -3202,14 +3204,14 @@ encode %{
enc_class enc_stfs(RegF src, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_stfs);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
%}
enc_class enc_stfd(RegF src, memory mem) %{
// TODO: PPC port $archOpcode(ppc64Opcode_stfd);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
__ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
%}
@ -3230,7 +3232,7 @@ encode %{
// __ bind(skip_release);
// __ stb(card mark);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label skip_storestore;
#if 0 // TODO: PPC port
@ -3451,7 +3453,7 @@ encode %{
enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int cc = $cmp$$cmpcode;
int flags_reg = $crx$$reg;
Label done;
@ -3466,7 +3468,7 @@ encode %{
enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label done;
assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
// Branch if not (cmp crx).
@ -3480,14 +3482,14 @@ encode %{
// input mapping for latency computation.
enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
// TODO: PPC port $archOpcode(ppc64Opcode_andc);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ andc($dst$$Register, $src1$$Register, $src2$$Register);
%}
enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label done;
__ cmpwi($crx$$CondRegister, $src$$Register, 0);
@ -3500,7 +3502,7 @@ encode %{
enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label done;
__ cmpdi($crx$$CondRegister, $src$$Register, 0);
@ -3513,7 +3515,7 @@ encode %{
enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
Label done;
__ bso($crx$$CondRegister, done);
@ -3525,7 +3527,7 @@ encode %{
enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label done;
__ bso($crx$$CondRegister, done);
__ mffprd($dst$$Register, $src$$FloatRegister);
@ -3536,7 +3538,7 @@ encode %{
enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label d; // dummy
__ bind(d);
Label* p = ($lbl$$label);
@ -3566,7 +3568,7 @@ encode %{
// to ppc64Opcode_bc in order to hide this detail from the scheduler.
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label d; // dummy
__ bind(d);
Label* p = ($lbl$$label);
@ -3598,7 +3600,7 @@ encode %{
// to ppc64Opcode_bc in order to hide this detail from the scheduler.
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label d; // dummy
__ bind(d);
Label* p = ($lbl$$label);
@ -3683,7 +3685,7 @@ encode %{
// Fake operand dst needed for PPC scheduler.
assert($dst$$constant == 0x0, "dst must be 0x0");
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// Mark the code position where the load from the safepoint
// polling page was emitted as relocInfo::poll_type.
__ relocate(relocInfo::poll_type);
@ -3739,7 +3741,7 @@ encode %{
enc_class enc_java_static_call(method meth) %{
// TODO: PPC port $archOpcode(ppc64Opcode_bl);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address entry_point = (address)$meth$$method;
if (!_method) {
@ -3789,7 +3791,7 @@ encode %{
enc_class enc_java_dynamic_call_sched(method meth) %{
// TODO: PPC port $archOpcode(ppc64Opcode_bl);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (!ra_->C->output()->in_scratch_emit_size()) {
// Create a call trampoline stub for the given method.
@ -3892,7 +3894,7 @@ encode %{
// In the code we have to use $constanttablebase.
enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int start_offset = __ offset();
Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
@ -3951,7 +3953,7 @@ encode %{
enc_class enc_java_to_runtime_call (method meth) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
const address start_pc = __ pc();
#if defined(ABI_ELFv2)
@ -3984,7 +3986,7 @@ encode %{
// input mapping for latency computation.
enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
// TODO: PPC port $archOpcode(ppc64Opcode_mtctr);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mtctr($src$$Register);
%}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_S390_C2_MACROASSEMBLER_S390_HPP
#define CPU_S390_C2_MACROASSEMBLER_S390_HPP
// C2_MacroAssembler contains high-level macros for C2
public:
//-------------------------------------------
// Special String Intrinsics Implementation.
//-------------------------------------------
// Intrinsics for CompactStrings
// Restores: src, dst
// Uses: cnt
// Kills: tmp, Z_R0, Z_R1.
// Early clobber: result.
// Boolean precise controls accuracy of result value.
unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
Register tmp, bool precise);
// Inflate byte[] to char[].
unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
// Inflate byte[] to char[].
// Restores: src, dst
// Uses: cnt
// Kills: tmp, Z_R0, Z_R1.
unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp);
// Inflate byte[] to char[], length known at compile time.
// Restores: src, dst
// Kills: tmp, Z_R0, Z_R1.
// Note:
// len is signed int. Counts # characters, not bytes.
unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
// Kills src.
unsigned int has_negatives(Register result, Register src, Register cnt,
Register odd_reg, Register even_reg, Register tmp);
unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
Register odd_reg, Register even_reg, Register result, int ae);
unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
Register odd_reg, Register even_reg, Register result, bool is_byte);
unsigned int string_indexof(Register result, Register haystack, Register haycnt,
Register needle, Register needlecnt, int needlecntval,
Register odd_reg, Register even_reg, int ae);
unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
#endif // CPU_S390_C2_MACROASSEMBLER_S390_HPP

File diff suppressed because it is too large Load Diff

View File

@ -841,52 +841,6 @@ class MacroAssembler: public Assembler {
Register cnt_reg,
Register tmp1_reg, Register tmp2_reg);
//-------------------------------------------
// Special String Intrinsics Implementation.
//-------------------------------------------
// Intrinsics for CompactStrings
// Restores: src, dst
// Uses: cnt
// Kills: tmp, Z_R0, Z_R1.
// Early clobber: result.
// Boolean precise controls accuracy of result value.
#ifdef COMPILER2
unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
Register tmp, bool precise);
// Inflate byte[] to char[].
unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
// Inflate byte[] to char[].
// Restores: src, dst
// Uses: cnt
// Kills: tmp, Z_R0, Z_R1.
unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp);
// Inflate byte[] to char[], length known at compile time.
// Restores: src, dst
// Kills: tmp, Z_R0, Z_R1.
// Note:
// len is signed int. Counts # characters, not bytes.
unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
// Kills src.
unsigned int has_negatives(Register result, Register src, Register cnt,
Register odd_reg, Register even_reg, Register tmp);
unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
Register odd_reg, Register even_reg, Register result, int ae);
unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
Register odd_reg, Register even_reg, Register result, bool is_byte);
unsigned int string_indexof(Register result, Register haystack, Register haycnt,
Register needle, Register needlecnt, int needlecntval,
Register odd_reg, Register even_reg, int ae);
unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
#endif
// Emit an oop const to the constant pool and set a relocation info
// with address current_pc. Return the TOC offset of the constant.

View File

@ -664,13 +664,13 @@ bool SafePointNode::needs_polling_address_input() {
}
void emit_nop(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ z_nop();
}
// Emit an interrupt that is caught by the debugger (for debugging compiler).
void emit_break(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ z_illtrap();
}
@ -735,7 +735,7 @@ static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
// Check effective address (at runtime) for required alignment.
static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ z_lay(Z_R0, disp, index, base);
__ z_nill(Z_R0, alignment-1);
@ -743,7 +743,7 @@ static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index,
__ z_illtrap();
}
int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
PhaseRegAlloc* ra_, bool is_native_call = false) {
__ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
address old_mark = __ inst_mark();
@ -774,7 +774,7 @@ int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relo
return (ret_off - start_off);
}
static int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
static int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
__ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
address old_mark = __ inst_mark();
unsigned int start_off = __ offset();
@ -807,7 +807,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
// Even with PC-relative TOC addressing, we still need this node.
// Float loads/stores do not support PC-relative addresses.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rtoc = as_Register(ra_->get_encode(this));
__ load_toc(Rtoc);
}
@ -858,7 +858,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ verify_thread();
@ -932,7 +932,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
#endif
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Compile* C = ra_->C;
__ verify_thread();
@ -1034,7 +1034,7 @@ static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigne
static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
__ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP);
}
@ -1108,7 +1108,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
// Check for integer reg-reg copy.
if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
__ z_lgr(Rdst, Rsrc);
@ -1155,7 +1155,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
// Check for float reg-reg copy.
if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
__ z_ldr(Rdst, Rsrc);
@ -1254,7 +1254,7 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const {
#endif
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int rem_space = 0;
if (!(ra_->C->output()->in_scratch_emit_size())) {
@ -1294,7 +1294,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
// Take care of the size function, if you make changes here!
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
int reg = ra_->get_encode(this);
@ -1360,7 +1360,7 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
#endif
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
const int ic_miss_offset = 2;
// Inline_cache contains a klass.
@ -1455,7 +1455,7 @@ source %{
// aware of the exception.
int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
Register temp_reg = Z_R1;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_exception_handler());
if (base == NULL) {
@ -1476,7 +1476,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_deopt_handler());
if (base == NULL) {
@ -1841,13 +1841,13 @@ void Compile::reshape_address(AddPNode* addp) {
// needs for encoding need to be specified.
encode %{
enc_class enc_unimplemented %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ unimplemented("Unimplemented mach node encoding in AD file.", 13);
%}
enc_class enc_untested %{
#ifdef ASSERT
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ untested("Untested mach node encoding in AD file.");
#endif
%}
@ -2033,7 +2033,7 @@ encode %{
Assembler::reg(Ridx, 12, 48) |
Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
} else {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ load_const_optimized(Z_R1_scratch, $mem$$disp);
if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); }
z_emit_inst(cbuf, $secondary |
@ -2045,7 +2045,7 @@ encode %{
%}
enc_class z_enc_brul(Label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2058,7 +2058,7 @@ encode %{
%}
enc_class z_enc_bru(Label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2071,7 +2071,7 @@ encode %{
%}
enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2084,7 +2084,7 @@ encode %{
%}
enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2097,7 +2097,7 @@ encode %{
%}
enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2121,7 +2121,7 @@ encode %{
%}
enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2147,7 +2147,7 @@ encode %{
%}
enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2172,7 +2172,7 @@ encode %{
%}
enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* p = $lbl$$label;
// 'p' is `NULL' when this encoding class is used only to
@ -2199,7 +2199,7 @@ encode %{
// Call from Java to runtime.
enc_class z_enc_java_to_runtime_call(method meth) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
// Save return pc before call to the place where we need it, since
// callee doesn't.
@ -2227,7 +2227,7 @@ encode %{
enc_class z_enc_java_static_call(method meth) %{
// Call to fixup routine. Fixup routine uses ScopeDesc info to determine
// whom we intended to call.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int ret_offset = 0;
if (!_method) {
@ -2256,7 +2256,7 @@ encode %{
// Java dynamic call
enc_class z_enc_java_dynamic_call(method meth) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
unsigned int start_off = __ offset();
int vtable_index = this->_vtable_index;
@ -2311,7 +2311,7 @@ encode %{
%}
enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdst = reg_to_register_object($dst$$reg);
Register Rsrc = reg_to_register_object($src$$reg);
@ -2332,7 +2332,7 @@ encode %{
%}
enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdst = reg_to_register_object($dst$$reg);
int Csrc = $src$$constant;
Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
@ -2349,7 +2349,7 @@ encode %{
%}
enc_class z_enc_cctobool(iRegI res) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rres = reg_to_register_object($res$$reg);
if (VM_Version::has_LoadStoreConditional()) {
@ -2366,7 +2366,7 @@ encode %{
%}
enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rcomp = reg_to_register_object($compare_value$$reg);
Register Rnew = reg_to_register_object($exchange_value$$reg);
Register Raddr = reg_to_register_object($addr_ptr$$reg);
@ -2375,7 +2375,7 @@ encode %{
%}
enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rcomp = reg_to_register_object($compare_value$$reg);
Register Rnew = reg_to_register_object($exchange_value$$reg);
Register Raddr = reg_to_register_object($addr_ptr$$reg);
@ -2384,7 +2384,7 @@ encode %{
%}
enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdst = reg_to_register_object($dst$$reg);
Register Rtmp = reg_to_register_object($tmp$$reg);
guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
@ -2400,7 +2400,7 @@ encode %{
%}
enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdst = reg_to_register_object($dst$$reg);
Register Rtmp = reg_to_register_object($tmp$$reg);
guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");

View File

@ -0,0 +1,526 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "oops/arrayOop.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
Label Lloop, Lslow;
assert(UseVIS >= 3, "VIS3 is required");
assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
assert_different_registers(ftmp1, ftmp2, ftmp3);
// Check if cnt >= 8 (= 16 bytes)
cmp(cnt, 8);
br(Assembler::less, false, Assembler::pn, Lslow);
delayed()->mov(cnt, result); // copy count
// Check for 8-byte alignment of src and dst
or3(src, dst, tmp1);
andcc(tmp1, 7, G0);
br(Assembler::notZero, false, Assembler::pn, Lslow);
delayed()->nop();
// Set mask for bshuffle instruction
Register mask = tmp4;
set(0x13579bdf, mask);
bmask(mask, G0, G0);
// Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
add(mask, 0x300, mask); // mask = 0x0000 0000 ff00 ff00
sllx(mask, 32, tmp1); // tmp1 = 0xff00 ff00 0000 0000
or3(mask, tmp1, mask); // mask = 0xff00 ff00 ff00 ff00
// Load first 8 bytes
ldx(src, 0, tmp1);
bind(Lloop);
// Load next 8 bytes
ldx(src, 8, tmp2);
// Check for non-latin1 character by testing if the most significant byte of a char is set.
// Although we have to move the data between integer and floating point registers, this is
// still faster than the corresponding VIS instructions (ford/fand/fcmpd).
or3(tmp1, tmp2, tmp3);
btst(tmp3, mask);
// annul zeroing if branch is not taken to preserve original count
brx(Assembler::notZero, true, Assembler::pn, Ldone);
delayed()->mov(G0, result); // 0 - failed
// Move bytes into float register
movxtod(tmp1, ftmp1);
movxtod(tmp2, ftmp2);
// Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
bshuffle(ftmp1, ftmp2, ftmp3);
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
// Increment addresses and decrement count
inc(src, 16);
inc(dst, 8);
dec(cnt, 8);
cmp(cnt, 8);
// annul LDX if branch is not taken to prevent access past end of string
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
delayed()->ldx(src, 0, tmp1);
// Fallback to slow version
bind(Lslow);
}
// Compress char[] to byte[]. Return 0 on failure.
void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
Label Lloop;
assert_different_registers(src, dst, cnt, tmp, result);
lduh(src, 0, tmp);
bind(Lloop);
inc(src, sizeof(jchar));
cmp(tmp, 0xff);
// annul zeroing if branch is not taken to preserve original count
br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
delayed()->mov(G0, result); // 0 - failed
deccc(cnt);
stb(tmp, dst, 0);
inc(dst);
// annul LDUH if branch is not taken to prevent access past end of string
br(Assembler::notZero, true, Assembler::pt, Lloop);
delayed()->lduh(src, 0, tmp); // hoisted
}
// Inflate byte[] to char[] by inflating 16 bytes at once.
void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
Label Lloop, Lslow;
assert(UseVIS >= 3, "VIS3 is required");
assert_different_registers(src, dst, cnt, tmp);
assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
// Check if cnt >= 8 (= 16 bytes)
cmp(cnt, 8);
br(Assembler::less, false, Assembler::pn, Lslow);
delayed()->nop();
// Check for 8-byte alignment of src and dst
or3(src, dst, tmp);
andcc(tmp, 7, G0);
br(Assembler::notZero, false, Assembler::pn, Lslow);
// Initialize float register to zero
FloatRegister zerof = ftmp4;
delayed()->fzero(FloatRegisterImpl::D, zerof);
// Load first 8 bytes
ldf(FloatRegisterImpl::D, src, 0, ftmp1);
bind(Lloop);
inc(src, 8);
dec(cnt, 8);
// Inflate the string by interleaving each byte from the source array
// with a zero byte and storing the result in the destination array.
fpmerge(zerof, ftmp1->successor(), ftmp2);
stf(FloatRegisterImpl::D, ftmp2, dst, 8);
fpmerge(zerof, ftmp1, ftmp3);
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
inc(dst, 16);
cmp(cnt, 8);
// annul LDX if branch is not taken to prevent access past end of string
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
// Fallback to slow version
bind(Lslow);
}
// Inflate byte[] to char[].
void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
Label Loop;
assert_different_registers(src, dst, cnt, tmp);
ldub(src, 0, tmp);
bind(Loop);
inc(src);
deccc(cnt);
sth(tmp, dst, 0);
inc(dst, sizeof(jchar));
// annul LDUB if branch is not taken to prevent access past end of string
br(Assembler::notZero, true, Assembler::pt, Loop);
delayed()->ldub(src, 0, tmp); // hoisted
}
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register tmp2,
Register result, int ae) {
Label Ldone, Lloop;
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
int stride1, stride2;
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
// we interchange str1 and str2 in the UL case and negate the result.
// Like this, str1 is always latin1 encoded, expect for the UU case.
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
srl(cnt2, 1, cnt2);
}
// See if the lengths are different, and calculate min in cnt1.
// Save diff in case we need it for a tie-breaker.
Label Lskip;
Register diff = tmp1;
subcc(cnt1, cnt2, diff);
br(Assembler::greater, true, Assembler::pt, Lskip);
// cnt2 is shorter, so use its count:
delayed()->mov(cnt2, cnt1);
bind(Lskip);
// Rename registers
Register limit1 = cnt1;
Register limit2 = limit1;
Register chr1 = result;
Register chr2 = cnt2;
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
// We need an additional register to keep track of two limits
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
limit2 = tmp2;
}
// Is the minimum length zero?
cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
br(Assembler::equal, true, Assembler::pn, Ldone);
// result is difference in lengths
if (ae == StrIntrinsicNode::UU) {
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
} else {
delayed()->mov(diff, result);
}
// Load first characters
if (ae == StrIntrinsicNode::LL) {
stride1 = stride2 = sizeof(jbyte);
ldub(str1, 0, chr1);
ldub(str2, 0, chr2);
} else if (ae == StrIntrinsicNode::UU) {
stride1 = stride2 = sizeof(jchar);
lduh(str1, 0, chr1);
lduh(str2, 0, chr2);
} else {
stride1 = sizeof(jbyte);
stride2 = sizeof(jchar);
ldub(str1, 0, chr1);
lduh(str2, 0, chr2);
}
// Compare first characters
subcc(chr1, chr2, chr1);
br(Assembler::notZero, false, Assembler::pt, Ldone);
assert(chr1 == result, "result must be pre-placed");
delayed()->nop();
// Check if the strings start at same location
cmp(str1, str2);
brx(Assembler::equal, true, Assembler::pn, Ldone);
delayed()->mov(G0, result); // result is zero
// We have no guarantee that on 64 bit the higher half of limit is 0
signx(limit1);
// Get limit
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
sll(limit1, 1, limit2);
subcc(limit2, stride2, chr2);
}
subcc(limit1, stride1, chr1);
br(Assembler::zero, true, Assembler::pn, Ldone);
// result is difference in lengths
if (ae == StrIntrinsicNode::UU) {
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
} else {
delayed()->mov(diff, result);
}
// Shift str1 and str2 to the end of the arrays, negate limit
add(str1, limit1, str1);
add(str2, limit2, str2);
neg(chr1, limit1); // limit1 = -(limit1-stride1)
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
neg(chr2, limit2); // limit2 = -(limit2-stride2)
}
// Compare the rest of the characters
load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
bind(Lloop);
load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
subcc(chr1, chr2, chr1);
br(Assembler::notZero, false, Assembler::pt, Ldone);
assert(chr1 == result, "result must be pre-placed");
delayed()->inccc(limit1, stride1);
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
inccc(limit2, stride2);
}
// annul LDUB if branch is not taken to prevent access past end of string
br(Assembler::notZero, true, Assembler::pt, Lloop);
delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
// If strings are equal up to min length, return the length difference.
if (ae == StrIntrinsicNode::UU) {
// Divide by 2 to get number of chars
sra(diff, 1, result);
} else {
mov(diff, result);
}
// Otherwise, return the difference between the first mismatched chars.
bind(Ldone);
if(ae == StrIntrinsicNode::UL) {
// Negate result (see note above)
neg(result);
}
}
void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp, Register result, bool is_byte) {
Label Ldone, Lloop, Lremaining;
assert_different_registers(ary1, ary2, limit, tmp, result);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
if (is_array_equ) {
// return true if the same array
cmp(ary1, ary2);
brx(Assembler::equal, true, Assembler::pn, Ldone);
delayed()->mov(1, result); // equal
br_null(ary1, true, Assembler::pn, Ldone);
delayed()->clr(result); // not equal
br_null(ary2, true, Assembler::pn, Ldone);
delayed()->clr(result); // not equal
// load the lengths of arrays
ld(Address(ary1, length_offset), limit);
ld(Address(ary2, length_offset), tmp);
// return false if the two arrays are not equal length
cmp(limit, tmp);
br(Assembler::notEqual, true, Assembler::pn, Ldone);
delayed()->clr(result); // not equal
}
cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
delayed()->mov(1, result); // zero-length arrays are equal
if (is_array_equ) {
// load array addresses
add(ary1, base_offset, ary1);
add(ary2, base_offset, ary2);
// set byte count
if (!is_byte) {
sll(limit, exact_log2(sizeof(jchar)), limit);
}
} else {
// We have no guarantee that on 64 bit the higher half of limit is 0
signx(limit);
}
#ifdef ASSERT
// Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
// Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
Label Laligned;
or3(ary1, ary2, tmp);
andcc(tmp, 7, tmp);
br_null_short(tmp, Assembler::pn, Laligned);
STOP("First array element is not 8-byte aligned.");
should_not_reach_here();
bind(Laligned);
#endif
// Shift ary1 and ary2 to the end of the arrays, negate limit
add(ary1, limit, ary1);
add(ary2, limit, ary2);
neg(limit, limit);
// MAIN LOOP
// Load and compare array elements of size 'byte_width' until the elements are not
// equal or we reached the end of the arrays. If the size of the arrays is not a
// multiple of 'byte_width', we simply read over the end of the array, bail out and
// compare the remaining bytes below by skipping the garbage bytes.
ldx(ary1, limit, result);
bind(Lloop);
ldx(ary2, limit, tmp);
inccc(limit, 8);
// Bail out if we reached the end (but still do the comparison)
br(Assembler::positive, false, Assembler::pn, Lremaining);
delayed()->cmp(result, tmp);
// Check equality of elements
brx(Assembler::equal, false, Assembler::pt, target(Lloop));
delayed()->ldx(ary1, limit, result);
ba(Ldone);
delayed()->clr(result); // not equal
// TAIL COMPARISON
// We got here because we reached the end of the arrays. 'limit' is the number of
// garbage bytes we may have compared by reading over the end of the arrays. Shift
// out the garbage and compare the remaining elements.
bind(Lremaining);
// Optimistic shortcut: elements potentially including garbage are equal
brx(Assembler::equal, true, Assembler::pt, target(Ldone));
delayed()->mov(1, result); // equal
// Shift 'limit' bytes to the right and compare
sll(limit, 3, limit); // bytes to bits
srlx(result, limit, result);
srlx(tmp, limit, tmp);
cmp(result, tmp);
clr(result);
movcc(Assembler::equal, false, xcc, 1, result);
bind(Ldone);
}
void C2_MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
// test for negative bytes in input string of a given size
// result 1 if found, 0 otherwise.
Label Lcore, Ltail, Lreturn, Lcore_rpt;
assert_different_registers(inp, size, t2, t3, t4, t5, result);
Register i = result; // result used as integer index i until very end
Register lmask = t2; // t2 is aliased to lmask
// INITIALIZATION
// ===========================================================
// initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
// compute unaligned offset -> i
// compute core end index -> t5
Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
add(t2, 0x80, t2);
sllx(t2, 32, t3);
or3(t3, t2, lmask); // 0x8080808080808080 -> lmask
sra(size,0,size);
andcc(inp, 0x7, i); // unaligned offset -> i
br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
delayed()->add(size, -8, t5); // (annuled) core end index -> t5
// ===========================================================
// UNALIGNED HEAD
// ===========================================================
// * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
// * obliterate (ignore) bytes outside string by shifting off reg ends
// * compare with bitmask, short circuit return true if one or more high
// bits set.
cmp(size, 0);
br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
delayed()->mov(0,result); // annuled so i not clobbered for following
neg(i, t4);
add(i, size, t5);
ldx(inp, t4, t3); // raw aligned 8B containing unaligned head -> t3
mov(8, t4);
sub(t4, t5, t4);
sra(t4, 31, t5);
andn(t4, t5, t5);
add(i, t5, t4);
sll(t5, 3, t5);
sll(t4, 3, t4); // # bits to shift right, left -> t5,t4
srlx(t3, t5, t3);
sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
andcc(lmask, t3, G0);
brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
delayed()->mov(1,result); // annuled so i not clobbered for following
add(size, -8, t5); // core end index -> t5
mov(8, t4);
sub(t4, i, i); // # bytes examined in unalgn head (<8) -> i
// ===========================================================
// ALIGNED CORE
// ===========================================================
// * iterate index i over aligned 8B sections of core, comparing with
// bitmask, short circuit return true if one or more high bits set
// t5 contains core end index/loop limit which is the index
// of the MSB of last (unaligned) 8B fully contained in the string.
// inp contains address of first byte in string/array
// lmask contains 8B high bit mask for comparison
// i contains next index to be processed (adr. inp+i is on 8B boundary)
bind(Lcore);
cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
bind(Lcore_rpt);
ldx(inp, i, t3);
andcc(t3, lmask, G0);
brx(Assembler::notZero, true, Assembler::pn, Lreturn);
delayed()->mov(1, result); // annuled so i not clobbered for following
add(i, 8, i);
cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
// ===========================================================
// ALIGNED TAIL (<8B)
// ===========================================================
// handle aligned tail of 7B or less as complete 8B, obliterating end of
// string bytes by shifting them off end, compare what's left with bitmask
// inp contains address of first byte in string/array
// lmask contains 8B high bit mask for comparison
// i contains next index to be processed (adr. inp+i is on 8B boundary)
bind(Ltail);
subcc(size, i, t4); // # of remaining bytes in string -> t4
// return 0 if no more remaining bytes
br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
delayed()->mov(0, result); // annuled so i not clobbered for following
ldx(inp, i, t3); // load final 8B (aligned) containing tail -> t3
mov(8, t5);
sub(t5, t4, t4);
mov(0, result); // ** i clobbered at this point
sll(t4, 3, t4); // bits beyond end of string -> t4
srlx(t3, t4, t3); // bytes beyond end now obliterated -> t3
andcc(lmask, t3, G0);
movcc(Assembler::notZero, false, xcc, 1, result);
bind(Lreturn);
}

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
#define CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
// C2_MacroAssembler contains high-level macros for C2
public:
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
void string_compress_16(Register src, Register dst, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
// Compress char[] to byte[]. Return 0 on failure.
void string_compress(Register src, Register dst, Register cnt, Register tmp, Register result, Label& Ldone);
// Inflate byte[] to char[] by inflating 16 bytes at once.
void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
// Inflate byte[] to char[].
void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register tmp2,
Register result, int ae);
void array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp, Register result, bool is_byte);
// test for negative bytes in input string of a given size, result 0 if none
void has_negatives(Register inp, Register size, Register result,
Register t2, Register t3, Register t4,
Register t5);
#endif // CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP

View File

@ -49,9 +49,6 @@
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/intrinsicnode.hpp"
#endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@ -3436,498 +3433,6 @@ void MacroAssembler::reinit_heapbase() {
}
}
#ifdef COMPILER2
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
Label Lloop, Lslow;
assert(UseVIS >= 3, "VIS3 is required");
assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
assert_different_registers(ftmp1, ftmp2, ftmp3);
// Check if cnt >= 8 (= 16 bytes)
cmp(cnt, 8);
br(Assembler::less, false, Assembler::pn, Lslow);
delayed()->mov(cnt, result); // copy count
// Check for 8-byte alignment of src and dst
or3(src, dst, tmp1);
andcc(tmp1, 7, G0);
br(Assembler::notZero, false, Assembler::pn, Lslow);
delayed()->nop();
// Set mask for bshuffle instruction
Register mask = tmp4;
set(0x13579bdf, mask);
bmask(mask, G0, G0);
// Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
add(mask, 0x300, mask); // mask = 0x0000 0000 ff00 ff00
sllx(mask, 32, tmp1); // tmp1 = 0xff00 ff00 0000 0000
or3(mask, tmp1, mask); // mask = 0xff00 ff00 ff00 ff00
// Load first 8 bytes
ldx(src, 0, tmp1);
bind(Lloop);
// Load next 8 bytes
ldx(src, 8, tmp2);
// Check for non-latin1 character by testing if the most significant byte of a char is set.
// Although we have to move the data between integer and floating point registers, this is
// still faster than the corresponding VIS instructions (ford/fand/fcmpd).
or3(tmp1, tmp2, tmp3);
btst(tmp3, mask);
// annul zeroing if branch is not taken to preserve original count
brx(Assembler::notZero, true, Assembler::pn, Ldone);
delayed()->mov(G0, result); // 0 - failed
// Move bytes into float register
movxtod(tmp1, ftmp1);
movxtod(tmp2, ftmp2);
// Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
bshuffle(ftmp1, ftmp2, ftmp3);
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
// Increment addresses and decrement count
inc(src, 16);
inc(dst, 8);
dec(cnt, 8);
cmp(cnt, 8);
// annul LDX if branch is not taken to prevent access past end of string
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
delayed()->ldx(src, 0, tmp1);
// Fallback to slow version
bind(Lslow);
}
// Compress char[] to byte[]. Return 0 on failure.
void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
Label Lloop;
assert_different_registers(src, dst, cnt, tmp, result);
lduh(src, 0, tmp);
bind(Lloop);
inc(src, sizeof(jchar));
cmp(tmp, 0xff);
// annul zeroing if branch is not taken to preserve original count
br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
delayed()->mov(G0, result); // 0 - failed
deccc(cnt);
stb(tmp, dst, 0);
inc(dst);
// annul LDUH if branch is not taken to prevent access past end of string
br(Assembler::notZero, true, Assembler::pt, Lloop);
delayed()->lduh(src, 0, tmp); // hoisted
}
// Inflate byte[] to char[] by inflating 16 bytes at once.
void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
Label Lloop, Lslow;
assert(UseVIS >= 3, "VIS3 is required");
assert_different_registers(src, dst, cnt, tmp);
assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
// Check if cnt >= 8 (= 16 bytes)
cmp(cnt, 8);
br(Assembler::less, false, Assembler::pn, Lslow);
delayed()->nop();
// Check for 8-byte alignment of src and dst
or3(src, dst, tmp);
andcc(tmp, 7, G0);
br(Assembler::notZero, false, Assembler::pn, Lslow);
// Initialize float register to zero
FloatRegister zerof = ftmp4;
delayed()->fzero(FloatRegisterImpl::D, zerof);
// Load first 8 bytes
ldf(FloatRegisterImpl::D, src, 0, ftmp1);
bind(Lloop);
inc(src, 8);
dec(cnt, 8);
// Inflate the string by interleaving each byte from the source array
// with a zero byte and storing the result in the destination array.
fpmerge(zerof, ftmp1->successor(), ftmp2);
stf(FloatRegisterImpl::D, ftmp2, dst, 8);
fpmerge(zerof, ftmp1, ftmp3);
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
inc(dst, 16);
cmp(cnt, 8);
// annul LDX if branch is not taken to prevent access past end of string
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
// Fallback to slow version
bind(Lslow);
}
// Inflate byte[] to char[].
void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
Label Loop;
assert_different_registers(src, dst, cnt, tmp);
ldub(src, 0, tmp);
bind(Loop);
inc(src);
deccc(cnt);
sth(tmp, dst, 0);
inc(dst, sizeof(jchar));
// annul LDUB if branch is not taken to prevent access past end of string
br(Assembler::notZero, true, Assembler::pt, Loop);
delayed()->ldub(src, 0, tmp); // hoisted
}
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register tmp2,
Register result, int ae) {
Label Ldone, Lloop;
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
int stride1, stride2;
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
// we interchange str1 and str2 in the UL case and negate the result.
// Like this, str1 is always latin1 encoded, expect for the UU case.
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
srl(cnt2, 1, cnt2);
}
// See if the lengths are different, and calculate min in cnt1.
// Save diff in case we need it for a tie-breaker.
Label Lskip;
Register diff = tmp1;
subcc(cnt1, cnt2, diff);
br(Assembler::greater, true, Assembler::pt, Lskip);
// cnt2 is shorter, so use its count:
delayed()->mov(cnt2, cnt1);
bind(Lskip);
// Rename registers
Register limit1 = cnt1;
Register limit2 = limit1;
Register chr1 = result;
Register chr2 = cnt2;
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
// We need an additional register to keep track of two limits
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
limit2 = tmp2;
}
// Is the minimum length zero?
cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
br(Assembler::equal, true, Assembler::pn, Ldone);
// result is difference in lengths
if (ae == StrIntrinsicNode::UU) {
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
} else {
delayed()->mov(diff, result);
}
// Load first characters
if (ae == StrIntrinsicNode::LL) {
stride1 = stride2 = sizeof(jbyte);
ldub(str1, 0, chr1);
ldub(str2, 0, chr2);
} else if (ae == StrIntrinsicNode::UU) {
stride1 = stride2 = sizeof(jchar);
lduh(str1, 0, chr1);
lduh(str2, 0, chr2);
} else {
stride1 = sizeof(jbyte);
stride2 = sizeof(jchar);
ldub(str1, 0, chr1);
lduh(str2, 0, chr2);
}
// Compare first characters
subcc(chr1, chr2, chr1);
br(Assembler::notZero, false, Assembler::pt, Ldone);
assert(chr1 == result, "result must be pre-placed");
delayed()->nop();
// Check if the strings start at same location
cmp(str1, str2);
brx(Assembler::equal, true, Assembler::pn, Ldone);
delayed()->mov(G0, result); // result is zero
// We have no guarantee that on 64 bit the higher half of limit is 0
signx(limit1);
// Get limit
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
sll(limit1, 1, limit2);
subcc(limit2, stride2, chr2);
}
subcc(limit1, stride1, chr1);
br(Assembler::zero, true, Assembler::pn, Ldone);
// result is difference in lengths
if (ae == StrIntrinsicNode::UU) {
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
} else {
delayed()->mov(diff, result);
}
// Shift str1 and str2 to the end of the arrays, negate limit
add(str1, limit1, str1);
add(str2, limit2, str2);
neg(chr1, limit1); // limit1 = -(limit1-stride1)
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
neg(chr2, limit2); // limit2 = -(limit2-stride2)
}
// Compare the rest of the characters
load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
bind(Lloop);
load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
subcc(chr1, chr2, chr1);
br(Assembler::notZero, false, Assembler::pt, Ldone);
assert(chr1 == result, "result must be pre-placed");
delayed()->inccc(limit1, stride1);
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
inccc(limit2, stride2);
}
// annul LDUB if branch is not taken to prevent access past end of string
br(Assembler::notZero, true, Assembler::pt, Lloop);
delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
// If strings are equal up to min length, return the length difference.
if (ae == StrIntrinsicNode::UU) {
// Divide by 2 to get number of chars
sra(diff, 1, result);
} else {
mov(diff, result);
}
// Otherwise, return the difference between the first mismatched chars.
bind(Ldone);
if(ae == StrIntrinsicNode::UL) {
// Negate result (see note above)
neg(result);
}
}
void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp, Register result, bool is_byte) {
Label Ldone, Lloop, Lremaining;
assert_different_registers(ary1, ary2, limit, tmp, result);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
if (is_array_equ) {
// return true if the same array
cmp(ary1, ary2);
brx(Assembler::equal, true, Assembler::pn, Ldone);
delayed()->mov(1, result); // equal
br_null(ary1, true, Assembler::pn, Ldone);
delayed()->clr(result); // not equal
br_null(ary2, true, Assembler::pn, Ldone);
delayed()->clr(result); // not equal
// load the lengths of arrays
ld(Address(ary1, length_offset), limit);
ld(Address(ary2, length_offset), tmp);
// return false if the two arrays are not equal length
cmp(limit, tmp);
br(Assembler::notEqual, true, Assembler::pn, Ldone);
delayed()->clr(result); // not equal
}
cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
delayed()->mov(1, result); // zero-length arrays are equal
if (is_array_equ) {
// load array addresses
add(ary1, base_offset, ary1);
add(ary2, base_offset, ary2);
// set byte count
if (!is_byte) {
sll(limit, exact_log2(sizeof(jchar)), limit);
}
} else {
// We have no guarantee that on 64 bit the higher half of limit is 0
signx(limit);
}
#ifdef ASSERT
// Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
// Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
Label Laligned;
or3(ary1, ary2, tmp);
andcc(tmp, 7, tmp);
br_null_short(tmp, Assembler::pn, Laligned);
STOP("First array element is not 8-byte aligned.");
should_not_reach_here();
bind(Laligned);
#endif
// Shift ary1 and ary2 to the end of the arrays, negate limit
add(ary1, limit, ary1);
add(ary2, limit, ary2);
neg(limit, limit);
// MAIN LOOP
// Load and compare array elements of size 'byte_width' until the elements are not
// equal or we reached the end of the arrays. If the size of the arrays is not a
// multiple of 'byte_width', we simply read over the end of the array, bail out and
// compare the remaining bytes below by skipping the garbage bytes.
ldx(ary1, limit, result);
bind(Lloop);
ldx(ary2, limit, tmp);
inccc(limit, 8);
// Bail out if we reached the end (but still do the comparison)
br(Assembler::positive, false, Assembler::pn, Lremaining);
delayed()->cmp(result, tmp);
// Check equality of elements
brx(Assembler::equal, false, Assembler::pt, target(Lloop));
delayed()->ldx(ary1, limit, result);
ba(Ldone);
delayed()->clr(result); // not equal
// TAIL COMPARISON
// We got here because we reached the end of the arrays. 'limit' is the number of
// garbage bytes we may have compared by reading over the end of the arrays. Shift
// out the garbage and compare the remaining elements.
bind(Lremaining);
// Optimistic shortcut: elements potentially including garbage are equal
brx(Assembler::equal, true, Assembler::pt, target(Ldone));
delayed()->mov(1, result); // equal
// Shift 'limit' bytes to the right and compare
sll(limit, 3, limit); // bytes to bits
srlx(result, limit, result);
srlx(tmp, limit, tmp);
cmp(result, tmp);
clr(result);
movcc(Assembler::equal, false, xcc, 1, result);
bind(Ldone);
}
void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
// test for negative bytes in input string of a given size
// result 1 if found, 0 otherwise.
Label Lcore, Ltail, Lreturn, Lcore_rpt;
assert_different_registers(inp, size, t2, t3, t4, t5, result);
Register i = result; // result used as integer index i until very end
Register lmask = t2; // t2 is aliased to lmask
// INITIALIZATION
// ===========================================================
// initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
// compute unaligned offset -> i
// compute core end index -> t5
Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
add(t2, 0x80, t2);
sllx(t2, 32, t3);
or3(t3, t2, lmask); // 0x8080808080808080 -> lmask
sra(size,0,size);
andcc(inp, 0x7, i); // unaligned offset -> i
br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
delayed()->add(size, -8, t5); // (annuled) core end index -> t5
// ===========================================================
// UNALIGNED HEAD
// ===========================================================
// * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
// * obliterate (ignore) bytes outside string by shifting off reg ends
// * compare with bitmask, short circuit return true if one or more high
// bits set.
cmp(size, 0);
br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
delayed()->mov(0,result); // annuled so i not clobbered for following
neg(i, t4);
add(i, size, t5);
ldx(inp, t4, t3); // raw aligned 8B containing unaligned head -> t3
mov(8, t4);
sub(t4, t5, t4);
sra(t4, 31, t5);
andn(t4, t5, t5);
add(i, t5, t4);
sll(t5, 3, t5);
sll(t4, 3, t4); // # bits to shift right, left -> t5,t4
srlx(t3, t5, t3);
sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
andcc(lmask, t3, G0);
brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
delayed()->mov(1,result); // annuled so i not clobbered for following
add(size, -8, t5); // core end index -> t5
mov(8, t4);
sub(t4, i, i); // # bytes examined in unalgn head (<8) -> i
// ===========================================================
// ALIGNED CORE
// ===========================================================
// * iterate index i over aligned 8B sections of core, comparing with
// bitmask, short circuit return true if one or more high bits set
// t5 contains core end index/loop limit which is the index
// of the MSB of last (unaligned) 8B fully contained in the string.
// inp contains address of first byte in string/array
// lmask contains 8B high bit mask for comparison
// i contains next index to be processed (adr. inp+i is on 8B boundary)
bind(Lcore);
cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
bind(Lcore_rpt);
ldx(inp, i, t3);
andcc(t3, lmask, G0);
brx(Assembler::notZero, true, Assembler::pn, Lreturn);
delayed()->mov(1, result); // annuled so i not clobbered for following
add(i, 8, i);
cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
// ===========================================================
// ALIGNED TAIL (<8B)
// ===========================================================
// handle aligned tail of 7B or less as complete 8B, obliterating end of
// string bytes by shifting them off end, compare what's left with bitmask
// inp contains address of first byte in string/array
// lmask contains 8B high bit mask for comparison
// i contains next index to be processed (adr. inp+i is on 8B boundary)
bind(Ltail);
subcc(size, i, t4); // # of remaining bytes in string -> t4
// return 0 if no more remaining bytes
br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
delayed()->mov(0, result); // annuled so i not clobbered for following
ldx(inp, i, t3); // load final 8B (aligned) containing tail -> t3
mov(8, t5);
sub(t5, t4, t4);
mov(0, result); // ** i clobbered at this point
sll(t4, 3, t4); // bits beyond end of string -> t4
srlx(t3, t4, t3); // bytes beyond end now obliterated -> t3
andcc(lmask, t3, G0);
movcc(Assembler::notZero, false, xcc, 1, result);
bind(Lreturn);
}
#endif
// Use BIS for zeroing (count is in bytes).
void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
assert(UseBlockZeroing && VM_Version::has_blk_zeroing(), "only works with BIS zeroing");

View File

@ -1301,36 +1301,6 @@ public:
void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
void inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2);
#ifdef COMPILER2
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
void string_compress_16(Register src, Register dst, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
// Compress char[] to byte[]. Return 0 on failure.
void string_compress(Register src, Register dst, Register cnt, Register tmp, Register result, Label& Ldone);
// Inflate byte[] to char[] by inflating 16 bytes at once.
void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
// Inflate byte[] to char[].
void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2,
Register tmp1, Register tmp2,
Register result, int ae);
void array_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register tmp, Register result, bool is_byte);
// test for negative bytes in input string of a given size, result 0 if none
void has_negatives(Register inp, Register size, Register result,
Register t2, Register t3, Register t4,
Register t5);
#endif
// Use BIS for zeroing
void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);

View File

@ -592,7 +592,7 @@ bool SafePointNode::needs_polling_address_input() {
// emit an interrupt that is caught by the debugger (for debugging compiler)
void emit_break(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ breakpoint_trap();
}
@ -612,7 +612,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
// Traceable jump
void emit_jmpl(CodeBuffer &cbuf, int jump_target) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register rdest = reg_to_register_object(jump_target);
__ JMP(rdest, 0);
__ delayed()->nop();
@ -620,19 +620,19 @@ void emit_jmpl(CodeBuffer &cbuf, int jump_target) {
// Traceable jump and set exception pc
void emit_jmpl_set_exception_pc(CodeBuffer &cbuf, int jump_target) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register rdest = reg_to_register_object(jump_target);
__ JMP(rdest, 0);
__ delayed()->add(O7, frame::pc_return_offset, Oissuing_pc );
}
void emit_nop(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ nop();
}
void emit_illtrap(CodeBuffer &cbuf) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ illtrap(0);
}
@ -908,7 +908,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
disp += STACK_BIAS;
// Check that stack offset fits, load into O7 if not
if (!Assembler::is_simm13(disp)) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ set(disp, O7);
if (index != R_G0_enc) {
__ add(O7, reg_to_register_object(index), O7);
@ -932,7 +932,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
#ifdef ASSERT
if (VerifyOops) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
if (is_verified_oop_base) {
__ verify_oop(reg_to_register_object(src1_enc));
}
@ -960,7 +960,7 @@ void emit_call_reloc(CodeBuffer &cbuf, intptr_t entry_point, RelocationHolder co
// putting the "mov" instruction in the delay slot but the problem
// may bite us again at some other point and a cleaner/generic
// solution using relocations would be needed.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ set_inst_mark();
// We flush the current window just so that there is a valid stack copy
@ -1024,7 +1024,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
Compile* C = ra_->C;
ConstantTable& constant_table = C->output()->constant_table();
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register r = as_Register(ra_->get_encode(this));
CodeSection* consts_section = __ code()->consts();
@ -1153,7 +1153,7 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
for (int i = 0; i < OptoPrologueNops; i++) {
__ nop();
@ -1226,7 +1226,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
#endif
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Compile* C = ra_->C;
__ verify_thread();
@ -1534,7 +1534,7 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const {
#endif
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
for (int i = 0; i < _count; i += 1) {
__ nop();
}
@ -1555,7 +1555,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
#endif
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()) + STACK_BIAS;
int reg = ra_->get_encode(this);
@ -1599,7 +1599,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
#endif
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
Register temp_reg = G3;
assert( G5_ic_reg != temp_reg, "conflicting registers" );
@ -1624,7 +1624,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
Register temp_reg = G3;
AddressLiteral exception_blob(OptoRuntime::exception_blob()->entry_point());
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_exception_handler());
if (base == NULL) {
@ -1649,7 +1649,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
// at a poll and everything (including G3) can be live.
Register temp_reg = L0;
AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack());
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_deopt_handler());
if (base == NULL) {
@ -2007,7 +2007,7 @@ void Compile::reshape_address(AddPNode* addp) {
encode %{
enc_class enc_untested %{
#ifdef ASSERT
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ untested("encoding");
#endif
%}
@ -2142,7 +2142,7 @@ encode %{
/* %%% merge with enc_to_bool */
enc_class enc_convP2B( iRegI dst, iRegP src ) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register src_reg = reg_to_register_object($src$$reg);
Register dst_reg = reg_to_register_object($dst$$reg);
@ -2151,7 +2151,7 @@ encode %{
enc_class enc_cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp ) %{
// (Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)))
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register p_reg = reg_to_register_object($p$$reg);
Register q_reg = reg_to_register_object($q$$reg);
@ -2284,13 +2284,13 @@ encode %{
%}
enc_class enc_PartialSubtypeCheck() %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type);
__ delayed()->nop();
%}
enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* L = $labl$$label;
Assembler::Predict predict_taken =
cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
@ -2300,7 +2300,7 @@ encode %{
%}
enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label* L = $labl$$label;
Assembler::Predict predict_taken =
cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
@ -2419,13 +2419,13 @@ encode %{
%}
enc_class Set32( immI src, iRegI rd ) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ set($src$$constant, reg_to_register_object($rd$$reg));
%}
enc_class call_epilog %{
if( VerifyStackAtCalls ) {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
int framesize = ra_->C->output()->frame_size_in_bytes();
Register temp_reg = G3;
__ add(SP, framesize, temp_reg);
@ -2447,12 +2447,12 @@ encode %{
%}
enc_class preserve_SP %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(SP, L7_mh_SP_save);
%}
enc_class restore_SP %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(L7_mh_SP_save, SP);
%}
@ -2477,7 +2477,7 @@ encode %{
%}
enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ set_inst_mark();
int vtable_index = this->_vtable_index;
// MachCallDynamicJavaNode::ret_addr_offset uses this same test
@ -2526,7 +2526,7 @@ encode %{
%}
enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
Register temp_reg = G3; // caller must kill G3! We cannot reuse G5_ic_reg here because
@ -2543,7 +2543,7 @@ encode %{
%}
enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdividend = reg_to_register_object($src1$$reg);
Register Rdivisor = reg_to_register_object($src2$$reg);
Register Rresult = reg_to_register_object($dst$$reg);
@ -2554,7 +2554,7 @@ enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
%}
enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdividend = reg_to_register_object($src1$$reg);
int divisor = $imm$$constant;
@ -2565,7 +2565,7 @@ enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
%}
enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rsrc1 = reg_to_register_object($src1$$reg);
Register Rsrc2 = reg_to_register_object($src2$$reg);
Register Rdst = reg_to_register_object($dst$$reg);
@ -2577,7 +2577,7 @@ enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
%}
enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdividend = reg_to_register_object($src1$$reg);
Register Rdivisor = reg_to_register_object($src2$$reg);
Register Rresult = reg_to_register_object($dst$$reg);
@ -2594,7 +2594,7 @@ enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch
%}
enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdividend = reg_to_register_object($src1$$reg);
int divisor = $imm$$constant;
@ -2610,7 +2610,7 @@ enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
%}
enc_class fabss (sflt_reg dst, sflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
@ -2619,7 +2619,7 @@ enc_class fabss (sflt_reg dst, sflt_reg src) %{
%}
enc_class fabsd (dflt_reg dst, dflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2628,7 +2628,7 @@ enc_class fabsd (dflt_reg dst, dflt_reg src) %{
%}
enc_class fnegd (dflt_reg dst, dflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2637,7 +2637,7 @@ enc_class fnegd (dflt_reg dst, dflt_reg src) %{
%}
enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
@ -2646,7 +2646,7 @@ enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
%}
enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2656,7 +2656,7 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2667,7 +2667,7 @@ enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
%}
enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2678,7 +2678,7 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
%}
enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2689,7 +2689,7 @@ enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
%}
enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2700,7 +2700,7 @@ enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
%}
enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2711,7 +2711,7 @@ enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
%}
enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2722,7 +2722,7 @@ enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
%}
enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
@ -2733,7 +2733,7 @@ enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
%}
enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
@ -2745,7 +2745,7 @@ enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
@ -2754,7 +2754,7 @@ enc_class fmovs (dflt_reg dst, dflt_reg src) %{
%}
enc_class fmovd (dflt_reg dst, dflt_reg src) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
@ -2763,7 +2763,7 @@ enc_class fmovd (dflt_reg dst, dflt_reg src) %{
%}
enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Roop = reg_to_register_object($oop$$reg);
Register Rbox = reg_to_register_object($box$$reg);
@ -2779,7 +2779,7 @@ enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
%}
enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Roop = reg_to_register_object($oop$$reg);
Register Rbox = reg_to_register_object($box$$reg);
@ -2795,7 +2795,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
%}
enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rmem = reg_to_register_object($mem$$reg);
Register Rold = reg_to_register_object($old$$reg);
Register Rnew = reg_to_register_object($new$$reg);
@ -2809,7 +2809,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
Register Rold = reg_to_register_object($old$$reg);
Register Rnew = reg_to_register_object($new$$reg);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(Rnew, O7);
__ casx(Rmem, Rold, O7);
__ cmp( Rold, O7 );
@ -2821,7 +2821,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
Register Rold = reg_to_register_object($old$$reg);
Register Rnew = reg_to_register_object($new$$reg);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(Rnew, O7);
__ cas(Rmem, Rold, O7);
__ cmp( Rold, O7 );
@ -2833,7 +2833,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
Register Rold = reg_to_register_object($old$$reg);
Register Rnew = reg_to_register_object($new$$reg);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ cas(Rmem, Rold, Rnew);
%}
@ -2843,14 +2843,14 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
Register Rold = reg_to_register_object($old$$reg);
Register Rnew = reg_to_register_object($new$$reg);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ casx(Rmem, Rold, Rnew);
%}
enc_class enc_lflags_ne_to_boolean( iRegI res ) %{
Register Rres = reg_to_register_object($res$$reg);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(1, Rres);
__ movcc( Assembler::notEqual, false, Assembler::xcc, G0, Rres );
%}
@ -2858,13 +2858,13 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
enc_class enc_iflags_ne_to_boolean( iRegI res ) %{
Register Rres = reg_to_register_object($res$$reg);
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ mov(1, Rres);
__ movcc( Assembler::notEqual, false, Assembler::icc, G0, Rres );
%}
enc_class floating_cmp ( iRegP dst, regF src1, regF src2 ) %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Register Rdst = reg_to_register_object($dst$$reg);
FloatRegister Fsrc1 = $primary ? reg_to_SingleFloatRegister_object($src1$$reg)
: reg_to_DoubleFloatRegister_object($src1$$reg);
@ -2880,7 +2880,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
Register temp_reg = G3;
AddressLiteral rethrow_stub(OptoRuntime::rethrow_stub());
assert(temp_reg != reg_to_register_object(R_I0_num), "temp must not break oop_reg");
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
#ifdef ASSERT
__ save_frame(0);
AddressLiteral last_rethrow_addrlit(&last_rethrow);
@ -2911,17 +2911,17 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
%}
enc_class enc_membar_acquire %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::LoadLoad) );
%}
enc_class enc_membar_release %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore) );
%}
enc_class enc_membar_volatile %{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
%}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,166 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
#define CPU_X86_C2_MACROASSEMBLER_X86_HPP
// C2_MacroAssembler contains high-level macros for C2
public:
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full desription in macroAssembler_x86.cpp.
void fast_lock(Register obj, Register box, Register tmp,
Register scr, Register cx1, Register cx2,
BiasedLockingCounters* counters,
RTMLockingCounters* rtm_counters,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data,
bool use_rtm, bool profile_rtm);
void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
void rtm_stack_locking(Register obj, Register tmp, Register scr,
Register retry_on_abort_count,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(Register obj, Register box, Register tmp,
Register scr, Register retry_on_busy_count,
Register retry_on_abort_count,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL);
#endif
// Generic instructions support for use in .ad files C2 code generation
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Reductions for vectors of ints, longs, floats, and doubles.
// dst = src1 + reduce(op, src2) using vtmp as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
// dst = reduce(op, src2) using vtmp as temps
void reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
private:
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
public:
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
// IndexOf strings.
// Small strings are loaded through stack if they cross page boundary.
void string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae);
// IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through stack.
void string_indexofC8(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae);
// Smallest code: we don't need to load through stack,
// check string tail.
// helper function for string_compare
void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
Address::ScaleFactor scale, Address::ScaleFactor scale1,
Address::ScaleFactor scale2, Register index, int ae);
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae);
// Search for Non-ASCII character (Negative byte value) in a byte array,
// return true if it has any and false otherwise.
void has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2);
// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char);
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP

File diff suppressed because it is too large Load Diff

View File

@ -158,12 +158,6 @@ class MacroAssembler: public Assembler {
void incrementq(Register reg, int value = 1);
void incrementq(Address dst, int value = 1);
#ifdef COMPILER2
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
#endif
// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (dst-> encoding() == src->encoding()) return;
@ -681,40 +675,6 @@ class MacroAssembler: public Assembler {
Label& done, Label* slow_case = NULL,
BiasedLockingCounters* counters = NULL);
void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
#ifdef COMPILER2
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full desription in macroAssembler_x86.cpp.
void fast_lock(Register obj, Register box, Register tmp,
Register scr, Register cx1, Register cx2,
BiasedLockingCounters* counters,
RTMLockingCounters* rtm_counters,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data,
bool use_rtm, bool profile_rtm);
void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
void rtm_stack_locking(Register obj, Register tmp, Register scr,
Register retry_on_abort_count,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(Register obj, Register box, Register tmp,
Register scr, Register retry_on_busy_count,
Register retry_on_abort_count,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL);
#endif
#endif
Condition negate_condition(Condition cond);
@ -1635,60 +1595,6 @@ public:
void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
#ifdef COMPILER2
// Generic instructions support for use in .ad files C2 code generation
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Reductions for vectors of ints, longs, floats, and doubles.
// dst = src1 + reduce(op, src2) using vtmp as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
// dst = reduce(op, src2) using vtmp as temps
void reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
private:
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
#endif
public:
// C2 compiled method's prolog code.
@ -1701,51 +1607,6 @@ public:
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
void xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp);
#ifdef COMPILER2
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
// IndexOf strings.
// Small strings are loaded through stack if they cross page boundary.
void string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae);
// IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through stack.
void string_indexofC8(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae);
// Smallest code: we don't need to load through stack,
// check string tail.
// helper function for string_compare
void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
Address::ScaleFactor scale, Address::ScaleFactor scale1,
Address::ScaleFactor scale2, Register index, int ae);
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae);
// Search for Non-ASCII character (Negative byte value) in a byte array,
// return true if it has any and false otherwise.
void has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2);
// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char);
#endif
// Fill primitive arrays
void generate_fill(BasicType t, bool aligned,
Register to, Register value, Register count,

View File

@ -1177,7 +1177,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a handler.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_exception_handler());
if (base == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
@ -1195,7 +1195,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a handler.
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
address base = __ start_a_stub(size_deopt_handler());
if (base == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
@ -1716,7 +1716,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
"no non-adjacent vector moves" );
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
int offset = __ offset();
switch (ireg) {
case Op_VecS: // copy whole register
@ -1782,7 +1782,7 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
// into scratch buffer is used to get size in 64-bit VM.
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
if (cbuf) {
MacroAssembler _masm(cbuf);
C2_MacroAssembler _masm(cbuf);
int offset = __ offset();
if (is_load) {
switch (ireg) {
@ -1985,7 +1985,7 @@ static inline jlong replicate8_imm(int con, int width) {
#endif
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ nop(_count);
}
@ -2000,7 +2000,7 @@ static inline jlong replicate8_imm(int con, int width) {
#endif
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
__ int3();
}
@ -2016,7 +2016,7 @@ encode %{
if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find majik cookie on stack
int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);
Label L;
__ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
__ jccb(Assembler::equal, L);

View File

@ -2881,7 +2881,7 @@ void ADLParser::ins_encode_parse_block(InstructForm& inst) {
// name is chosen to match the __ idiom used for assembly in other
// parts of hotspot and assumes the existence of the standard
// #define __ _masm.
encoding->add_code(" MacroAssembler _masm(&cbuf);\n");
encoding->add_code(" C2_MacroAssembler _masm(&cbuf);\n");
}
// Parse the following %{ }% block
@ -3004,9 +3004,9 @@ void ADLParser::ins_encode_parse_block_impl(InstructForm& inst, EncClass* encodi
// which synthesizes a new encoding class taking the same arguments as
// the InstructForm, and automatically prefixes the definition with:
//
// MacroAssembler masm(&cbuf);\n");
// C2_MacroAssembler masm(&cbuf);\n");
//
// making it more compact to take advantage of the MacroAssembler and
// making it more compact to take advantage of the C2_MacroAssembler and
// placing the assembly closer to it's use by instructions.
void ADLParser::ins_encode_parse(InstructForm& inst) {

View File

@ -211,7 +211,6 @@ int main(int argc, char *argv[])
AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._VM_file._name));
AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._HPP_file._name));
AD.addInclude(AD._CPP_file, "memory/allocation.inline.hpp");
AD.addInclude(AD._CPP_file, "asm/macroAssembler.inline.hpp");
AD.addInclude(AD._CPP_file, "code/compiledIC.hpp");
AD.addInclude(AD._CPP_file, "code/nativeInst.hpp");
AD.addInclude(AD._CPP_file, "code/vmreg.inline.hpp");
@ -221,6 +220,7 @@ int main(int argc, char *argv[])
AD.addInclude(AD._CPP_file, "oops/markWord.hpp");
AD.addInclude(AD._CPP_file, "oops/method.hpp");
AD.addInclude(AD._CPP_file, "oops/oop.inline.hpp");
AD.addInclude(AD._CPP_file, "opto/c2_MacroAssembler.hpp");
AD.addInclude(AD._CPP_file, "opto/cfgnode.hpp");
AD.addInclude(AD._CPP_file, "opto/intrinsicnode.hpp");
AD.addInclude(AD._CPP_file, "opto/locknode.hpp");

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_OPTO_C2_MACROASSEMBLER_HPP
#define SHARE_OPTO_C2_MACROASSEMBLER_HPP
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "utilities/macros.hpp"
class C2_MacroAssembler: public MacroAssembler {
public:
// creation
C2_MacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
#include CPU_HEADER(c2_MacroAssembler)
};
#endif // SHARE_OPTO_C2_MACROASSEMBLER_HPP