mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-13 01:13:10 +00:00
8241436: C2: Factor out C2-specific code from MacroAssembler
Reviewed-by: mdoerr, kvn, adinn
This commit is contained in:
parent
1dd60a35d1
commit
536e062a56
@ -1545,7 +1545,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
#endif
|
||||
|
||||
// Code emission for a breakpoint node: constructs an assembler over `cbuf`
// and calls brk(0) through the `__` shorthand. `ra_` is not referenced here.
// NOTE(review): this listing looks like a rendered diff -- the MacroAssembler
// and C2_MacroAssembler declarations below are presumably the old and new form
// of a single statement; confirm against the real file.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ brk(0);
|
||||
}
|
||||
|
||||
@ -1562,7 +1562,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
for (int i = 0; i < _count; i++) {
|
||||
__ nop();
|
||||
}
|
||||
@ -1622,7 +1622,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
|
||||
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
// n.b. frame size includes space for return pc and rfp
|
||||
const long framesize = C->output()->frame_size_in_bytes();
|
||||
@ -1704,7 +1704,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
|
||||
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int framesize = C->output()->frame_slots() << LogBytesPerInt;
|
||||
|
||||
__ remove_frame(framesize);
|
||||
@ -1806,7 +1806,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
uint ireg = ideal_reg();
|
||||
assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
|
||||
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
|
||||
// stack->stack
|
||||
@ -1834,7 +1834,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
}
|
||||
}
|
||||
} else if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
switch (src_lo_rc) {
|
||||
case rc_int:
|
||||
if (dst_lo_rc == rc_int) { // gpr --> gpr copy
|
||||
@ -1842,7 +1842,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
__ mov(as_Register(Matcher::_regEncode[dst_lo]),
|
||||
as_Register(Matcher::_regEncode[src_lo]));
|
||||
} else {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
__ movw(as_Register(Matcher::_regEncode[dst_lo]),
|
||||
as_Register(Matcher::_regEncode[src_lo]));
|
||||
}
|
||||
@ -1952,7 +1952,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
#endif
|
||||
|
||||
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
||||
int reg = ra_->get_encode(this);
|
||||
@ -1991,7 +1991,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
|
||||
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
|
||||
{
|
||||
// This is the unverified entry point.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
__ cmp_klass(j_rarg0, rscratch2, rscratch1);
|
||||
Label skip;
|
||||
@ -2018,7 +2018,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
|
||||
// br rscratch1
|
||||
// Note that the code buffer's insts_mark is always relative to insts.
|
||||
// That's why we must use the macroassembler to generate a handler.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address base = __ start_a_stub(size_exception_handler());
|
||||
if (base == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
@ -2036,7 +2036,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
|
||||
{
|
||||
// Note that the code buffer's insts_mark is always relative to insts.
|
||||
// That's why we must use the macroassembler to generate a handler.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address base = __ start_a_stub(size_deopt_handler());
|
||||
if (base == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
@ -2403,7 +2403,7 @@ void Compile::reshape_address(AddPNode* addp) {
|
||||
|
||||
|
||||
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \
|
||||
MacroAssembler _masm(&cbuf); \
|
||||
C2_MacroAssembler _masm(&cbuf); \
|
||||
{ \
|
||||
guarantee(INDEX == -1, "mode not permitted for volatile"); \
|
||||
guarantee(DISP == 0, "mode not permitted for volatile"); \
|
||||
@ -2448,7 +2448,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
|
||||
// Used for all non-volatile memory accesses. The use of
|
||||
// $mem->opcode() to discover whether this pattern uses sign-extended
|
||||
// offsets is something of a kludge.
|
||||
static void loadStore(MacroAssembler masm, mem_insn insn,
|
||||
static void loadStore(C2_MacroAssembler masm, mem_insn insn,
|
||||
Register reg, int opcode,
|
||||
Register base, int index, int scale, int disp,
|
||||
int size_in_memory)
|
||||
@ -2467,7 +2467,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
|
||||
(masm.*insn)(reg, addr);
|
||||
}
|
||||
|
||||
static void loadStore(MacroAssembler masm, mem_float_insn insn,
|
||||
static void loadStore(C2_MacroAssembler masm, mem_float_insn insn,
|
||||
FloatRegister reg, int opcode,
|
||||
Register base, int index, int size, int disp,
|
||||
int size_in_memory)
|
||||
@ -2498,7 +2498,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
|
||||
}
|
||||
}
|
||||
|
||||
static void loadStore(MacroAssembler masm, mem_vector_insn insn,
|
||||
static void loadStore(C2_MacroAssembler masm, mem_vector_insn insn,
|
||||
FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
|
||||
int opcode, Register base, int index, int size, int disp)
|
||||
{
|
||||
@ -2551,7 +2551,7 @@ encode %{
|
||||
|
||||
// catch all for unimplemented encodings
|
||||
// Fallback encoding: its whole body is a call to unimplemented() with the
// message "C2 catch all".
// NOTE(review): the two `_masm` declarations below appear to be the pre- and
// post-change sides of a diff line, not two real statements -- confirm.
enc_class enc_unimplemented %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ unimplemented("C2 catch all");
|
||||
%}
|
||||
|
||||
@ -2561,7 +2561,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
%}
|
||||
|
||||
@ -2569,7 +2569,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
%}
|
||||
|
||||
@ -2577,7 +2577,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
%}
|
||||
|
||||
@ -2585,7 +2585,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
%}
|
||||
|
||||
@ -2593,7 +2593,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
|
||||
%}
|
||||
|
||||
@ -2601,7 +2601,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
|
||||
%}
|
||||
|
||||
@ -2609,7 +2609,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
|
||||
%}
|
||||
|
||||
@ -2617,7 +2617,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
|
||||
%}
|
||||
|
||||
@ -2625,7 +2625,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
|
||||
@ -2633,7 +2633,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
|
||||
@ -2641,7 +2641,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
|
||||
@ -2649,7 +2649,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
|
||||
%}
|
||||
|
||||
@ -2657,7 +2657,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
|
||||
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
|
||||
@ -2665,7 +2665,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
|
||||
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
|
||||
%}
|
||||
|
||||
@ -2673,14 +2673,14 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
|
||||
Register src_reg = as_Register($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
%}
|
||||
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strb0(memory1 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
%}
|
||||
@ -2689,14 +2689,14 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
|
||||
Register src_reg = as_Register($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
|
||||
%}
|
||||
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strh0(memory2 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
|
||||
%}
|
||||
@ -2705,14 +2705,14 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
|
||||
Register src_reg = as_Register($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strw0(memory4 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
@ -2724,19 +2724,19 @@ encode %{
|
||||
// we sometimes get asked to store the stack pointer into the
|
||||
// current thread -- we cannot do that directly on AArch64
|
||||
if (src_reg == r31_sp) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
|
||||
__ mov(rscratch2, sp);
|
||||
src_reg = rscratch2;
|
||||
}
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
|
||||
%}
|
||||
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_str0(memory8 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
|
||||
%}
|
||||
@ -2745,7 +2745,7 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
%}
|
||||
|
||||
@ -2753,14 +2753,14 @@ encode %{
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
|
||||
%}
|
||||
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strw_immn(immN src, memory1 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address con = (address)$src$$constant;
|
||||
// need to do this the hard way until we can manage relocs
|
||||
// for 32 bit constants
|
||||
@ -2773,7 +2773,7 @@ encode %{
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strw_immnk(immN src, memory4 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address con = (address)$src$$constant;
|
||||
// need to do this the hard way until we can manage relocs
|
||||
// for 32 bit constants
|
||||
@ -2786,7 +2786,7 @@ encode %{
|
||||
// This encoding class is generated automatically from ad_encode.m4.
|
||||
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
|
||||
enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ membar(Assembler::StoreStore);
|
||||
loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
|
||||
@ -2797,37 +2797,37 @@ encode %{
|
||||
// Vector loads and stores
|
||||
// Vector load, SIMD_RegVariant S. Converts $dst to a FloatRegister and hands
// it to loadStore() together with &MacroAssembler::ldr and the pieces of the
// $mem operand (opcode, base register, index, scale, displacement).
// NOTE(review): the two `loadStore(` opening lines below look like the old
// (MacroAssembler) and new (C2_MacroAssembler) sides of one diff line; only
// one of them should exist in the real file -- confirm.
enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
|
||||
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
|
||||
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
|
||||
// Vector load, SIMD_RegVariant D. Same shape as the S variant: $dst becomes a
// FloatRegister and is passed to loadStore() with &MacroAssembler::ldr and the
// decomposed $mem operand (opcode, base, index, scale, disp).
// NOTE(review): the two `loadStore(` opening lines below look like the old and
// new sides of one diff line -- confirm against the real file.
enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
|
||||
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
|
||||
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
|
||||
// Vector load, SIMD_RegVariant Q, for a vecX destination. $dst becomes a
// FloatRegister and is passed to loadStore() with &MacroAssembler::ldr and the
// decomposed $mem operand (opcode, base, index, scale, disp).
// NOTE(review): the two `loadStore(` opening lines below look like the old and
// new sides of one diff line -- confirm against the real file.
enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
|
||||
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
|
||||
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
|
||||
// Vector store, SIMD_RegVariant S. Converts $src to a FloatRegister and hands
// it to loadStore() together with &MacroAssembler::str and the pieces of the
// $mem operand (opcode, base register, index, scale, displacement).
// NOTE(review): the two `loadStore(` opening lines below look like the old and
// new sides of one diff line -- confirm against the real file.
enc_class aarch64_enc_strvS(vecD src, memory mem) %{
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
|
||||
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
|
||||
// Vector store, SIMD_RegVariant D. $src becomes a FloatRegister and is passed
// to loadStore() with &MacroAssembler::str and the decomposed $mem operand
// (opcode, base, index, scale, disp).
// NOTE(review): the two `loadStore(` opening lines below look like the old and
// new sides of one diff line -- confirm against the real file.
enc_class aarch64_enc_strvD(vecD src, memory mem) %{
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
|
||||
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
|
||||
// Vector store, SIMD_RegVariant Q, for a vecX source. $src becomes a
// FloatRegister and is passed to loadStore() with &MacroAssembler::str and the
// decomposed $mem operand (opcode, base, index, scale, disp).
// NOTE(review): the two `loadStore(` opening lines below look like the old and
// new sides of one diff line -- confirm against the real file.
enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
|
||||
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
|
||||
@ -2929,7 +2929,7 @@ encode %{
|
||||
// we sometimes get asked to store the stack pointer into the
|
||||
// current thread -- we cannot do that directly on AArch64
|
||||
if (src_reg == r31_sp) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
|
||||
__ mov(rscratch2, sp);
|
||||
src_reg = rscratch2;
|
||||
@ -2940,7 +2940,7 @@ encode %{
|
||||
|
||||
enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
|
||||
{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
__ fmovs(rscratch2, src_reg);
|
||||
}
|
||||
@ -2950,7 +2950,7 @@ encode %{
|
||||
|
||||
enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
|
||||
{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
__ fmovd(rscratch2, src_reg);
|
||||
}
|
||||
@ -2961,7 +2961,7 @@ encode %{
|
||||
// synchronized read/update encodings
|
||||
|
||||
enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register base = as_Register($mem$$base);
|
||||
int index = $mem$$index;
|
||||
@ -2990,7 +2990,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register src_reg = as_Register($src$$reg);
|
||||
Register base = as_Register($mem$$base);
|
||||
int index = $mem$$index;
|
||||
@ -3020,7 +3020,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::xword, /*acquire*/ false, /*release*/ true,
|
||||
@ -3028,7 +3028,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::word, /*acquire*/ false, /*release*/ true,
|
||||
@ -3036,7 +3036,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::halfword, /*acquire*/ false, /*release*/ true,
|
||||
@ -3044,7 +3044,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::byte, /*acquire*/ false, /*release*/ true,
|
||||
@ -3057,7 +3057,7 @@ encode %{
|
||||
// CompareAndSwap sequence to serve as a barrier on acquiring a
|
||||
// lock.
|
||||
enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::xword, /*acquire*/ true, /*release*/ true,
|
||||
@ -3065,7 +3065,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::word, /*acquire*/ true, /*release*/ true,
|
||||
@ -3073,7 +3073,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::halfword, /*acquire*/ true, /*release*/ true,
|
||||
@ -3081,7 +3081,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
Assembler::byte, /*acquire*/ true, /*release*/ true,
|
||||
@ -3090,7 +3090,7 @@ encode %{
|
||||
|
||||
// auxiliary used for CompareAndSwapX to set result register
|
||||
// Converts $res to a Register and calls cset() on it with Assembler::EQ.
// NOTE(review): the two `_masm` declarations below appear to be the pre- and
// post-change sides of a diff line, not two real statements -- confirm.
enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register res_reg = as_Register($res$$reg);
|
||||
__ cset(res_reg, Assembler::EQ);
|
||||
%}
|
||||
@ -3098,7 +3098,7 @@ encode %{
|
||||
// prefetch encodings
|
||||
|
||||
enc_class aarch64_enc_prefetchw(memory mem) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register base = as_Register($mem$$base);
|
||||
int index = $mem$$index;
|
||||
int scale = $mem$$scale;
|
||||
@ -3119,7 +3119,7 @@ encode %{
|
||||
/// mov encodings
|
||||
|
||||
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
u_int32_t con = (u_int32_t)$src$$constant;
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
if (con == 0) {
|
||||
@ -3130,7 +3130,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
u_int64_t con = (u_int64_t)$src$$constant;
|
||||
if (con == 0) {
|
||||
@ -3141,7 +3141,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
address con = (address)$src$$constant;
|
||||
if (con == NULL || con == (address)1) {
|
||||
@ -3166,19 +3166,19 @@ encode %{
|
||||
%}
|
||||
|
||||
// Pointer move for an immP0 source: copies zr into the destination register.
// The $src operand is not read in the body.
// NOTE(review): the two `_masm` declarations below appear to be the pre- and
// post-change sides of a diff line, not two real statements -- confirm.
enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
__ mov(dst_reg, zr);
|
||||
%}
|
||||
|
||||
// Pointer move for an immP_1 source: moves the 64-bit constant 1 into the
// destination register. The $src operand is not read in the body.
// NOTE(review): the two `_masm` declarations below appear to be the pre- and
// post-change sides of a diff line, not two real statements -- confirm.
enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
__ mov(dst_reg, (u_int64_t)1);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address page = (address)$src$$constant;
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
unsigned long off;
|
||||
@ -3187,12 +3187,12 @@ encode %{
|
||||
%}
|
||||
|
||||
// Delegates entirely to load_byte_map_base() with the destination register.
// The $src operand is not read in the body.
// NOTE(review): the two `_masm` declarations below appear to be the pre- and
// post-change sides of a diff line, not two real statements -- confirm.
enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ load_byte_map_base($dst$$Register);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
address con = (address)$src$$constant;
|
||||
if (con == NULL) {
|
||||
@ -3205,13 +3205,13 @@ encode %{
|
||||
%}
|
||||
|
||||
// Move for an immN0 source into an iRegN destination: copies zr into the
// destination register. The $src operand is not read in the body.
// NOTE(review): the two `_masm` declarations below appear to be the pre- and
// post-change sides of a diff line, not two real statements -- confirm.
enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
__ mov(dst_reg, zr);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
address con = (address)$src$$constant;
|
||||
if (con == NULL) {
|
||||
@ -3226,7 +3226,7 @@ encode %{
|
||||
// arithmetic encodings
|
||||
|
||||
enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register src_reg = as_Register($src1$$reg);
|
||||
int32_t con = (int32_t)$src2$$constant;
|
||||
@ -3240,7 +3240,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register src_reg = as_Register($src1$$reg);
|
||||
int32_t con = (int32_t)$src2$$constant;
|
||||
@ -3254,7 +3254,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register src1_reg = as_Register($src1$$reg);
|
||||
Register src2_reg = as_Register($src2$$reg);
|
||||
@ -3262,7 +3262,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register src1_reg = as_Register($src1$$reg);
|
||||
Register src2_reg = as_Register($src2$$reg);
|
||||
@ -3270,7 +3270,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register src1_reg = as_Register($src1$$reg);
|
||||
Register src2_reg = as_Register($src2$$reg);
|
||||
@ -3278,7 +3278,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register dst_reg = as_Register($dst$$reg);
|
||||
Register src1_reg = as_Register($src1$$reg);
|
||||
Register src2_reg = as_Register($src2$$reg);
|
||||
@ -3288,14 +3288,14 @@ encode %{
|
||||
// compare instruction encodings
|
||||
|
||||
enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg1 = as_Register($src1$$reg);
|
||||
Register reg2 = as_Register($src2$$reg);
|
||||
__ cmpw(reg1, reg2);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg = as_Register($src1$$reg);
|
||||
int32_t val = $src2$$constant;
|
||||
if (val >= 0) {
|
||||
@ -3306,7 +3306,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg1 = as_Register($src1$$reg);
|
||||
u_int32_t val = (u_int32_t)$src2$$constant;
|
||||
__ movw(rscratch1, val);
|
||||
@ -3314,14 +3314,14 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg1 = as_Register($src1$$reg);
|
||||
Register reg2 = as_Register($src2$$reg);
|
||||
__ cmp(reg1, reg2);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg = as_Register($src1$$reg);
|
||||
int64_t val = $src2$$constant;
|
||||
if (val >= 0) {
|
||||
@ -3336,7 +3336,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg1 = as_Register($src1$$reg);
|
||||
u_int64_t val = (u_int64_t)$src2$$constant;
|
||||
__ mov(rscratch1, val);
|
||||
@ -3344,45 +3344,45 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg1 = as_Register($src1$$reg);
|
||||
Register reg2 = as_Register($src2$$reg);
|
||||
__ cmp(reg1, reg2);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg1 = as_Register($src1$$reg);
|
||||
Register reg2 = as_Register($src2$$reg);
|
||||
__ cmpw(reg1, reg2);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_testp(iRegP src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg = as_Register($src$$reg);
|
||||
__ cmp(reg, zr);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_testn(iRegN src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register reg = as_Register($src$$reg);
|
||||
__ cmpw(reg, zr);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_b(label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label *L = $lbl$$label;
|
||||
__ b(*L);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label *L = $lbl$$label;
|
||||
__ br ((Assembler::Condition)$cmp$$cmpcode, *L);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label *L = $lbl$$label;
|
||||
__ br ((Assembler::Condition)$cmp$$cmpcode, *L);
|
||||
%}
|
||||
@ -3395,7 +3395,7 @@ encode %{
|
||||
Register result_reg = as_Register($result$$reg);
|
||||
|
||||
Label miss;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
|
||||
NULL, &miss,
|
||||
/*set_cond_codes:*/ true);
|
||||
@ -3406,7 +3406,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_java_static_call(method meth) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address addr = (address)$meth$$method;
|
||||
address call;
|
||||
@ -3433,7 +3433,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_java_dynamic_call(method meth) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int method_index = resolved_method_index(cbuf);
|
||||
address call = __ ic_call((address)$meth$$method, method_index);
|
||||
if (call == NULL) {
|
||||
@ -3443,7 +3443,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_call_epilog() %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
if (VerifyStackAtCalls) {
|
||||
// Check that stack depth is unchanged: find majik cookie on stack
|
||||
__ call_Unimplemented();
|
||||
@ -3451,7 +3451,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_java_to_runtime(method meth) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
// some calls to generated routines (arraycopy code) are scheduled
|
||||
// by C2 as runtime calls. if so we can call them using a br (they
|
||||
@ -3478,23 +3478,23 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_rethrow() %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_ret() %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ ret(lr);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_tail_call(iRegP jump_target) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register target_reg = as_Register($jump_target$$reg);
|
||||
__ br(target_reg);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register target_reg = as_Register($jump_target$$reg);
|
||||
// exception oop should be in r0
|
||||
// ret addr has been popped into lr
|
||||
@ -3504,7 +3504,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register oop = as_Register($object$$reg);
|
||||
Register box = as_Register($box$$reg);
|
||||
Register disp_hdr = as_Register($tmp$$reg);
|
||||
@ -3582,7 +3582,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register oop = as_Register($object$$reg);
|
||||
Register box = as_Register($box$$reg);
|
||||
Register disp_hdr = as_Register($tmp$$reg);
|
||||
@ -8075,7 +8075,7 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
|
||||
"mov $dst, $tmp\t# vector (1D)" %}
|
||||
ins_encode %{
|
||||
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
|
||||
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
|
||||
__ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
|
||||
@ -8118,7 +8118,7 @@ instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
|
||||
"mov $dst, $tmp\t# vector (1D)" %}
|
||||
ins_encode %{
|
||||
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
|
||||
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
|
||||
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
|
||||
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
|
||||
__ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
|
||||
|
||||
769
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
Normal file
769
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
Normal file
@ -0,0 +1,769 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
|
||||
// Assembler annotation helpers used by the emitters in this file.
//  - PRODUCT builds: BLOCK_COMMENT expands to nothing and STOP only calls stop().
//  - other builds:   BLOCK_COMMENT forwards to block_comment() and STOP emits
//                    the message through block_comment() before calling stop().
// NOTE: the non-PRODUCT STOP, and BIND in every build, expand to more than one
// statement, so they must not be used as the unbraced body of an if/else/loop.
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) /* nothing */
|
||||
#define STOP(error) stop(error)
|
||||
#else
|
||||
#define BLOCK_COMMENT(str) block_comment(str)
|
||||
#define STOP(error) block_comment(error); stop(error)
|
||||
#endif
|
||||
|
||||
// Binds the label and then passes the label's own name (stringified, with a
// trailing ':') to BLOCK_COMMENT.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
|
||||
|
||||
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
|
||||
|
||||
// Search for str1 in str2 and return index or -1.
//
// Emits code that looks for the pattern (str1, cnt1) inside the source string
// (str2, cnt2) and leaves the character index of the match, or -1, in result.
//
//   ae      - StrIntrinsicNode encoding pair. LL, UL and LU are tested
//             explicitly; any other value is handled as "both strings use
//             2-byte characters" (UU). It selects the per-string character
//             size/shift and the load instructions used below.
//   icnt1   - -1 selects the general paths that read the pattern length from
//             cnt1 (Boyer-Moore-Horspool, stub call or linear scan); the
//             values 1, 2, 3 and 4 each select one specialised short-pattern
//             loop (presumably a pattern length known at compile time -
//             confirm against the callers).
//   tmp1..tmp6 - scratch registers; rscratch1 and rscratch2 are used as well.
//
// Side effects visible in the code below: str1, str2, cnt1 and cnt2 are
// overwritten, and on the icnt1 == -1 path v0 is overwritten and ASIZE (256)
// bytes of stack are used temporarily for the bad-character table.
|
||||
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
|
||||
Register cnt2, Register cnt1,
|
||||
Register tmp1, Register tmp2,
|
||||
Register tmp3, Register tmp4,
|
||||
Register tmp5, Register tmp6,
|
||||
int icnt1, Register result, int ae) {
|
||||
// NOTE: tmp5, tmp6 can be zr depending on specific method version
|
||||
Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
|
||||
|
||||
// Local aliases: several of them share a register with an argument or with
// each other (cnt1_neg/cnt1, cnt2_neg/cnt2, cnt2tmp/tmp2).
Register ch1 = rscratch1;
|
||||
Register ch2 = rscratch2;
|
||||
Register cnt1tmp = tmp1;
|
||||
Register cnt2tmp = tmp2;
|
||||
Register cnt1_neg = cnt1;
|
||||
Register cnt2_neg = cnt2;
|
||||
Register result_tmp = tmp4;
|
||||
|
||||
bool isL = ae == StrIntrinsicNode::LL;
|
||||
|
||||
bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
|
||||
bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
|
||||
int str1_chr_shift = str1_isL ? 0:1;
|
||||
int str2_chr_shift = str2_isL ? 0:1;
|
||||
int str1_chr_size = str1_isL ? 1:2;
|
||||
int str2_chr_size = str2_isL ? 1:2;
|
||||
chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
|
||||
chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
|
||||
|
||||
// Note, inline_string_indexOf() generates checks:
|
||||
// if (substr.count > string.count) return -1;
|
||||
// if (substr.count == 0) return 0;
|
||||
|
||||
// We have two strings, a source string in str2, cnt2 and a pattern string
|
||||
// in str1, cnt1. Find the 1st occurrence of pattern in source or return -1.
|
||||
|
||||
// For larger pattern and source we use a simplified Boyer Moore algorithm.
|
||||
// With a small pattern and source we use linear scan.
|
||||
|
||||
if (icnt1 == -1) {
|
||||
sub(result_tmp, cnt2, cnt1);
|
||||
cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
|
||||
br(LT, LINEARSEARCH);
|
||||
dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
|
||||
subs(zr, cnt1, 256);
|
||||
lsr(tmp1, cnt2, 2);
|
||||
ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
|
||||
br(GE, LINEARSTUB);
|
||||
}
|
||||
|
||||
// The Boyer Moore algorithm is based on the description here:-
|
||||
//
|
||||
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
|
||||
//
|
||||
// This describes an algorithm with 2 shift rules. The 'Bad Character' rule
|
||||
// and the 'Good Suffix' rule.
|
||||
//
|
||||
// These rules are essentially heuristics for how far we can shift the
|
||||
// pattern along the search string.
|
||||
//
|
||||
// The implementation here uses the 'Bad Character' rule only because of the
|
||||
// complexity of initialisation for the 'Good Suffix' rule.
|
||||
//
|
||||
// This is also known as the Boyer-Moore-Horspool algorithm:-
|
||||
//
|
||||
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
|
||||
//
|
||||
// This particular implementation has few java-specific optimizations.
|
||||
//
|
||||
// #define ASIZE 256
|
||||
//
|
||||
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
|
||||
// int i, j;
|
||||
// unsigned c;
|
||||
// unsigned char bc[ASIZE];
|
||||
//
|
||||
// /* Preprocessing */
|
||||
// for (i = 0; i < ASIZE; ++i)
|
||||
// bc[i] = m;
|
||||
// for (i = 0; i < m - 1; ) {
|
||||
// c = x[i];
|
||||
// ++i;
|
||||
// // c < 256 for Latin1 string, so, no need for branch
|
||||
// #ifdef PATTERN_STRING_IS_LATIN1
|
||||
// bc[c] = m - i;
|
||||
// #else
|
||||
// if (c < ASIZE) bc[c] = m - i;
|
||||
// #endif
|
||||
// }
|
||||
//
|
||||
// /* Searching */
|
||||
// j = 0;
|
||||
// while (j <= n - m) {
|
||||
// c = y[i+j];
|
||||
// if (x[m-1] == c)
|
||||
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
|
||||
// if (i < 0) return j;
|
||||
// // c < 256 for Latin1 string, so, no need for branch
|
||||
// #ifdef SOURCE_STRING_IS_LATIN1
|
||||
// // LL case: (c< 256) always true. Remove branch
|
||||
// j += bc[y[j+m-1]];
|
||||
// #endif
|
||||
// #ifndef PATTERN_STRING_IS_UTF
|
||||
// // UU case: need if (c<ASIZE) check. Skip 1 character if not.
|
||||
// if (c < ASIZE)
|
||||
// j += bc[y[j+m-1]];
|
||||
// else
|
||||
// j += 1
|
||||
// #endif
|
||||
// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
|
||||
// // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
|
||||
// if (c < ASIZE)
|
||||
// j += bc[y[j+m-1]];
|
||||
// else
|
||||
// j += m
|
||||
// #endif
|
||||
// }
|
||||
// }
|
||||
|
||||
if (icnt1 == -1) {
|
||||
Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
|
||||
BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
|
||||
Register cnt1end = tmp2;
|
||||
Register str2end = cnt2;
|
||||
Register skipch = tmp2;
|
||||
|
||||
// str1 length is >=8, so, we can read at least 1 register for cases when
|
||||
// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
|
||||
// UL case. We'll re-read last character in inner pre-loop code to have
|
||||
// single outer pre-loop load
|
||||
const int firstStep = isL ? 7 : 3;
|
||||
|
||||
const int ASIZE = 256;
|
||||
const int STORED_BYTES = 32; // amount of bytes stored per instruction
|
||||
// Reserve ASIZE bytes of stack for the bad-character table and fill every
// entry with the byte replicated into v0 by the dup above (taken from cnt1).
sub(sp, sp, ASIZE);
|
||||
mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
|
||||
mov(ch1, sp);
|
||||
BIND(BM_INIT_LOOP);
|
||||
stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
|
||||
subs(tmp5, tmp5, 1);
|
||||
br(GT, BM_INIT_LOOP);
|
||||
|
||||
sub(cnt1tmp, cnt1, 1);
|
||||
// tmp5 keeps the original str2 so the match index can be computed at BMMATCH
mov(tmp5, str2);
|
||||
add(str2end, str2, result_tmp, LSL, str2_chr_shift);
|
||||
sub(ch2, cnt1, 1);
|
||||
mov(tmp3, str1);
|
||||
// Walk the pattern (all characters but the last) and store the countdown
// value ch2 as the table entry of each character; for a 2-byte pattern,
// characters >= ASIZE are skipped.
BIND(BCLOOP);
|
||||
(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
|
||||
if (!str1_isL) {
|
||||
subs(zr, ch1, ASIZE);
|
||||
br(HS, BCSKIP);
|
||||
}
|
||||
strb(ch2, Address(sp, ch1));
|
||||
BIND(BCSKIP);
|
||||
subs(ch2, ch2, 1);
|
||||
br(GT, BCLOOP);
|
||||
|
||||
add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
|
||||
if (str1_isL == str2_isL) {
|
||||
// load last 8 bytes (8LL/4UU symbols)
|
||||
ldr(tmp6, Address(tmp6, -wordSize));
|
||||
} else {
|
||||
ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
|
||||
// convert Latin1 to UTF. We'll have to wait until load completed, but
|
||||
// it's still faster than per-character loads+checks
|
||||
lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
|
||||
ubfx(ch1, tmp6, 8, 8); // str1[N-2]
|
||||
ubfx(ch2, tmp6, 16, 8); // str1[N-3]
|
||||
andr(tmp6, tmp6, 0xFF); // str1[N-4]
|
||||
orr(ch2, ch1, ch2, LSL, 16);
|
||||
orr(tmp6, tmp6, tmp3, LSL, 48);
|
||||
orr(tmp6, tmp6, ch2, LSL, 16);
|
||||
}
|
||||
// Outer search loop: skipch is the source character aligned with the last
// pattern character; it is compared against the pattern's last character
// (tmp3) and later used to index the skip table.
BIND(BMLOOPSTR2);
|
||||
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
|
||||
if (str1_isL == str2_isL) {
|
||||
// re-init tmp3. It's for free because it's executed in parallel with
|
||||
// load above. Alternative is to initialize it before loop, but it'll
|
||||
// affect performance on in-order systems with 2 or more ld/st pipelines
|
||||
lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
|
||||
}
|
||||
if (!isL) { // UU/UL case
|
||||
lsl(ch2, cnt1tmp, 1); // offset in bytes
|
||||
}
|
||||
cmp(tmp3, skipch);
|
||||
br(NE, BMSKIP);
|
||||
ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
|
||||
mov(ch1, tmp6);
|
||||
if (isL) {
|
||||
b(BMLOOPSTR1_AFTER_LOAD);
|
||||
} else {
|
||||
sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
|
||||
b(BMLOOPSTR1_CMP);
|
||||
}
|
||||
// Inner loop: compare the remaining characters one at a time, walking
// cnt1tmp down towards the start of the pattern.
BIND(BMLOOPSTR1);
|
||||
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
|
||||
BIND(BMLOOPSTR1_AFTER_LOAD);
|
||||
subs(cnt1tmp, cnt1tmp, 1);
|
||||
br(LT, BMLOOPSTR1_LASTCMP);
|
||||
BIND(BMLOOPSTR1_CMP);
|
||||
cmp(ch1, ch2);
|
||||
br(EQ, BMLOOPSTR1);
|
||||
BIND(BMSKIP);
|
||||
if (!isL) {
|
||||
// if we've met UTF symbol while searching Latin1 pattern, then we can
|
||||
// skip cnt1 symbols
|
||||
if (str1_isL != str2_isL) {
|
||||
mov(result_tmp, cnt1);
|
||||
} else {
|
||||
mov(result_tmp, 1);
|
||||
}
|
||||
subs(zr, skipch, ASIZE);
|
||||
br(HS, BMADV);
|
||||
}
|
||||
ldrb(result_tmp, Address(sp, skipch)); // load skip distance
|
||||
BIND(BMADV);
|
||||
sub(cnt1tmp, cnt1, 1);
|
||||
add(str2, str2, result_tmp, LSL, str2_chr_shift);
|
||||
cmp(str2, str2end);
|
||||
br(LE, BMLOOPSTR2);
|
||||
// source exhausted: release the table before reporting no match
add(sp, sp, ASIZE);
|
||||
b(NOMATCH);
|
||||
BIND(BMLOOPSTR1_LASTCMP);
|
||||
cmp(ch1, ch2);
|
||||
br(NE, BMSKIP);
|
||||
BIND(BMMATCH);
|
||||
// index = (current str2 - original str2), converted from bytes to characters
sub(result, str2, tmp5);
|
||||
if (!str2_isL) lsr(result, result, 1);
|
||||
add(sp, sp, ASIZE);
|
||||
b(DONE);
|
||||
|
||||
BIND(LINEARSTUB);
|
||||
cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
|
||||
br(LT, LINEAR_MEDIUM);
|
||||
mov(result, zr);
|
||||
// Otherwise call the out-of-line linear-search stub that matches the
// encoding pair (presumably the stub leaves the answer in result - confirm
// against the stub generator).
RuntimeAddress stub = NULL;
|
||||
if (isL) {
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
|
||||
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
|
||||
} else if (str1_isL) {
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
|
||||
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
|
||||
} else {
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
|
||||
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
|
||||
}
|
||||
trampoline_call(stub);
|
||||
b(DONE);
|
||||
}
|
||||
|
||||
// Linear-scan paths. All of them advance str2 to the last candidate
// position and scan with a negative byte offset (cnt2_neg) that counts up
// towards zero; MATCH converts that offset back into a character index.
BIND(LINEARSEARCH);
|
||||
{
|
||||
Label DO1, DO2, DO3;
|
||||
|
||||
Register str2tmp = tmp2;
|
||||
Register first = tmp3;
|
||||
|
||||
if (icnt1 == -1)
|
||||
{
|
||||
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
|
||||
|
||||
cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
|
||||
br(LT, DOSHORT);
|
||||
BIND(LINEAR_MEDIUM);
|
||||
(this->*str1_load_1chr)(first, Address(str1));
|
||||
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
|
||||
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
|
||||
BIND(FIRST_LOOP);
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmp(first, ch2);
|
||||
br(EQ, STR1_LOOP);
|
||||
BIND(STR2_NEXT);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, FIRST_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
BIND(STR1_LOOP);
|
||||
adds(cnt1tmp, cnt1_neg, str1_chr_size);
|
||||
add(cnt2tmp, cnt2_neg, str2_chr_size);
|
||||
br(GE, MATCH);
|
||||
|
||||
BIND(STR1_NEXT);
|
||||
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
|
||||
cmp(ch1, ch2);
|
||||
br(NE, STR2_NEXT);
|
||||
adds(cnt1tmp, cnt1tmp, str1_chr_size);
|
||||
add(cnt2tmp, cnt2tmp, str2_chr_size);
|
||||
br(LT, STR1_NEXT);
|
||||
b(MATCH);
|
||||
|
||||
BIND(DOSHORT);
|
||||
if (str1_isL == str2_isL) {
|
||||
// dispatch on cnt1: < 2 -> DO1, > 2 -> DO3, == 2 falls through to DO2
cmp(cnt1, (u1)2);
|
||||
br(LT, DO1);
|
||||
br(GT, DO3);
|
||||
}
|
||||
}
|
||||
|
||||
if (icnt1 == 4) {
|
||||
Label CH1_LOOP;
|
||||
|
||||
(this->*load_4chr)(ch1, str1);
|
||||
sub(result_tmp, cnt2, 4);
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
|
||||
BIND(CH1_LOOP);
|
||||
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmp(ch1, ch2);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
}
|
||||
|
||||
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
|
||||
Label CH1_LOOP;
|
||||
|
||||
BIND(DO2);
|
||||
(this->*load_2chr)(ch1, str1);
|
||||
if (icnt1 == 2) {
|
||||
sub(result_tmp, cnt2, 2);
|
||||
}
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
BIND(CH1_LOOP);
|
||||
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmp(ch1, ch2);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
}
|
||||
|
||||
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
|
||||
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
|
||||
|
||||
BIND(DO3);
|
||||
(this->*load_2chr)(first, str1);
|
||||
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
|
||||
if (icnt1 == 3) {
|
||||
sub(result_tmp, cnt2, 3);
|
||||
}
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
BIND(FIRST_LOOP);
|
||||
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmpw(first, ch2);
|
||||
br(EQ, STR1_LOOP);
|
||||
BIND(STR2_NEXT);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, FIRST_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
BIND(STR1_LOOP);
|
||||
add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
|
||||
cmp(ch1, ch2);
|
||||
br(NE, STR2_NEXT);
|
||||
b(MATCH);
|
||||
}
|
||||
|
||||
if (icnt1 == -1 || icnt1 == 1) {
|
||||
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
|
||||
|
||||
BIND(DO1);
|
||||
(this->*str1_load_1chr)(ch1, str1);
|
||||
cmp(cnt2, (u1)8);
|
||||
br(LT, DO1_SHORT);
|
||||
|
||||
sub(result_tmp, cnt2, 8/str2_chr_size);
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
|
||||
// replicate the single pattern character into every lane of ch1
if (str2_isL) {
|
||||
orr(ch1, ch1, ch1, LSL, 8);
|
||||
}
|
||||
orr(ch1, ch1, ch1, LSL, 16);
|
||||
orr(ch1, ch1, ch1, LSL, 32);
|
||||
// Scan 8 bytes per iteration: after the eor a lane is zero exactly where
// the source character equals the pattern character, and
// (x - 0x01..01) & ~(x | 0x7f..7f) is non-zero iff some lane of x is zero.
BIND(CH1_LOOP);
|
||||
ldr(ch2, Address(str2, cnt2_neg));
|
||||
eor(ch2, ch1, ch2);
|
||||
sub(tmp1, ch2, tmp3);
|
||||
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
|
||||
bics(tmp1, tmp1, tmp2);
|
||||
br(NE, HAS_ZERO);
|
||||
adds(cnt2_neg, cnt2_neg, 8);
|
||||
br(LT, CH1_LOOP);
|
||||
|
||||
// Tail: if fewer than 8 bytes were left over, run one more iteration at
// offset 0 (the last 8 bytes). The br uses the flags of the cmp; the mov
// in between is relied upon not to change them.
cmp(cnt2_neg, (u1)8);
|
||||
mov(cnt2_neg, 0);
|
||||
br(LT, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
BIND(HAS_ZERO);
|
||||
// byte-reverse + count leading zeros gives the bit position of the first
// flagged lane; LSR 3 turns it into a byte offset added to cnt2_neg
rev(tmp1, tmp1);
|
||||
clz(tmp1, tmp1);
|
||||
add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
|
||||
b(MATCH);
|
||||
|
||||
BIND(DO1_SHORT);
|
||||
mov(result_tmp, cnt2);
|
||||
lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
|
||||
BIND(DO1_LOOP);
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmpw(ch1, ch2);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LT, DO1_LOOP);
|
||||
}
|
||||
}
|
||||
BIND(NOMATCH);
|
||||
mov(result, -1);
|
||||
b(DONE);
|
||||
BIND(MATCH);
|
||||
// result = result_tmp + (byte offset cnt2_neg converted to characters)
add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
|
||||
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
|
||||
|
||||
// Emits code that searches the string at str1 (cnt1 characters, read as
// 2-byte characters) for the character in ch and leaves the character index
// of the match, or -1, in result.
//
//   tmp1, tmp2, tmp3 - scratch registers; rscratch1 and rscratch2 are used too.
//
// str1 and cnt1 are overwritten, and ch is overwritten on the cnt1 >= 4 path.
// NOTE(review): the lane replication of ch below assumes its bits above the
// low 16 are clear - confirm against the callers.
void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3)
|
||||
{
|
||||
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
|
||||
// cnt1_neg shares cnt1's register: once the count has been consumed it holds
// a negative byte offset that runs up towards zero.
Register cnt1_neg = cnt1;
|
||||
Register ch1 = rscratch1;
|
||||
Register result_tmp = rscratch2;
|
||||
|
||||
// empty string: nothing to find
cbz(cnt1, NOMATCH);
|
||||
|
||||
// fewer than 4 characters: use the one-character-at-a-time loop
cmp(cnt1, (u1)4);
|
||||
br(LT, DO1_SHORT);
|
||||
|
||||
// replicate ch into all four 16-bit lanes of the register
orr(ch, ch, ch, LSL, 16);
|
||||
orr(ch, ch, ch, LSL, 32);
|
||||
|
||||
// Point str1 at the last 4-character group and address the string through
// a negative byte offset; result_tmp remembers the index of that group.
sub(cnt1, cnt1, 4);
|
||||
mov(result_tmp, cnt1);
|
||||
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
|
||||
sub(cnt1_neg, zr, cnt1, LSL, 1);
|
||||
|
||||
mov(tmp3, 0x0001000100010001);
|
||||
|
||||
// Scan 4 characters (8 bytes) per iteration: after the eor a 16-bit lane is
// zero exactly where the string character equals ch, and
// (x - 0x0001..0001) & ~(x | 0x7fff..7fff) is non-zero iff some lane is zero.
BIND(CH1_LOOP);
|
||||
ldr(ch1, Address(str1, cnt1_neg));
|
||||
eor(ch1, ch, ch1);
|
||||
sub(tmp1, ch1, tmp3);
|
||||
orr(tmp2, ch1, 0x7fff7fff7fff7fff);
|
||||
bics(tmp1, tmp1, tmp2);
|
||||
br(NE, HAS_ZERO);
|
||||
adds(cnt1_neg, cnt1_neg, 8);
|
||||
br(LT, CH1_LOOP);
|
||||
|
||||
// Tail: if the offset stopped short of 8, re-run one iteration at offset 0,
// i.e. on the last 4 characters (which may overlap the previous group).
// The br uses the flags of the cmp; the mov in between is relied upon not
// to change them.
cmp(cnt1_neg, (u1)8);
|
||||
mov(cnt1_neg, 0);
|
||||
br(LT, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
BIND(HAS_ZERO);
|
||||
// byte-reverse + count leading zeros gives the bit position of the first
// flagged lane; LSR 3 turns it into a byte offset added to cnt1_neg
rev(tmp1, tmp1);
|
||||
clz(tmp1, tmp1);
|
||||
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
|
||||
b(MATCH);
|
||||
|
||||
// Short strings: point str1 just past the end and walk a negative byte
// offset up to zero, comparing one 2-byte character per iteration.
BIND(DO1_SHORT);
|
||||
mov(result_tmp, cnt1);
|
||||
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
|
||||
sub(cnt1_neg, zr, cnt1, LSL, 1);
|
||||
BIND(DO1_LOOP);
|
||||
ldrh(ch1, Address(str1, cnt1_neg));
|
||||
cmpw(ch, ch1);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt1_neg, cnt1_neg, 2);
|
||||
br(LT, DO1_LOOP);
|
||||
// falls through to NOMATCH when the short loop runs off the end
BIND(NOMATCH);
|
||||
mov(result, -1);
|
||||
b(DONE);
|
||||
BIND(MATCH);
|
||||
// result = result_tmp + (byte offset cnt1_neg arithmetically shifted to characters)
add(result, result_tmp, cnt1_neg, ASR, 1);
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
// Compare strings.
|
||||
void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
|
||||
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
|
||||
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
|
||||
SHORT_LOOP_START, TAIL_CHECK;
|
||||
|
||||
bool isLL = ae == StrIntrinsicNode::LL;
|
||||
bool isLU = ae == StrIntrinsicNode::LU;
|
||||
bool isUL = ae == StrIntrinsicNode::UL;
|
||||
|
||||
// The stub threshold for LL strings is: 72 (64 + 8) chars
|
||||
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
|
||||
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
|
||||
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
|
||||
|
||||
bool str1_isL = isLL || isLU;
|
||||
bool str2_isL = isLL || isUL;
|
||||
|
||||
int str1_chr_shift = str1_isL ? 0 : 1;
|
||||
int str2_chr_shift = str2_isL ? 0 : 1;
|
||||
int str1_chr_size = str1_isL ? 1 : 2;
|
||||
int str2_chr_size = str2_isL ? 1 : 2;
|
||||
int minCharsInWord = isLL ? wordSize : wordSize/2;
|
||||
|
||||
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
|
||||
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
|
||||
(uxt_insn)&MacroAssembler::uxthw;
|
||||
|
||||
BLOCK_COMMENT("string_compare {");
|
||||
|
||||
// Bizarrely, the counts are passed in bytes, regardless of whether they
|
||||
// are L or U strings, however the result is always in characters.
|
||||
if (!str1_isL) asrw(cnt1, cnt1, 1);
|
||||
if (!str2_isL) asrw(cnt2, cnt2, 1);
|
||||
|
||||
// Compute the minimum of the string lengths and save the difference.
|
||||
subsw(result, cnt1, cnt2);
|
||||
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
|
||||
|
||||
// A very short string
|
||||
cmpw(cnt2, minCharsInWord);
|
||||
br(Assembler::LE, SHORT_STRING);
|
||||
|
||||
// Compare longwords
|
||||
// load first parts of strings and finish initialization while loading
|
||||
{
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
ldr(tmp1, Address(str1));
|
||||
cmp(str1, str2);
|
||||
br(Assembler::EQ, DONE);
|
||||
ldr(tmp2, Address(str2));
|
||||
cmp(cnt2, stub_threshold);
|
||||
br(GE, STUB);
|
||||
subsw(cnt2, cnt2, minCharsInWord);
|
||||
br(EQ, TAIL_CHECK);
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
cmp(cnt2, stub_threshold);
|
||||
br(GE, STUB);
|
||||
subw(cnt2, cnt2, 4);
|
||||
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
add(cnt1, cnt1, 4);
|
||||
fmovd(tmp1, vtmp);
|
||||
} else { // UL case
|
||||
ldr(tmp1, Address(str1));
|
||||
ldrs(vtmp, Address(str2));
|
||||
cmp(cnt2, stub_threshold);
|
||||
br(GE, STUB);
|
||||
subw(cnt2, cnt2, 4);
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
add(cnt1, cnt1, 8);
|
||||
fmovd(tmp2, vtmp);
|
||||
}
|
||||
adds(cnt2, cnt2, isUL ? 4 : 8);
|
||||
br(GE, TAIL);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DIFFERENCE);
|
||||
// main loop
|
||||
bind(NEXT_WORD);
|
||||
if (str1_isL == str2_isL) {
|
||||
ldr(tmp1, Address(str1, cnt2));
|
||||
ldr(tmp2, Address(str2, cnt2));
|
||||
adds(cnt2, cnt2, 8);
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1, cnt1));
|
||||
ldr(tmp2, Address(str2, cnt2));
|
||||
add(cnt1, cnt1, 4);
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp1, vtmp);
|
||||
adds(cnt2, cnt2, 8);
|
||||
} else { // UL
|
||||
ldrs(vtmp, Address(str2, cnt2));
|
||||
ldr(tmp1, Address(str1, cnt1));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
add(cnt1, cnt1, 8);
|
||||
fmovd(tmp2, vtmp);
|
||||
adds(cnt2, cnt2, 4);
|
||||
}
|
||||
br(GE, TAIL);
|
||||
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, NEXT_WORD);
|
||||
b(DIFFERENCE);
|
||||
bind(TAIL);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DIFFERENCE);
|
||||
// Last longword. In the case where length == 4 we compare the
|
||||
// same longword twice, but that's still faster than another
|
||||
// conditional branch.
|
||||
if (str1_isL == str2_isL) {
|
||||
ldr(tmp1, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp1, vtmp);
|
||||
} else { // UL
|
||||
ldrs(vtmp, Address(str2));
|
||||
ldr(tmp1, Address(str1));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp2, vtmp);
|
||||
}
|
||||
bind(TAIL_CHECK);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
bind(DIFFERENCE);
|
||||
rev(rscratch2, rscratch2);
|
||||
clz(rscratch2, rscratch2);
|
||||
andr(rscratch2, rscratch2, isLL ? -8 : -16);
|
||||
lsrv(tmp1, tmp1, rscratch2);
|
||||
(this->*ext_chr)(tmp1, tmp1);
|
||||
lsrv(tmp2, tmp2, rscratch2);
|
||||
(this->*ext_chr)(tmp2, tmp2);
|
||||
subw(result, tmp1, tmp2);
|
||||
b(DONE);
|
||||
}
|
||||
|
||||
bind(STUB);
|
||||
RuntimeAddress stub = NULL;
|
||||
switch(ae) {
|
||||
case StrIntrinsicNode::LL:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
|
||||
break;
|
||||
case StrIntrinsicNode::UU:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
|
||||
break;
|
||||
case StrIntrinsicNode::LU:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
|
||||
break;
|
||||
case StrIntrinsicNode::UL:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
|
||||
trampoline_call(stub);
|
||||
b(DONE);
|
||||
|
||||
bind(SHORT_STRING);
|
||||
// Is the minimum length zero?
|
||||
cbz(cnt2, DONE);
|
||||
// arrange code to do most branches while loading and loading next characters
|
||||
// while comparing previous
|
||||
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST_INIT);
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
b(SHORT_LOOP_START);
|
||||
bind(SHORT_LOOP);
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST);
|
||||
bind(SHORT_LOOP_START);
|
||||
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
|
||||
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
|
||||
cmp(tmp1, cnt1);
|
||||
br(NE, SHORT_LOOP_TAIL);
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST2);
|
||||
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
cmp(tmp2, rscratch1);
|
||||
br(EQ, SHORT_LOOP);
|
||||
sub(result, tmp2, rscratch1);
|
||||
b(DONE);
|
||||
bind(SHORT_LOOP_TAIL);
|
||||
sub(result, tmp1, cnt1);
|
||||
b(DONE);
|
||||
bind(SHORT_LAST2);
|
||||
cmp(tmp2, rscratch1);
|
||||
br(EQ, DONE);
|
||||
sub(result, tmp2, rscratch1);
|
||||
|
||||
b(DONE);
|
||||
bind(SHORT_LAST_INIT);
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
bind(SHORT_LAST);
|
||||
cmp(tmp1, cnt1);
|
||||
br(EQ, DONE);
|
||||
sub(result, tmp1, cnt1);
|
||||
|
||||
bind(DONE);
|
||||
|
||||
BLOCK_COMMENT("} string_compare");
|
||||
}
|
||||
48
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
Normal file
48
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
#define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
|
||||
// Emits code that compares the strings at str1 and str2 and leaves the
// comparison value in result. ae is the StrIntrinsicNode encoding pair
// (LL, UU, LU or UL). tmp1/tmp2 and vtmp1..vtmp3 are scratch registers.
// NOTE(review): the definition's own comment says cnt1/cnt2 are passed in
// bytes while the result is in characters - confirm for every caller.
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1, Register tmp2, FloatRegister vtmp1,
|
||||
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
|
||||
|
||||
// Emits code that searches one string for another and leaves the match
// index, or -1, in result. ae is the StrIntrinsicNode encoding pair.
// NOTE(review): the MacroAssembler definition touched by the same change
// names the first parameter str2 (the string being searched) and the second
// str1 (the pattern), with cnt2/cnt1 ordered to match, and calls the constant
// count icnt1. The names used here are swapped relative to that definition -
// confirm which string each register really carries before relying on them.
void string_indexof(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2,
|
||||
Register tmp3, Register tmp4,
|
||||
Register tmp5, Register tmp6,
|
||||
int int_cnt1, Register result, int ae);
|
||||
|
||||
// Emits code that searches the cnt1 two-byte characters starting at str1 for
// the character in ch and leaves its index, or -1, in result.
// tmp1..tmp3 are scratch registers.
void string_indexof_char(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
@ -54,7 +54,6 @@
|
||||
#ifdef COMPILER2
|
||||
#include "oops/oop.hpp"
|
||||
#include "opto/compile.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
#include "opto/node.hpp"
|
||||
#include "opto/output.hpp"
|
||||
#endif
|
||||
@ -4414,737 +4413,6 @@ void MacroAssembler::remove_frame(int framesize) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
|
||||
|
||||
// Search for str1 in str2 and return index or -1.
//
// Emits the code for the search rather than performing it. str2/cnt2 describe
// the source string and str1/cnt1 the pattern. ae is the StrIntrinsicNode
// encoding pair; it decides whether each string is read as 1-byte or 2-byte
// characters. When icnt1 is -1 the pattern length is taken from cnt1 at run
// time; the values 1, 2, 3 and 4 select specialised short-pattern code.
// NOTE(review): icnt1 is presumably the compile-time constant pattern length
// when the compiler knows it - confirm against the caller.
//
// On exit result holds the match position in characters, or -1.
// rscratch1 and rscratch2 are used as scratch (ch1/ch2); str1, str2, cnt1,
// cnt2 and the tmp registers are overwritten on several paths. The
// Boyer-Moore path reserves ASIZE (256) bytes below sp and releases them
// before leaving.
|
||||
void MacroAssembler::string_indexof(Register str2, Register str1,
|
||||
Register cnt2, Register cnt1,
|
||||
Register tmp1, Register tmp2,
|
||||
Register tmp3, Register tmp4,
|
||||
Register tmp5, Register tmp6,
|
||||
int icnt1, Register result, int ae) {
|
||||
// NOTE: tmp5, tmp6 can be zr depending on specific method version
|
||||
Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
|
||||
|
||||
// Register aliases. Note that cnt1tmp/cnt2tmp share tmp1/tmp2 and that
// cnt1_neg/cnt2_neg reuse the incoming count registers.
Register ch1 = rscratch1;
|
||||
Register ch2 = rscratch2;
|
||||
Register cnt1tmp = tmp1;
|
||||
Register cnt2tmp = tmp2;
|
||||
Register cnt1_neg = cnt1;
|
||||
Register cnt2_neg = cnt2;
|
||||
Register result_tmp = tmp4;
|
||||
|
||||
bool isL = ae == StrIntrinsicNode::LL;
|
||||
|
||||
// Per-string encoding: *_chr_shift is log2 of the character size in bytes,
// *_chr_size the size itself.
bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
|
||||
bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
|
||||
int str1_chr_shift = str1_isL ? 0:1;
|
||||
int str2_chr_shift = str2_isL ? 0:1;
|
||||
int str1_chr_size = str1_isL ? 1:2;
|
||||
int str2_chr_size = str2_isL ? 1:2;
|
||||
// Load instructions sized for one, two or four characters.
chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
|
||||
chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
|
||||
|
||||
// Note, inline_string_indexOf() generates checks:
|
||||
// if (substr.count > string.count) return -1;
|
||||
// if (substr.count == 0) return 0;
|
||||
|
||||
// We have two strings, a source string in str2, cnt2 and a pattern string
|
||||
// in str1, cnt1. Find the 1st occurrence of pattern in source or return -1.
|
||||
|
||||
// For larger pattern and source we use a simplified Boyer Moore algorithm.
|
||||
// With a small pattern and source we use linear scan.
|
||||
|
||||
// Run-time pattern length: pick linear scan, the stub, or Boyer-Moore.
// result_tmp = cnt2 - cnt1 is the last source position a match can start at.
if (icnt1 == -1) {
|
||||
sub(result_tmp, cnt2, cnt1);
|
||||
cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
|
||||
br(LT, LINEARSEARCH);
|
||||
dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
|
||||
subs(zr, cnt1, 256);
|
||||
lsr(tmp1, cnt2, 2);
|
||||
ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
|
||||
br(GE, LINEARSTUB);
|
||||
}
|
||||
|
||||
// The Boyer Moore algorithm is based on the description here:-
|
||||
//
|
||||
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
|
||||
//
|
||||
// This describes an algorithm with 2 shift rules. The 'Bad Character' rule
|
||||
// and the 'Good Suffix' rule.
|
||||
//
|
||||
// These rules are essentially heuristics for how far we can shift the
|
||||
// pattern along the search string.
|
||||
//
|
||||
// The implementation here uses the 'Bad Character' rule only because of the
|
||||
// complexity of initialisation for the 'Good Suffix' rule.
|
||||
//
|
||||
// This is also known as the Boyer-Moore-Horspool algorithm:-
|
||||
//
|
||||
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
|
||||
//
|
||||
// This particular implementation has few java-specific optimizations.
|
||||
//
|
||||
// #define ASIZE 256
|
||||
//
|
||||
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
|
||||
// int i, j;
|
||||
// unsigned c;
|
||||
// unsigned char bc[ASIZE];
|
||||
//
|
||||
// /* Preprocessing */
|
||||
// for (i = 0; i < ASIZE; ++i)
|
||||
// bc[i] = m;
|
||||
// for (i = 0; i < m - 1; ) {
|
||||
// c = x[i];
|
||||
// ++i;
|
||||
// // c < 256 for Latin1 string, so, no need for branch
|
||||
// #ifdef PATTERN_STRING_IS_LATIN1
|
||||
// bc[c] = m - i;
|
||||
// #else
|
||||
// if (c < ASIZE) bc[c] = m - i;
|
||||
// #endif
|
||||
// }
|
||||
//
|
||||
// /* Searching */
|
||||
// j = 0;
|
||||
// while (j <= n - m) {
|
||||
// c = y[i+j];
|
||||
// if (x[m-1] == c)
|
||||
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
|
||||
// if (i < 0) return j;
|
||||
// // c < 256 for Latin1 string, so, no need for branch
|
||||
// #ifdef SOURCE_STRING_IS_LATIN1
|
||||
// // LL case: (c< 256) always true. Remove branch
|
||||
// j += bc[y[j+m-1]];
|
||||
// #endif
|
||||
// #ifndef PATTERN_STRING_IS_UTF
|
||||
// // UU case: need if (c<ASIZE) check. Skip 1 character if not.
|
||||
// if (c < ASIZE)
|
||||
// j += bc[y[j+m-1]];
|
||||
// else
|
||||
// j += 1
|
||||
// #endif
|
||||
// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
|
||||
// // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
|
||||
// if (c < ASIZE)
|
||||
// j += bc[y[j+m-1]];
|
||||
// else
|
||||
// j += m
|
||||
// #endif
|
||||
// }
|
||||
// }
|
||||
|
||||
if (icnt1 == -1) {
|
||||
Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
|
||||
BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
|
||||
Register cnt1end = tmp2;
|
||||
Register str2end = cnt2;
|
||||
Register skipch = tmp2;
|
||||
|
||||
// str1 length is >=8, so, we can read at least 1 register for cases when
|
||||
// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
|
||||
// UL case. We'll re-read last character in inner pre-loop code to have
|
||||
// single outer pre-loop load
|
||||
const int firstStep = isL ? 7 : 3;
|
||||
|
||||
const int ASIZE = 256;
|
||||
const int STORED_BYTES = 32; // amount of bytes stored per instruction
|
||||
// Reserve the ASIZE-byte bad-character table on the stack and fill it from
// v0, whose bytes were all set from cnt1 by the dup above.
sub(sp, sp, ASIZE);
|
||||
mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
|
||||
mov(ch1, sp);
|
||||
BIND(BM_INIT_LOOP);
|
||||
stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
|
||||
subs(tmp5, tmp5, 1);
|
||||
br(GT, BM_INIT_LOOP);
|
||||
|
||||
// tmp5 keeps the original str2 so the match offset can be computed later;
// str2end is the address of the last position a match can start at.
sub(cnt1tmp, cnt1, 1);
|
||||
mov(tmp5, str2);
|
||||
add(str2end, str2, result_tmp, LSL, str2_chr_shift);
|
||||
sub(ch2, cnt1, 1);
|
||||
mov(tmp3, str1);
|
||||
// For every pattern character except the last, store its distance to the
// end of the pattern (ch2 counts down from cnt1-1) in the table. Pattern
// characters >= ASIZE are skipped when the pattern is not Latin1.
BIND(BCLOOP);
|
||||
(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
|
||||
if (!str1_isL) {
|
||||
subs(zr, ch1, ASIZE);
|
||||
br(HS, BCSKIP);
|
||||
}
|
||||
strb(ch2, Address(sp, ch1));
|
||||
BIND(BCSKIP);
|
||||
subs(ch2, ch2, 1);
|
||||
br(GT, BCLOOP);
|
||||
|
||||
add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
|
||||
if (str1_isL == str2_isL) {
|
||||
// load last 8 bytes (8LL/4UU symbols)
|
||||
ldr(tmp6, Address(tmp6, -wordSize));
|
||||
} else {
|
||||
ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
|
||||
// convert Latin1 to UTF. We'll have to wait until load completed, but
|
||||
// it's still faster than per-character loads+checks
|
||||
lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
|
||||
ubfx(ch1, tmp6, 8, 8); // str1[N-2]
|
||||
ubfx(ch2, tmp6, 16, 8); // str1[N-3]
|
||||
andr(tmp6, tmp6, 0xFF); // str1[N-4]
|
||||
orr(ch2, ch1, ch2, LSL, 16);
|
||||
orr(tmp6, tmp6, tmp3, LSL, 48);
|
||||
orr(tmp6, tmp6, ch2, LSL, 16);
|
||||
}
|
||||
// Outer loop: skipch is the source character aligned with the last pattern
// character; tmp3 holds the last pattern character.
BIND(BMLOOPSTR2);
|
||||
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
|
||||
if (str1_isL == str2_isL) {
|
||||
// re-init tmp3. It's for free because it's executed in parallel with
|
||||
// load above. Alternative is to initialize it before loop, but it'll
|
||||
// affect performance on in-order systems with 2 or more ld/st pipelines
|
||||
lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
|
||||
}
|
||||
if (!isL) { // UU/UL case
|
||||
lsl(ch2, cnt1tmp, 1); // offset in bytes
|
||||
}
|
||||
cmp(tmp3, skipch);
|
||||
br(NE, BMSKIP);
|
||||
// Last characters agree: compare the preloaded pattern tail (tmp6) with the
// matching 8 bytes of the source before going character by character.
ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
|
||||
mov(ch1, tmp6);
|
||||
if (isL) {
|
||||
b(BMLOOPSTR1_AFTER_LOAD);
|
||||
} else {
|
||||
sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
|
||||
b(BMLOOPSTR1_CMP);
|
||||
}
|
||||
// Inner loop: walk backwards through the pattern one character at a time.
BIND(BMLOOPSTR1);
|
||||
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
|
||||
BIND(BMLOOPSTR1_AFTER_LOAD);
|
||||
subs(cnt1tmp, cnt1tmp, 1);
|
||||
br(LT, BMLOOPSTR1_LASTCMP);
|
||||
BIND(BMLOOPSTR1_CMP);
|
||||
cmp(ch1, ch2);
|
||||
br(EQ, BMLOOPSTR1);
|
||||
// Mismatch: work out how far to advance str2.
BIND(BMSKIP);
|
||||
if (!isL) {
|
||||
// if we've met UTF symbol while searching Latin1 pattern, then we can
|
||||
// skip cnt1 symbols
|
||||
if (str1_isL != str2_isL) {
|
||||
mov(result_tmp, cnt1);
|
||||
} else {
|
||||
mov(result_tmp, 1);
|
||||
}
|
||||
subs(zr, skipch, ASIZE);
|
||||
br(HS, BMADV);
|
||||
}
|
||||
ldrb(result_tmp, Address(sp, skipch)); // load skip distance
|
||||
BIND(BMADV);
|
||||
sub(cnt1tmp, cnt1, 1);
|
||||
add(str2, str2, result_tmp, LSL, str2_chr_shift);
|
||||
cmp(str2, str2end);
|
||||
br(LE, BMLOOPSTR2);
|
||||
// Ran past the last candidate position: release the table, report no match.
add(sp, sp, ASIZE);
|
||||
b(NOMATCH);
|
||||
BIND(BMLOOPSTR1_LASTCMP);
|
||||
cmp(ch1, ch2);
|
||||
br(NE, BMSKIP);
|
||||
// Whole pattern matched: result = (str2 - original str2), in characters.
BIND(BMMATCH);
|
||||
sub(result, str2, tmp5);
|
||||
if (!str2_isL) lsr(result, result, 1);
|
||||
add(sp, sp, ASIZE);
|
||||
b(DONE);
|
||||
|
||||
// Patterns of 16 or more characters that are not handled by Boyer-Moore
// are handed to an out-of-line stub chosen by encoding.
BIND(LINEARSTUB);
|
||||
cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
|
||||
br(LT, LINEAR_MEDIUM);
|
||||
mov(result, zr);
|
||||
RuntimeAddress stub = NULL;
|
||||
if (isL) {
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
|
||||
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
|
||||
} else if (str1_isL) {
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
|
||||
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
|
||||
} else {
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
|
||||
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
|
||||
}
|
||||
trampoline_call(stub);
|
||||
b(DONE);
|
||||
}
|
||||
|
||||
// Linear scan. The loops below index the source with a negative byte offset
// (cnt2_neg) from the last candidate position, counting up towards zero.
BIND(LINEARSEARCH);
|
||||
{
|
||||
Label DO1, DO2, DO3;
|
||||
|
||||
Register str2tmp = tmp2;
|
||||
Register first = tmp3;
|
||||
|
||||
if (icnt1 == -1)
|
||||
{
|
||||
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
|
||||
|
||||
cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
|
||||
br(LT, DOSHORT);
|
||||
// General case: find the first pattern character, then compare the rest.
BIND(LINEAR_MEDIUM);
|
||||
(this->*str1_load_1chr)(first, Address(str1));
|
||||
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
|
||||
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
|
||||
BIND(FIRST_LOOP);
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmp(first, ch2);
|
||||
br(EQ, STR1_LOOP);
|
||||
BIND(STR2_NEXT);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, FIRST_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
BIND(STR1_LOOP);
|
||||
adds(cnt1tmp, cnt1_neg, str1_chr_size);
|
||||
add(cnt2tmp, cnt2_neg, str2_chr_size);
|
||||
br(GE, MATCH);
|
||||
|
||||
BIND(STR1_NEXT);
|
||||
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
|
||||
cmp(ch1, ch2);
|
||||
br(NE, STR2_NEXT);
|
||||
adds(cnt1tmp, cnt1tmp, str1_chr_size);
|
||||
add(cnt2tmp, cnt2tmp, str2_chr_size);
|
||||
br(LT, STR1_NEXT);
|
||||
b(MATCH);
|
||||
|
||||
// Very short run-time pattern: dispatch on its length. With equal
// encodings 1 -> DO1, 3 -> DO3, and 2 falls through to DO2.
BIND(DOSHORT);
|
||||
if (str1_isL == str2_isL) {
|
||||
cmp(cnt1, (u1)2);
|
||||
br(LT, DO1);
|
||||
br(GT, DO3);
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern of exactly 4 characters: compare all four with one load.
if (icnt1 == 4) {
|
||||
Label CH1_LOOP;
|
||||
|
||||
(this->*load_4chr)(ch1, str1);
|
||||
sub(result_tmp, cnt2, 4);
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
|
||||
BIND(CH1_LOOP);
|
||||
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmp(ch1, ch2);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
}
|
||||
|
||||
// Pattern of exactly 2 characters: compare both with one load.
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
|
||||
Label CH1_LOOP;
|
||||
|
||||
BIND(DO2);
|
||||
(this->*load_2chr)(ch1, str1);
|
||||
if (icnt1 == 2) {
|
||||
sub(result_tmp, cnt2, 2);
|
||||
}
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
BIND(CH1_LOOP);
|
||||
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmp(ch1, ch2);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
}
|
||||
|
||||
// Pattern of exactly 3 characters: match the first two with one load, then
// check the third separately.
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
|
||||
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
|
||||
|
||||
BIND(DO3);
|
||||
(this->*load_2chr)(first, str1);
|
||||
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
|
||||
if (icnt1 == 3) {
|
||||
sub(result_tmp, cnt2, 3);
|
||||
}
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
BIND(FIRST_LOOP);
|
||||
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmpw(first, ch2);
|
||||
br(EQ, STR1_LOOP);
|
||||
BIND(STR2_NEXT);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LE, FIRST_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
BIND(STR1_LOOP);
|
||||
add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
|
||||
cmp(ch1, ch2);
|
||||
br(NE, STR2_NEXT);
|
||||
b(MATCH);
|
||||
}
|
||||
|
||||
// Single-character pattern: scan the source 8 bytes at a time when it is
// long enough, otherwise one character at a time.
if (icnt1 == -1 || icnt1 == 1) {
|
||||
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
|
||||
|
||||
BIND(DO1);
|
||||
(this->*str1_load_1chr)(ch1, str1);
|
||||
cmp(cnt2, (u1)8);
|
||||
br(LT, DO1_SHORT);
|
||||
|
||||
sub(result_tmp, cnt2, 8/str2_chr_size);
|
||||
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
|
||||
mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
|
||||
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
|
||||
|
||||
// Replicate the pattern character into every lane of the 64-bit register.
if (str2_isL) {
|
||||
orr(ch1, ch1, ch1, LSL, 8);
|
||||
}
|
||||
orr(ch1, ch1, ch1, LSL, 16);
|
||||
orr(ch1, ch1, ch1, LSL, 32);
|
||||
// After the eor a matching lane is zero; (x - 0x01..) & ~(x | 0x7f..) is
// non-zero when some lane of x is zero.
BIND(CH1_LOOP);
|
||||
ldr(ch2, Address(str2, cnt2_neg));
|
||||
eor(ch2, ch1, ch2);
|
||||
sub(tmp1, ch2, tmp3);
|
||||
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
|
||||
bics(tmp1, tmp1, tmp2);
|
||||
br(NE, HAS_ZERO);
|
||||
adds(cnt2_neg, cnt2_neg, 8);
|
||||
br(LT, CH1_LOOP);
|
||||
|
||||
// The offset reached or passed zero. If it overshot by fewer than 8 bytes,
// run one more iteration at offset 0 to cover the final word.
cmp(cnt2_neg, (u1)8);
|
||||
mov(cnt2_neg, 0);
|
||||
br(LT, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
// rev + clz gives the bit position of the lowest-addressed flagged lane;
// LSR 3 turns it into a byte offset.
BIND(HAS_ZERO);
|
||||
rev(tmp1, tmp1);
|
||||
clz(tmp1, tmp1);
|
||||
add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
|
||||
b(MATCH);
|
||||
|
||||
BIND(DO1_SHORT);
|
||||
mov(result_tmp, cnt2);
|
||||
lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
|
||||
sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
|
||||
BIND(DO1_LOOP);
|
||||
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
|
||||
cmpw(ch1, ch2);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt2_neg, cnt2_neg, str2_chr_size);
|
||||
br(LT, DO1_LOOP);
|
||||
}
|
||||
}
|
||||
BIND(NOMATCH);
|
||||
mov(result, -1);
|
||||
b(DONE);
|
||||
// cnt2_neg is a (non-positive) byte offset relative to position result_tmp;
// the arithmetic shift converts it to characters.
BIND(MATCH);
|
||||
add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
|
||||
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
|
||||
|
||||
// Emits code that searches the cnt1 two-byte characters starting at str1 for
// the character in ch. On exit result holds the index of the first match in
// characters, or -1 when there is none (including cnt1 == 0).
// str1, cnt1 and ch are overwritten; tmp1..tmp3, rscratch1 and rscratch2 are
// used as scratch.
void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3)
|
||||
{
|
||||
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
|
||||
// cnt1 is reused as the negative byte offset that the loops count up to zero.
Register cnt1_neg = cnt1;
|
||||
Register ch1 = rscratch1;
|
||||
Register result_tmp = rscratch2;
|
||||
|
||||
// Empty input: nothing can match.
cbz(cnt1, NOMATCH);
|
||||
|
||||
// Fewer than 4 characters: use the one-character-at-a-time loop.
cmp(cnt1, (u1)4);
|
||||
br(LT, DO1_SHORT);
|
||||
|
||||
// Replicate the character into all four 16-bit lanes of ch.
// NOTE(review): this assumes bits 16 and up of ch are zero on entry - confirm.
orr(ch, ch, ch, LSL, 16);
|
||||
orr(ch, ch, ch, LSL, 32);
|
||||
|
||||
// result_tmp = cnt1 - 4 is the index of the last 4-character window; str1 is
// moved to that window and cnt1_neg becomes the negative byte offset of the
// start of the string relative to it.
sub(cnt1, cnt1, 4);
|
||||
mov(result_tmp, cnt1);
|
||||
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
|
||||
sub(cnt1_neg, zr, cnt1, LSL, 1);
|
||||
|
||||
mov(tmp3, 0x0001000100010001);
|
||||
|
||||
// After the eor a matching lane is zero; (x - 0x0001..) & ~(x | 0x7fff..) is
// non-zero when some 16-bit lane of x is zero.
BIND(CH1_LOOP);
|
||||
ldr(ch1, Address(str1, cnt1_neg));
|
||||
eor(ch1, ch, ch1);
|
||||
sub(tmp1, ch1, tmp3);
|
||||
orr(tmp2, ch1, 0x7fff7fff7fff7fff);
|
||||
bics(tmp1, tmp1, tmp2);
|
||||
br(NE, HAS_ZERO);
|
||||
adds(cnt1_neg, cnt1_neg, 8);
|
||||
br(LT, CH1_LOOP);
|
||||
|
||||
// The offset reached or passed zero. If it overshot by fewer than 8 bytes,
// run one more iteration at offset 0 to cover the last four characters
// (the flags come from the cmp; the mov does not change them).
cmp(cnt1_neg, (u1)8);
|
||||
mov(cnt1_neg, 0);
|
||||
br(LT, CH1_LOOP);
|
||||
b(NOMATCH);
|
||||
|
||||
// rev + clz gives the bit position of the lowest-addressed flagged lane;
// LSR 3 turns it into a byte offset that is added to cnt1_neg.
BIND(HAS_ZERO);
|
||||
rev(tmp1, tmp1);
|
||||
clz(tmp1, tmp1);
|
||||
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
|
||||
b(MATCH);
|
||||
|
||||
// Short input: point str1 at the end of the string and walk a negative byte
// offset up to zero, two bytes per character.
BIND(DO1_SHORT);
|
||||
mov(result_tmp, cnt1);
|
||||
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
|
||||
sub(cnt1_neg, zr, cnt1, LSL, 1);
|
||||
BIND(DO1_LOOP);
|
||||
ldrh(ch1, Address(str1, cnt1_neg));
|
||||
cmpw(ch, ch1);
|
||||
br(EQ, MATCH);
|
||||
adds(cnt1_neg, cnt1_neg, 2);
|
||||
br(LT, DO1_LOOP);
|
||||
BIND(NOMATCH);
|
||||
mov(result, -1);
|
||||
b(DONE);
|
||||
// result = base index + (byte offset / 2).
BIND(MATCH);
|
||||
add(result, result_tmp, cnt1_neg, ASR, 1);
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
// Compare strings.
//
// Emits code that compares the strings at str1 and str2. ae is the
// StrIntrinsicNode encoding pair (LL, UU, LU or UL) and decides whether each
// string is read as 1-byte or 2-byte characters. On exit result holds the
// difference of the first pair of characters that differ; when no difference
// is found within the shorter length it holds cnt1 - cnt2 (in characters),
// which is computed up front. Inputs at or above stub_threshold characters are
// handed to an out-of-line compare_long_string_* stub.
// str1, str2, cnt1, cnt2, tmp1, tmp2, rscratch1, rscratch2, vtmp1 and vtmp2 are
// overwritten on various paths; vtmp3 is not referenced in this body.
|
||||
void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
|
||||
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
|
||||
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
|
||||
SHORT_LOOP_START, TAIL_CHECK;
|
||||
|
||||
bool isLL = ae == StrIntrinsicNode::LL;
|
||||
bool isLU = ae == StrIntrinsicNode::LU;
|
||||
bool isUL = ae == StrIntrinsicNode::UL;
|
||||
|
||||
// The stub threshold for LL strings is: 72 (64 + 8) chars
|
||||
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
|
||||
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
|
||||
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
|
||||
|
||||
bool str1_isL = isLL || isLU;
|
||||
bool str2_isL = isLL || isUL;
|
||||
|
||||
// *_chr_shift is log2 of the character size in bytes, *_chr_size the size;
// minCharsInWord is how many characters one 8-byte comparison covers.
int str1_chr_shift = str1_isL ? 0 : 1;
|
||||
int str2_chr_shift = str2_isL ? 0 : 1;
|
||||
int str1_chr_size = str1_isL ? 1 : 2;
|
||||
int str2_chr_size = str2_isL ? 1 : 2;
|
||||
int minCharsInWord = isLL ? wordSize : wordSize/2;
|
||||
|
||||
// vtmpZ is zeroed and zipped with vtmp to widen 1-byte characters to 2 bytes.
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
|
||||
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
|
||||
(uxt_insn)&MacroAssembler::uxthw;
|
||||
|
||||
BLOCK_COMMENT("string_compare {");
|
||||
|
||||
// Bizarrely, the counts are passed in bytes, regardless of whether they
|
||||
// are L or U strings, however the result is always in characters.
|
||||
if (!str1_isL) asrw(cnt1, cnt1, 1);
|
||||
if (!str2_isL) asrw(cnt2, cnt2, 1);
|
||||
|
||||
// Compute the minimum of the string lengths and save the difference.
|
||||
subsw(result, cnt1, cnt2);
|
||||
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
|
||||
|
||||
// A very short string
|
||||
cmpw(cnt2, minCharsInWord);
|
||||
br(Assembler::LE, SHORT_STRING);
|
||||
|
||||
// Compare longwords
|
||||
// load first parts of strings and finish initialization while loading
|
||||
{
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
ldr(tmp1, Address(str1));
|
||||
// Same address: the contents are identical, result already holds the
// length difference.
cmp(str1, str2);
|
||||
br(Assembler::EQ, DONE);
|
||||
ldr(tmp2, Address(str2));
|
||||
cmp(cnt2, stub_threshold);
|
||||
br(GE, STUB);
|
||||
subsw(cnt2, cnt2, minCharsInWord);
|
||||
br(EQ, TAIL_CHECK);
|
||||
// Point both strings at their last full word and turn cnt2 into a
// negative byte offset that the loop counts up towards zero.
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
} else if (isLU) {
|
||||
// str1 is 1-byte: load 4 bytes into vtmp and widen them to four 2-byte
// characters with zip1 against the zero vector before moving to tmp1.
// cnt1 becomes the byte offset into str1, cnt2 the byte offset into str2.
ldrs(vtmp, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
cmp(cnt2, stub_threshold);
|
||||
br(GE, STUB);
|
||||
subw(cnt2, cnt2, 4);
|
||||
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
add(cnt1, cnt1, 4);
|
||||
fmovd(tmp1, vtmp);
|
||||
} else { // UL case
|
||||
// Mirror image of the LU case: str2 is the 1-byte string that is widened.
ldr(tmp1, Address(str1));
|
||||
ldrs(vtmp, Address(str2));
|
||||
cmp(cnt2, stub_threshold);
|
||||
br(GE, STUB);
|
||||
subw(cnt2, cnt2, 4);
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
add(cnt1, cnt1, 8);
|
||||
fmovd(tmp2, vtmp);
|
||||
}
|
||||
// Step past the word that was loaded above; a non-negative offset means
// only the final word is left.
adds(cnt2, cnt2, isUL ? 4 : 8);
|
||||
br(GE, TAIL);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DIFFERENCE);
|
||||
// main loop
|
||||
bind(NEXT_WORD);
|
||||
if (str1_isL == str2_isL) {
|
||||
ldr(tmp1, Address(str1, cnt2));
|
||||
ldr(tmp2, Address(str2, cnt2));
|
||||
adds(cnt2, cnt2, 8);
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1, cnt1));
|
||||
ldr(tmp2, Address(str2, cnt2));
|
||||
add(cnt1, cnt1, 4);
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp1, vtmp);
|
||||
adds(cnt2, cnt2, 8);
|
||||
} else { // UL
|
||||
ldrs(vtmp, Address(str2, cnt2));
|
||||
ldr(tmp1, Address(str1, cnt1));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
add(cnt1, cnt1, 8);
|
||||
fmovd(tmp2, vtmp);
|
||||
adds(cnt2, cnt2, 4);
|
||||
}
|
||||
br(GE, TAIL);
|
||||
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, NEXT_WORD);
|
||||
b(DIFFERENCE);
|
||||
// Check the word loaded by the last loop iteration, then the final word.
bind(TAIL);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DIFFERENCE);
|
||||
// Last longword. In the case where length == 4 we compare the
|
||||
// same longword twice, but that's still faster than another
|
||||
// conditional branch.
|
||||
if (str1_isL == str2_isL) {
|
||||
ldr(tmp1, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp1, vtmp);
|
||||
} else { // UL
|
||||
ldrs(vtmp, Address(str2));
|
||||
ldr(tmp1, Address(str1));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp2, vtmp);
|
||||
}
|
||||
bind(TAIL_CHECK);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
// rev + clz locates the lowest-addressed differing bit; the andr rounds that
// bit index down to a character boundary (8 or 16 bits) so both words can be
// shifted to bring the differing character to the bottom.
bind(DIFFERENCE);
|
||||
rev(rscratch2, rscratch2);
|
||||
clz(rscratch2, rscratch2);
|
||||
andr(rscratch2, rscratch2, isLL ? -8 : -16);
|
||||
lsrv(tmp1, tmp1, rscratch2);
|
||||
(this->*ext_chr)(tmp1, tmp1);
|
||||
lsrv(tmp2, tmp2, rscratch2);
|
||||
(this->*ext_chr)(tmp2, tmp2);
|
||||
subw(result, tmp1, tmp2);
|
||||
b(DONE);
|
||||
}
|
||||
|
||||
// Long strings: call the out-of-line stub that matches the encoding pair.
bind(STUB);
|
||||
RuntimeAddress stub = NULL;
|
||||
switch(ae) {
|
||||
case StrIntrinsicNode::LL:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
|
||||
break;
|
||||
case StrIntrinsicNode::UU:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
|
||||
break;
|
||||
case StrIntrinsicNode::LU:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
|
||||
break;
|
||||
case StrIntrinsicNode::UL:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
|
||||
trampoline_call(stub);
|
||||
b(DONE);
|
||||
|
||||
// Short strings: compare one character at a time. The loop handles two
// character pairs per iteration, alternating between (tmp1, cnt1) and
// (tmp2, rscratch1); cnt1 is reused here to hold a character of str2.
bind(SHORT_STRING);
|
||||
// Is the minimum length zero?
|
||||
cbz(cnt2, DONE);
|
||||
// arrange code to do most branches while loading and loading next characters
|
||||
// while comparing previous
|
||||
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST_INIT);
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
b(SHORT_LOOP_START);
|
||||
bind(SHORT_LOOP);
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST);
|
||||
bind(SHORT_LOOP_START);
|
||||
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
|
||||
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
|
||||
cmp(tmp1, cnt1);
|
||||
br(NE, SHORT_LOOP_TAIL);
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST2);
|
||||
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
cmp(tmp2, rscratch1);
|
||||
br(EQ, SHORT_LOOP);
|
||||
sub(result, tmp2, rscratch1);
|
||||
b(DONE);
|
||||
bind(SHORT_LOOP_TAIL);
|
||||
sub(result, tmp1, cnt1);
|
||||
b(DONE);
|
||||
bind(SHORT_LAST2);
|
||||
cmp(tmp2, rscratch1);
|
||||
br(EQ, DONE);
|
||||
sub(result, tmp2, rscratch1);
|
||||
|
||||
b(DONE);
|
||||
bind(SHORT_LAST_INIT);
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
bind(SHORT_LAST);
|
||||
cmp(tmp1, cnt1);
|
||||
br(EQ, DONE);
|
||||
sub(result, tmp1, cnt1);
|
||||
|
||||
bind(DONE);
|
||||
|
||||
BLOCK_COMMENT("} string_compare");
|
||||
}
|
||||
#endif // COMPILER2
|
||||
|
||||
// This method checks if provided byte array contains byte with highest bit set.
|
||||
void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
|
||||
|
||||
@ -1227,11 +1227,6 @@ public:
|
||||
Register table0, Register table1, Register table2, Register table3,
|
||||
bool upper = false);
|
||||
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1, Register tmp2, FloatRegister vtmp1,
|
||||
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
|
||||
|
||||
void has_negatives(Register ary1, Register len, Register result);
|
||||
|
||||
void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
|
||||
@ -1260,15 +1255,6 @@ public:
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
FloatRegister Vtmp3, FloatRegister Vtmp4);
|
||||
void string_indexof(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2,
|
||||
Register tmp3, Register tmp4,
|
||||
Register tmp5, Register tmp6,
|
||||
int int_cnt1, Register result, int ae);
|
||||
void string_indexof_char(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
|
||||
FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,
|
||||
|
||||
@ -137,7 +137,7 @@ bool SafePointNode::needs_polling_address_input() {
|
||||
|
||||
// emit an interrupt that is caught by the debugger (for debugging compiler)
|
||||
void emit_break(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ breakpoint();
|
||||
}
|
||||
|
||||
@ -157,7 +157,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
|
||||
|
||||
|
||||
void emit_nop(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ nop();
|
||||
}
|
||||
|
||||
@ -165,7 +165,7 @@ void emit_nop(CodeBuffer &cbuf) {
|
||||
void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
|
||||
int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
|
||||
int call_site_offset = cbuf.insts()->mark_off();
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
|
||||
address target = (address)m->method();
|
||||
assert(n->as_MachCall()->entry_point() == target, "sanity");
|
||||
@ -212,7 +212,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
|
||||
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
ConstantTable& constant_table = C->output()->constant_table();
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register r = as_Register(ra_->get_encode(this));
|
||||
CodeSection* consts_section = __ code()->consts();
|
||||
@ -269,7 +269,7 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
|
||||
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
for (int i = 0; i < OptoPrologueNops; i++) {
|
||||
__ nop();
|
||||
@ -339,7 +339,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Compile* C = ra_->C;
|
||||
|
||||
size_t framesize = C->output()->frame_size_in_bytes();
|
||||
@ -429,7 +429,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
// Bailout only for real instruction emit.
|
||||
// This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )
|
||||
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
|
||||
// --------------------------------------
|
||||
// Check for mem-mem move. Load into unused float registers and fall into
|
||||
@ -790,7 +790,7 @@ void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
for(int i = 0; i < _count; i += 1) {
|
||||
__ nop();
|
||||
}
|
||||
@ -811,7 +811,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
||||
int reg = ra_->get_encode(this);
|
||||
Register dst = reg_to_register_object(reg);
|
||||
@ -847,7 +847,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register iCache = reg_to_register_object(Matcher::inline_cache_reg_encode());
|
||||
assert(iCache == Ricklass, "should be");
|
||||
Register receiver = R0;
|
||||
@ -866,7 +866,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
|
||||
|
||||
// Emit exception handler code.
|
||||
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_exception_handler());
|
||||
if (base == NULL) {
|
||||
@ -889,7 +889,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
|
||||
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
|
||||
// Can't use any of the current frame's registers as we may have deopted
|
||||
// at a poll and everything can be live.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_deopt_handler());
|
||||
if (base == NULL) {
|
||||
@ -1280,7 +1280,7 @@ encode %{
|
||||
// preserve mark
|
||||
address mark = cbuf.insts()->mark();
|
||||
debug_only(int off0 = cbuf.insts_size());
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int ret_addr_offset = as_MachCall()->ret_addr_offset();
|
||||
__ adr(LR, mark + ret_addr_offset);
|
||||
__ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
|
||||
@ -1294,7 +1294,7 @@ encode %{
|
||||
// preserve mark
|
||||
address mark = cbuf.insts()->mark();
|
||||
debug_only(int off0 = cbuf.insts_size());
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
// FP is preserved across all calls, even compiled calls.
|
||||
// Use it to preserve SP in places where the callee might change the SP.
|
||||
__ mov(Rmh_SP_save, SP);
|
||||
@ -1305,12 +1305,12 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class restore_SP %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(SP, Rmh_SP_save);
|
||||
%}
|
||||
|
||||
enc_class Java_Dynamic_Call (method meth) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
|
||||
assert(R8_ic_reg == Ricklass, "should be");
|
||||
__ set_inst_mark();
|
||||
@ -1338,7 +1338,7 @@ encode %{
|
||||
val |= (val << bit_width);
|
||||
}
|
||||
}
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
if (val == -1) {
|
||||
__ mvn($tmp$$Register, 0);
|
||||
@ -1355,7 +1355,7 @@ encode %{
|
||||
// Replicate float con 2 times and pack into vector (8 bytes) in regD.
|
||||
float fval = $src$$constant;
|
||||
int val = *((int*)&fval);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
if (val == -1) {
|
||||
__ mvn($tmp$$Register, 0);
|
||||
@ -1370,7 +1370,7 @@ encode %{
|
||||
|
||||
enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
|
||||
Label Ldone, Lloop;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register str1_reg = $str1$$Register;
|
||||
Register str2_reg = $str2$$Register;
|
||||
@ -1462,7 +1462,7 @@ encode %{
|
||||
|
||||
enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
|
||||
Label Lchar, Lchar_loop, Ldone, Lequal;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register str1_reg = $str1$$Register;
|
||||
Register str2_reg = $str2$$Register;
|
||||
@ -1524,7 +1524,7 @@ encode %{
|
||||
|
||||
enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
|
||||
Label Ldone, Lloop, Lequal;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register ary1_reg = $ary1$$Register;
|
||||
Register ary2_reg = $ary2$$Register;
|
||||
|
||||
160
src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp
Normal file
160
src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "runtime/basicLock.hpp"
|
||||
|
||||
// TODO: 8 bytes at a time? pre-fetch?
|
||||
// Compare char[] arrays aligned to 4 bytes.
|
||||
void C2_MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
|
||||
Register limit, Register result,
|
||||
Register chr1, Register chr2, Label& Ldone) {
|
||||
Label Lvector, Lloop;
|
||||
|
||||
// if (ary1 == ary2)
|
||||
// return true;
|
||||
cmpoop(ary1, ary2);
|
||||
b(Ldone, eq);
|
||||
|
||||
// Note: limit contains number of bytes (2*char_elements) != 0.
|
||||
tst(limit, 0x2); // trailing character ?
|
||||
b(Lvector, eq);
|
||||
|
||||
// compare the trailing char
|
||||
sub(limit, limit, sizeof(jchar));
|
||||
ldrh(chr1, Address(ary1, limit));
|
||||
ldrh(chr2, Address(ary2, limit));
|
||||
cmp(chr1, chr2);
|
||||
mov(result, 0, ne); // not equal
|
||||
b(Ldone, ne);
|
||||
|
||||
// only one char ?
|
||||
tst(limit, limit);
|
||||
mov(result, 1, eq);
|
||||
b(Ldone, eq);
|
||||
|
||||
// word by word compare, dont't need alignment check
|
||||
bind(Lvector);
|
||||
|
||||
// Shift ary1 and ary2 to the end of the arrays, negate limit
|
||||
add(ary1, limit, ary1);
|
||||
add(ary2, limit, ary2);
|
||||
neg(limit, limit);
|
||||
|
||||
bind(Lloop);
|
||||
ldr_u32(chr1, Address(ary1, limit));
|
||||
ldr_u32(chr2, Address(ary2, limit));
|
||||
cmp_32(chr1, chr2);
|
||||
mov(result, 0, ne); // not equal
|
||||
b(Ldone, ne);
|
||||
adds(limit, limit, 2*sizeof(jchar));
|
||||
b(Lloop, ne);
|
||||
|
||||
// Caller should set it:
|
||||
// mov(result_reg, 1); //equal
|
||||
}
|
||||
|
||||
// Emits the C2 inline fast path for monitor enter.
//
//   Roop       - object being locked
//   Rbox       - address of the on-stack BasicLock
//   Rscratch   - scratch register
//   Rscratch2  - scratch register, used below as Rmark (the loaded mark word)
//   scratch3   - extra temporary, required only when biased locking is
//                enabled and UseOptoBiasInlining is off
//
// On exit the condition flags carry the outcome:
//   EQ -> success
//   NE -> failure, the caller branches to the slow path
void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3) {
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label L_try_cas;
  Label L_done;

  const bool emit_biased_path = UseBiasedLocking && !UseOptoBiasInlining;
  if (emit_biased_path) {
    assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
    // Branches to L_done if the lock is biased, otherwise falls through.
    biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, L_done, L_done);
  }

  // Invariant: the mark word loaded below does not contain the biased lock pattern.
  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markWord::unlocked_value);
  b(L_try_cas, ne);   // unlocked bit set: go try to install the lock

  const Address displaced_header(Rbox, BasicLock::displaced_header_offset_in_bytes());

  // Recursive lock check. See the comments in
  // InterpreterMacroAssembler::lock_object for the reasoning behind it.
  // Step 1: test the low 2 bits of the mark word.
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // Step 2: only if they are 0, test (hdr - SP).
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // Still 'eq' here means the recursive lock is OK.
  // Store zero for a recursive lock and non-zero otherwise
  // (see the discussion in JDK-8153107).
  str(Rscratch, displaced_header);
  b(L_done);

  bind(L_try_cas);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, L_done,
                       /* allow_fallthrough_on_failure */ true,
                       /* one_shot */ true);

  bind(L_done);

  // Flags at this point:
  //   EQ -> Success
  //   NE -> Failure, branch to slow path
}
|
||||
|
||||
// Emits the C2 inline fast path for monitor exit.
//
//   Roop       - object being unlocked
//   Rbox       - address of the on-stack BasicLock
//   Rscratch   - scratch register
//   Rscratch2  - scratch register, used below as Rmark (the displaced header)
//
// NOTE(review): the original carries no flag contract comment here; callers
// presumably test EQ/NE as they do after fast_lock - confirm against the .ad file.
void C2_MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) {
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label L_done;

  const bool emit_biased_path = UseBiasedLocking && !UseOptoBiasInlining;
  if (emit_biased_path) {
    biased_locking_exit(Roop, Rscratch, L_done);
  }

  // Fetch the displaced header saved in the BasicLock by fast_lock.
  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // A NULL header means this was a recursive lock: nothing more to do.
  cmp(Rmark, 0);
  b(L_done, eq);

  // Otherwise restore the object header.
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, L_done,
                       /* allow_fallthrough_on_failure */ true,
                       /* one_shot */ true);

  bind(L_done);
}
|
||||
|
||||
39
src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp
Normal file
39
src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
|
||||
#define CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
|
||||
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Compare char[] arrays aligned to 4 bytes.
|
||||
void char_arrays_equals(Register ary1, Register ary2,
|
||||
Register limit, Register result,
|
||||
Register chr1, Register chr2, Label& Ldone);
|
||||
|
||||
void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
|
||||
void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
|
||||
|
||||
#endif // CPU_ARM_C2_MACROASSEMBLER_ARM_HPP
|
||||
@ -1626,57 +1626,6 @@ void MacroAssembler::lookup_interface_method(Register Rklass,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
// TODO: 8 bytes at a time? pre-fetch?
|
||||
// Compare char[] arrays aligned to 4 bytes.
|
||||
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
|
||||
Register limit, Register result,
|
||||
Register chr1, Register chr2, Label& Ldone) {
|
||||
Label Lvector, Lloop;
|
||||
|
||||
// if (ary1 == ary2)
|
||||
// return true;
|
||||
cmpoop(ary1, ary2);
|
||||
b(Ldone, eq);
|
||||
|
||||
// Note: limit contains number of bytes (2*char_elements) != 0.
|
||||
tst(limit, 0x2); // trailing character ?
|
||||
b(Lvector, eq);
|
||||
|
||||
// compare the trailing char
|
||||
sub(limit, limit, sizeof(jchar));
|
||||
ldrh(chr1, Address(ary1, limit));
|
||||
ldrh(chr2, Address(ary2, limit));
|
||||
cmp(chr1, chr2);
|
||||
mov(result, 0, ne); // not equal
|
||||
b(Ldone, ne);
|
||||
|
||||
// only one char ?
|
||||
tst(limit, limit);
|
||||
mov(result, 1, eq);
|
||||
b(Ldone, eq);
|
||||
|
||||
// word by word compare, don't need alignment check
|
||||
bind(Lvector);
|
||||
|
||||
// Shift ary1 and ary2 to the end of the arrays, negate limit
|
||||
add(ary1, limit, ary1);
|
||||
add(ary2, limit, ary2);
|
||||
neg(limit, limit);
|
||||
|
||||
bind(Lloop);
|
||||
ldr_u32(chr1, Address(ary1, limit));
|
||||
ldr_u32(chr2, Address(ary2, limit));
|
||||
cmp_32(chr1, chr2);
|
||||
mov(result, 0, ne); // not equal
|
||||
b(Ldone, ne);
|
||||
adds(limit, limit, 2*sizeof(jchar));
|
||||
b(Lloop, ne);
|
||||
|
||||
// Caller should set it:
|
||||
// mov(result_reg, 1); //equal
|
||||
}
|
||||
#endif
|
||||
|
||||
void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
|
||||
mov_slow(tmpreg1, counter_addr);
|
||||
@ -1970,92 +1919,6 @@ void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
|
||||
return bs->resolve(this, decorators, obj);
|
||||
}
|
||||
|
||||
|
||||
#ifdef COMPILER2
|
||||
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3)
|
||||
{
|
||||
assert(VM_Version::supports_ldrex(), "unsupported, yet?");
|
||||
|
||||
Register Rmark = Rscratch2;
|
||||
|
||||
assert(Roop != Rscratch, "");
|
||||
assert(Roop != Rmark, "");
|
||||
assert(Rbox != Rscratch, "");
|
||||
assert(Rbox != Rmark, "");
|
||||
|
||||
Label fast_lock, done;
|
||||
|
||||
if (UseBiasedLocking && !UseOptoBiasInlining) {
|
||||
assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
|
||||
biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
|
||||
// Fall through if lock not biased otherwise branch to done
|
||||
}
|
||||
|
||||
// Invariant: Rmark loaded below does not contain biased lock pattern
|
||||
|
||||
ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
|
||||
tst(Rmark, markWord::unlocked_value);
|
||||
b(fast_lock, ne);
|
||||
|
||||
// Check for recursive lock
|
||||
// See comments in InterpreterMacroAssembler::lock_object for
|
||||
// explanations on the fast recursive locking check.
|
||||
// -1- test low 2 bits
|
||||
movs(Rscratch, AsmOperand(Rmark, lsl, 30));
|
||||
// -2- test (hdr - SP) if the low two bits are 0
|
||||
sub(Rscratch, Rmark, SP, eq);
|
||||
movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
|
||||
// If still 'eq' then recursive locking OK
|
||||
// set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
|
||||
str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
|
||||
b(done);
|
||||
|
||||
bind(fast_lock);
|
||||
str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
|
||||
|
||||
bool allow_fallthrough_on_failure = true;
|
||||
bool one_shot = true;
|
||||
cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
|
||||
|
||||
bind(done);
|
||||
|
||||
// At this point flags are set as follows:
|
||||
// EQ -> Success
|
||||
// NE -> Failure, branch to slow path
|
||||
}
|
||||
|
||||
void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2)
|
||||
{
|
||||
assert(VM_Version::supports_ldrex(), "unsupported, yet?");
|
||||
|
||||
Register Rmark = Rscratch2;
|
||||
|
||||
assert(Roop != Rscratch, "");
|
||||
assert(Roop != Rmark, "");
|
||||
assert(Rbox != Rscratch, "");
|
||||
assert(Rbox != Rmark, "");
|
||||
|
||||
Label done;
|
||||
|
||||
if (UseBiasedLocking && !UseOptoBiasInlining) {
|
||||
biased_locking_exit(Roop, Rscratch, done);
|
||||
}
|
||||
|
||||
ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
|
||||
// If hdr is NULL, we've got recursive locking and there's nothing more to do
|
||||
cmp(Rmark, 0);
|
||||
b(done, eq);
|
||||
|
||||
// Restore the object header
|
||||
bool allow_fallthrough_on_failure = true;
|
||||
bool one_shot = true;
|
||||
cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
|
||||
|
||||
bind(done);
|
||||
|
||||
}
|
||||
#endif // COMPILER2
|
||||
|
||||
void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
|
||||
if (SafepointMechanism::uses_thread_local_poll()) {
|
||||
ldr_u32(tmp1, Address(Rthread, Thread::polling_page_offset()));
|
||||
|
||||
@ -1068,11 +1068,6 @@ public:
|
||||
Register temp_reg2,
|
||||
Label& L_no_such_interface);
|
||||
|
||||
// Compare char[] arrays aligned to 4 bytes.
|
||||
void char_arrays_equals(Register ary1, Register ary2,
|
||||
Register limit, Register result,
|
||||
Register chr1, Register chr2, Label& Ldone);
|
||||
|
||||
|
||||
void floating_cmp(Register dst);
|
||||
|
||||
@ -1090,11 +1085,6 @@ public:
|
||||
|
||||
void restore_default_fp_mode();
|
||||
|
||||
#ifdef COMPILER2
|
||||
void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
|
||||
void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
|
||||
#endif
|
||||
|
||||
void safepoint_poll(Register tmp1, Label& slow_path);
|
||||
void get_polling_page(Register dest);
|
||||
void read_polling_page(Register dest, relocInfo::relocType rtype);
|
||||
|
||||
580
src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
Normal file
580
src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
Normal file
@ -0,0 +1,580 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) // nothing
|
||||
#else
|
||||
#define BLOCK_COMMENT(str) block_comment(str)
|
||||
#endif
|
||||
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
|
||||
|
||||
// Intrinsics for CompactStrings
|
||||
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once.
|
||||
void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
|
||||
Label& Lfailure) {
|
||||
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||
Label Lloop, Lslow;
|
||||
|
||||
// Check if cnt >= 8 (= 16 bytes)
|
||||
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
|
||||
srwi_(tmp2, cnt, 3);
|
||||
beq(CCR0, Lslow);
|
||||
ori(tmp1, tmp1, 0xFF);
|
||||
rldimi(tmp1, tmp1, 32, 0);
|
||||
mtctr(tmp2);
|
||||
|
||||
// 2x unrolled loop
|
||||
bind(Lloop);
|
||||
ld(tmp2, 0, src); // _0_1_2_3 (Big Endian)
|
||||
ld(tmp4, 8, src); // _4_5_6_7
|
||||
|
||||
orr(tmp0, tmp2, tmp4);
|
||||
rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
|
||||
rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3
|
||||
rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
|
||||
rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7
|
||||
|
||||
andc_(tmp0, tmp0, tmp1);
|
||||
bne(CCR0, Lfailure); // Not latin1.
|
||||
addi(src, src, 16);
|
||||
|
||||
rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
|
||||
srdi(tmp2, tmp2, 3*8); // ____0_2_
|
||||
rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
|
||||
srdi(tmp4, tmp4, 3*8); // ____4_6_
|
||||
|
||||
orr(tmp2, tmp2, tmp3); // ____0123
|
||||
orr(tmp4, tmp4, tmp5); // ____4567
|
||||
|
||||
stw(tmp2, 0, dst);
|
||||
stw(tmp4, 4, dst);
|
||||
addi(dst, dst, 8);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lslow); // Fallback to slow version
|
||||
}
|
||||
|
||||
// Compress char[] to byte[]. cnt must be positive int.
|
||||
void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
|
||||
Label Lloop;
|
||||
mtctr(cnt);
|
||||
|
||||
bind(Lloop);
|
||||
lhz(tmp, 0, src);
|
||||
cmplwi(CCR0, tmp, 0xff);
|
||||
bgt(CCR0, Lfailure); // Not latin1.
|
||||
addi(src, src, 2);
|
||||
stb(tmp, 0, dst);
|
||||
addi(dst, dst, 1);
|
||||
bdnz(Lloop);
|
||||
}
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
|
||||
void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||
Label Lloop, Lslow;
|
||||
|
||||
// Check if cnt >= 8
|
||||
srwi_(tmp2, cnt, 3);
|
||||
beq(CCR0, Lslow);
|
||||
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF
|
||||
ori(tmp1, tmp1, 0xFF);
|
||||
mtctr(tmp2);
|
||||
|
||||
// 2x unrolled loop
|
||||
bind(Lloop);
|
||||
lwz(tmp2, 0, src); // ____0123 (Big Endian)
|
||||
lwz(tmp4, 4, src); // ____4567
|
||||
addi(src, src, 8);
|
||||
|
||||
rldicl(tmp3, tmp2, 7*8, 64-8); // _______2
|
||||
rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
|
||||
rldicl(tmp5, tmp4, 7*8, 64-8); // _______6
|
||||
rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
|
||||
|
||||
andc(tmp0, tmp2, tmp1); // ____0_1_
|
||||
rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
|
||||
andc(tmp3, tmp4, tmp1); // ____4_5_
|
||||
rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
|
||||
|
||||
rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3
|
||||
rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7
|
||||
|
||||
std(tmp2, 0, dst);
|
||||
std(tmp4, 8, dst);
|
||||
addi(dst, dst, 16);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lslow); // Fallback to slow version
|
||||
}
|
||||
|
||||
// Inflate byte[] to char[]. cnt must be positive int.
|
||||
void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
|
||||
Label Lloop;
|
||||
mtctr(cnt);
|
||||
|
||||
bind(Lloop);
|
||||
lbz(tmp, 0, src);
|
||||
addi(src, src, 1);
|
||||
sth(tmp, 0, dst);
|
||||
addi(dst, dst, 2);
|
||||
bdnz(Lloop);
|
||||
}
|
||||
|
||||
// Emit code which compares two strings and leaves a compareTo-style value in
// 'result'.
//   str1, str2 - addresses of the first elements; both are advanced while
//                scanning.
//   cnt1, cnt2 - lengths. cnt1 is shifted right by one for UU and cnt2 for
//                every encoding except LL (presumably converting byte counts
//                into char counts -- confirm against the caller); otherwise
//                the upper 32 bits are cleared. Both registers are killed.
//   tmp1       - scratch; holds the length difference (cnt1 - cnt2).
//   result     - difference of the first mismatching characters, or the
//                length difference if the strings agree up to the shorter
//                length. Negated for UL.
//   ae         - argument encoding (StrIntrinsicNode::LL/UU/LU/UL).
// R0 is used as an additional scratch register.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
                                       Register cnt1, Register cnt2,
                                       Register tmp1, Register result, int ae) {
  const Register tmp0 = R0,
                 diff = tmp1;

  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
  Label Ldone, Lslow, Lloop, Lreturn_diff;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend.

  if (ae == StrIntrinsicNode::UU) {
    srwi(cnt1, cnt1, 1);
  } else {
    clrldi(cnt1, cnt1, 32);
  }

  if (ae != StrIntrinsicNode::LL) {
    srwi(cnt2, cnt2, 1);
  } else {
    clrldi(cnt2, cnt2, 32);
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.
  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
  // if (diff > 0) { cnt1 = cnt2; }
  if (VM_Version::has_isel()) {
    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
  } else {
    // Copying also in the diff == 0 case is harmless: cnt1 == cnt2 then.
    Label Lskip;
    blt(CCR0, Lskip);
    mr(cnt1, cnt2);
    bind(Lskip);
  }

  // Rename registers
  Register chr1 = result;
  Register chr2 = tmp0;

  // Compare multiple characters in fast loop (only implemented for same encoding).
  int stride1 = 8, stride2 = 8;
  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
    Label Lfastloop, Lskipfast;

    srwi_(tmp0, cnt1, log2_chars_per_iter);
    beq(CCR0, Lskipfast);
    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
    mtctr(tmp0);

    bind(Lfastloop);
    ld(chr1, 0, str1);
    ld(chr2, 0, str2);
    cmpd(CCR0, chr1, chr2);
    bne(CCR0, Lslow); // Mismatch inside this 8 byte chunk: locate it character by character.
    addi(str1, str1, stride1);
    addi(str2, str2, stride2);
    bdnz(Lfastloop);
    mr(cnt1, cnt2); // Remaining characters.
    bind(Lskipfast);
  }

  // Loop which searches the first difference character by character.
  cmpwi(CCR0, cnt1, 0);
  beq(CCR0, Lreturn_diff);
  bind(Lslow);
  mtctr(cnt1);

  // From here on the strides are the element sizes of the two strings.
  switch (ae) {
    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
    case StrIntrinsicNode::UL: // fallthru (see comment above)
    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
    default: ShouldNotReachHere(); break;
  }

  bind(Lloop);
  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
  subf_(result, chr2, chr1); // result = chr1 - chr2
  bne(CCR0, Ldone);
  addi(str1, str1, stride1);
  addi(str2, str2, stride2);
  bdnz(Lloop);

  // If strings are equal up to min length, return the length difference.
  bind(Lreturn_diff);
  mr(result, diff);

  // Otherwise, return the difference between the first mismatched chars.
  bind(Ldone);
  if (ae == StrIntrinsicNode::UL) {
    neg(result, result); // Negate result (see note above).
  }
}
|
||||
|
||||
void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp1, Register result, bool is_byte) {
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
|
||||
Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
|
||||
bool limit_needs_shift = false;
|
||||
|
||||
if (is_array_equ) {
|
||||
const int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
|
||||
|
||||
// Return true if the same array.
|
||||
cmpd(CCR0, ary1, ary2);
|
||||
beq(CCR0, Lskiploop);
|
||||
|
||||
// Return false if one of them is NULL.
|
||||
cmpdi(CCR0, ary1, 0);
|
||||
cmpdi(CCR1, ary2, 0);
|
||||
li(result, 0);
|
||||
cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
|
||||
beq(CCR0, Ldone);
|
||||
|
||||
// Load the lengths of arrays.
|
||||
lwz(limit, length_offset, ary1);
|
||||
lwz(tmp0, length_offset, ary2);
|
||||
|
||||
// Return false if the two arrays are not equal length.
|
||||
cmpw(CCR0, limit, tmp0);
|
||||
bne(CCR0, Ldone);
|
||||
|
||||
// Load array addresses.
|
||||
addi(ary1, ary1, base_offset);
|
||||
addi(ary2, ary2, base_offset);
|
||||
} else {
|
||||
limit_needs_shift = !is_byte;
|
||||
li(result, 0); // Assume not equal.
|
||||
}
|
||||
|
||||
// Rename registers
|
||||
Register chr1 = tmp0;
|
||||
Register chr2 = tmp1;
|
||||
|
||||
// Compare 8 bytes per iteration in fast loop.
|
||||
const int log2_chars_per_iter = is_byte ? 3 : 2;
|
||||
|
||||
srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
|
||||
beq(CCR0, Lskipfast);
|
||||
mtctr(tmp0);
|
||||
|
||||
bind(Lfastloop);
|
||||
ld(chr1, 0, ary1);
|
||||
ld(chr2, 0, ary2);
|
||||
addi(ary1, ary1, 8);
|
||||
addi(ary2, ary2, 8);
|
||||
cmpd(CCR0, chr1, chr2);
|
||||
bne(CCR0, Ldone);
|
||||
bdnz(Lfastloop);
|
||||
|
||||
bind(Lskipfast);
|
||||
rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
|
||||
beq(CCR0, Lskiploop);
|
||||
mtctr(limit);
|
||||
|
||||
// Character by character.
|
||||
bind(Lloop);
|
||||
if (is_byte) {
|
||||
lbz(chr1, 0, ary1);
|
||||
lbz(chr2, 0, ary2);
|
||||
addi(ary1, ary1, 1);
|
||||
addi(ary2, ary2, 1);
|
||||
} else {
|
||||
lhz(chr1, 0, ary1);
|
||||
lhz(chr2, 0, ary2);
|
||||
addi(ary1, ary1, 2);
|
||||
addi(ary2, ary2, 2);
|
||||
}
|
||||
cmpw(CCR0, chr1, chr2);
|
||||
bne(CCR0, Ldone);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lskiploop);
|
||||
li(result, 1); // All characters are equal.
|
||||
bind(Ldone);
|
||||
}
|
||||
|
||||
// Emit code which searches 'needle' in 'haystack'.
//   result        - index of the first match (in characters), or -1.
//   haystack      - address of the first haystack element.
//   haycnt        - haystack length; killed (reused as last_addr).
//   needle        - address of the first needle element.
//   needle_values - not referenced by this implementation.
//   needlecnt     - needle length register; used if needlecntval == 0 and,
//                   for constant lengths > 3, as loop counter. Killed.
//   needlecntval  - constant needle length, or 0 if the length is variable.
//                   Must not be 1 (guaranteed below).
//   tmp1..tmp4    - scratch registers.
//   ae            - argument encoding; LU is not supported.
// R0, CCR0, CCR1 and CCR6 are used in addition.
void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
                                       Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
                                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {

  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
  Label L_TooShort, L_Found, L_NotFound, L_End;
  Register last_addr = haycnt, // Kill haycnt at the beginning.
           addr      = tmp1,
           n_start   = tmp2,
           ch1       = tmp3,
           ch2       = R0;

  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2; // Haystack element size in bytes.
  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1; // Needle element size in bytes.

  // **************************************************************************************************
  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
  // **************************************************************************************************

  // Compute last haystack addr to use if no match gets found.
  clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
  addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
  if (needlecntval == 0) { // variable needlecnt
    cmpwi(CCR6, needlecnt, 2);        // CCR6 is tested again at L_Comp1.
    clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
    blt(CCR6, L_TooShort);            // Variable needlecnt: handle short needle separately.
  }

  if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.

  if (needlecntval == 0) { // variable needlecnt
    subf(ch1, needlecnt, haycnt);     // Last character index to compare is haycnt-needlecnt.
    addi(needlecnt, needlecnt, -2);   // Rest of needle.
  } else { // constant needlecnt
    guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
    assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
    addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
    if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
  }

  if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.

  if (ae == StrIntrinsicNode::UL) {
    // Widen the 2 latin1 needle characters so they can be compared with 2 haystack chars.
    srwi(tmp4, n_start, 1*8);          // ___0
    rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
  }

  add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).

  // Main Loop (now we have at least 2 characters).
  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
  bind(L_OuterLoop); // Search for 1st 2 characters.
  Register addr_diff = tmp4;
  subf(addr_diff, addr, last_addr);   // Difference between already checked address and last address to check.
  addi(addr, addr, h_csize);          // This is the new address we want to use for comparing.
  srdi_(ch2, addr_diff, h_csize);     // Shift by h_csize: bytes -> characters / 2 for both element sizes.
  beq(CCR0, L_FinalCheck);            // 2 characters left?
  mtctr(ch2);                         // num of characters / 2
  bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
  if (h_csize == 2) {                 // Load 2 characters of haystack (ignore alignment).
    lwz(ch1, 0, addr);
    lwz(ch2, 2, addr);
  } else {
    lhz(ch1, 0, addr);
    lhz(ch2, 1, addr);
  }
  cmpw(CCR0, ch1, n_start);           // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
  cmpw(CCR1, ch2, n_start);
  beq(CCR0, L_Comp1);                 // Did we find the needle start?
  beq(CCR1, L_Comp2);
  addi(addr, addr, 2 * h_csize);
  bdnz(L_InnerLoop);
  bind(L_FinalCheck);
  andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
  beq(CCR0, L_NotFound);
  if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
  cmpw(CCR1, ch1, n_start);
  beq(CCR1, L_Comp1);
  bind(L_NotFound);
  li(result, -1);                     // not found
  b(L_End);

  // **************************************************************************************************
  // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
  // **************************************************************************************************
  if (needlecntval == 0) {            // We have to handle these cases separately.
    Label L_OneCharLoop;
    bind(L_TooShort);
    mtctr(haycnt);                    // haycnt still holds the length on this path.
    if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
    bind(L_OneCharLoop);
    if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
    cmpw(CCR1, ch1, n_start);
    beq(CCR1, L_Found);               // Did we find the one character needle?
    bdnz(L_OneCharLoop);
    li(result, -1);                   // Not found.
    b(L_End);
  }

  // **************************************************************************************************
  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
  // **************************************************************************************************

  // Compare the rest
  bind(L_Comp2);
  addi(addr, addr, h_csize);          // First comparison has failed, 2nd one hit.
  bind(L_Comp1);                      // Addr points to possible needle start.
  if (needlecntval != 2) {            // Const needlecnt==2?
    if (needlecntval != 3) {
      if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
      Register n_ind = tmp4,
               h_ind = n_ind;
      li(n_ind, 2 * n_csize);         // First 2 characters are already compared, use index 2.
      mtctr(needlecnt);               // Decremented by 2, still > 0.
      Label L_CompLoop;
      bind(L_CompLoop);
      if (ae == StrIntrinsicNode::UL) {
        // Haystack elements are twice as wide as needle elements: use a scaled index.
        h_ind = ch1;
        sldi(h_ind, n_ind, 1);
      }
      if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
      if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);
      addi(n_ind, n_ind, n_csize);
      bdnz(L_CompLoop);
    } else { // No loop required if there's only one needle character left.
      if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
      if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);
    }
  }
  // Return index ...
  bind(L_Found);
  subf(result, haystack, addr);       // relative to haystack, ...
  if (h_csize == 2) { srdi(result, result, 1); } // in characters.
  bind(L_End);
} // string_indexof
|
||||
|
||||
// Emit code which searches a single character in 'haystack'.
//   result     - index of the first match (in characters), or -1.
//   haystack   - address of the first haystack element.
//   haycnt     - number of haystack characters.
//   needle     - register holding the character to search for. If R0 is
//                passed, the constant needleChar is compared as immediate
//                instead.
//   needleChar - constant character; only used if needle == R0.
//   tmp1, tmp2 - scratch registers.
//   is_byte    - haystack element size: true = 1 byte, false = 2 bytes.
// R0, CCR0 and CCR1 are used in addition.
void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
                                            Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
  assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);

  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
  Register addr = tmp1,
           ch1  = tmp2,
           ch2  = R0;

  const int h_csize = is_byte ? 1 : 2;
  const bool needle_in_reg = (needle != R0); // Otherwise compare against the immediate needleChar.

//4:
  srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
  mr(addr, haystack);
  beq(CCR0, L_FinalCheck);
  mtctr(tmp2);              // Move to count register.
//8:
  bind(L_InnerLoop);        // Main work horse (2x unrolled search loop).
  if (!is_byte) {
    lhz(ch1, 0, addr);
    lhz(ch2, 2, addr);
  } else {
    lbz(ch1, 0, addr);
    lbz(ch2, 1, addr);
  }
  if (needle_in_reg) {
    cmpw(CCR0, ch1, needle);
    cmpw(CCR1, ch2, needle);
  } else {
    cmplwi(CCR0, ch1, (unsigned int)needleChar);
    cmplwi(CCR1, ch2, (unsigned int)needleChar);
  }
  beq(CCR0, L_Found1);      // Did we find the needle?
  beq(CCR1, L_Found2);
  addi(addr, addr, 2 * h_csize);
  bdnz(L_InnerLoop);
//16:
  bind(L_FinalCheck);
  andi_(R0, haycnt, 1);     // Odd count: one more element to check.
  beq(CCR0, L_NotFound);
  if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
  if (needle_in_reg) {
    cmpw(CCR1, ch1, needle);
  } else {
    cmplwi(CCR1, ch1, (unsigned int)needleChar);
  }
  beq(CCR1, L_Found1);
//21:
  bind(L_NotFound);
  li(result, -1);           // Not found.
  b(L_End);

  bind(L_Found2);
  addi(addr, addr, h_csize);
//24:
  bind(L_Found1);           // Return index ...
  subf(result, haystack, addr); // relative to haystack, ...
  if (!is_byte) { srdi(result, result, 1); } // in characters.
  bind(L_End);
} // string_indexof_char
|
||||
|
||||
|
||||
// Emit code which checks whether any byte in [src, src + cnt) has its most
// significant bit (0x80) set.
//   src        - address of the first byte; advanced while scanning.
//   cnt        - number of bytes.
//   result     - 1 if such a byte was found, 0 otherwise.
//   tmp1, tmp2 - scratch registers.
// R0 is used as an additional scratch register.
void C2_MacroAssembler::has_negatives(Register src, Register cnt, Register result,
                                      Register tmp1, Register tmp2) {
  const Register tmp0 = R0;
  assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
  Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;

  // Check if cnt >= 16 (the fast loop handles 16 bytes per iteration).
  lis(tmp1, (int)(short)0x8080);  // First part of the mask; completed below: tmp1 = 0x8080808080808080
  srwi_(tmp2, cnt, 4);
  li(result, 1);                  // Assume there's a negative byte.
  beq(CCR0, Lslow);
  ori(tmp1, tmp1, 0x8080);
  rldimi(tmp1, tmp1, 32, 0);
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lfastloop);
  ld(tmp2, 0, src);
  ld(tmp0, 8, src);

  orr(tmp0, tmp2, tmp0);

  and_(tmp0, tmp0, tmp1);
  bne(CCR0, Ldone);               // Found negative byte.
  addi(src, src, 16);

  bdnz(Lfastloop);

  bind(Lslow);                    // Fallback to slow version
  rldicl_(tmp0, cnt, 0, 64-4);    // Remaining bytes: cnt & 15.
  beq(CCR0, Lnoneg);
  mtctr(tmp0);
  bind(Lloop);
  lbz(tmp0, 0, src);
  addi(src, src, 1);
  andi_(tmp0, tmp0, 0x80);
  bne(CCR0, Ldone);               // Found negative byte.
  bdnz(Lloop);
  bind(Lnoneg);
  li(result, 0);

  bind(Ldone);
}
|
||||
|
||||
62
src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
Normal file
62
src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
Normal file
@ -0,0 +1,62 @@
|
||||
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
|
||||
|
||||
#ifndef CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
|
||||
#define CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
|
||||
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Intrinsics for CompactStrings
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once.
|
||||
void string_compress_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
|
||||
Label& Lfailure);
|
||||
|
||||
// Compress char[] to byte[]. cnt must be positive int.
|
||||
void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
|
||||
void string_inflate_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
|
||||
|
||||
// Inflate byte[] to char[]. cnt must be positive int.
|
||||
void string_inflate(Register src, Register dst, Register cnt, Register tmp);
|
||||
|
||||
void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
|
||||
Register tmp1, Register result, int ae);
|
||||
|
||||
void array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp1, Register result, bool is_byte);
|
||||
|
||||
void string_indexof(Register result, Register haystack, Register haycnt,
|
||||
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
|
||||
|
||||
void string_indexof_char(Register result, Register haystack, Register haycnt,
|
||||
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
|
||||
|
||||
void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
|
||||
|
||||
#endif // CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
|
||||
@ -33,6 +33,7 @@
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "nativeInst_ppc.hpp"
|
||||
#include "oops/klass.inline.hpp"
|
||||
#include "oops/methodData.hpp"
|
||||
#include "prims/methodHandles.hpp"
|
||||
#include "runtime/biasedLocking.hpp"
|
||||
#include "runtime/icache.hpp"
|
||||
@ -45,9 +46,6 @@
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
#ifdef COMPILER2
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) // nothing
|
||||
@ -3311,552 +3309,6 @@ void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwo
|
||||
|
||||
/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
|
||||
|
||||
#ifdef COMPILER2
|
||||
// Intrinsics for CompactStrings
|
||||
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once.
|
||||
void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
|
||||
Label& Lfailure) {
|
||||
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||
Label Lloop, Lslow;
|
||||
|
||||
// Check if cnt >= 8 (= 16 bytes)
|
||||
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF00FF00FF
|
||||
srwi_(tmp2, cnt, 3);
|
||||
beq(CCR0, Lslow);
|
||||
ori(tmp1, tmp1, 0xFF);
|
||||
rldimi(tmp1, tmp1, 32, 0);
|
||||
mtctr(tmp2);
|
||||
|
||||
// 2x unrolled loop
|
||||
bind(Lloop);
|
||||
ld(tmp2, 0, src); // _0_1_2_3 (Big Endian)
|
||||
ld(tmp4, 8, src); // _4_5_6_7
|
||||
|
||||
orr(tmp0, tmp2, tmp4);
|
||||
rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
|
||||
rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3
|
||||
rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
|
||||
rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7
|
||||
|
||||
andc_(tmp0, tmp0, tmp1);
|
||||
bne(CCR0, Lfailure); // Not latin1.
|
||||
addi(src, src, 16);
|
||||
|
||||
rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
|
||||
srdi(tmp2, tmp2, 3*8); // ____0_2_
|
||||
rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
|
||||
srdi(tmp4, tmp4, 3*8); // ____4_6_
|
||||
|
||||
orr(tmp2, tmp2, tmp3); // ____0123
|
||||
orr(tmp4, tmp4, tmp5); // ____4567
|
||||
|
||||
stw(tmp2, 0, dst);
|
||||
stw(tmp4, 4, dst);
|
||||
addi(dst, dst, 8);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lslow); // Fallback to slow version
|
||||
}
|
||||
|
||||
// Compress char[] to byte[]. cnt must be positive int.
|
||||
void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
|
||||
Label Lloop;
|
||||
mtctr(cnt);
|
||||
|
||||
bind(Lloop);
|
||||
lhz(tmp, 0, src);
|
||||
cmplwi(CCR0, tmp, 0xff);
|
||||
bgt(CCR0, Lfailure); // Not latin1.
|
||||
addi(src, src, 2);
|
||||
stb(tmp, 0, dst);
|
||||
addi(dst, dst, 1);
|
||||
bdnz(Lloop);
|
||||
}
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
|
||||
void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||
Label Lloop, Lslow;
|
||||
|
||||
// Check if cnt >= 8
|
||||
srwi_(tmp2, cnt, 3);
|
||||
beq(CCR0, Lslow);
|
||||
lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF
|
||||
ori(tmp1, tmp1, 0xFF);
|
||||
mtctr(tmp2);
|
||||
|
||||
// 2x unrolled loop
|
||||
bind(Lloop);
|
||||
lwz(tmp2, 0, src); // ____0123 (Big Endian)
|
||||
lwz(tmp4, 4, src); // ____4567
|
||||
addi(src, src, 8);
|
||||
|
||||
rldicl(tmp3, tmp2, 7*8, 64-8); // _______2
|
||||
rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
|
||||
rldicl(tmp5, tmp4, 7*8, 64-8); // _______6
|
||||
rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
|
||||
|
||||
andc(tmp0, tmp2, tmp1); // ____0_1_
|
||||
rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
|
||||
andc(tmp3, tmp4, tmp1); // ____4_5_
|
||||
rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
|
||||
|
||||
rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3
|
||||
rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7
|
||||
|
||||
std(tmp2, 0, dst);
|
||||
std(tmp4, 8, dst);
|
||||
addi(dst, dst, 16);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lslow); // Fallback to slow version
|
||||
}
|
||||
|
||||
// Inflate byte[] to char[]. cnt must be positive int.
|
||||
void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
|
||||
Label Lloop;
|
||||
mtctr(cnt);
|
||||
|
||||
bind(Lloop);
|
||||
lbz(tmp, 0, src);
|
||||
addi(src, src, 1);
|
||||
sth(tmp, 0, dst);
|
||||
addi(dst, dst, 2);
|
||||
bdnz(Lloop);
|
||||
}
|
||||
|
||||
// Emit code which compares two strings and leaves a compareTo-style value in
// 'result'.
//   str1, str2 - addresses of the first elements; both are advanced while
//                scanning.
//   cnt1, cnt2 - lengths. cnt1 is shifted right by one for UU and cnt2 for
//                every encoding except LL (presumably converting byte counts
//                into char counts -- confirm against the caller); otherwise
//                the upper 32 bits are cleared. Both registers are killed.
//   tmp1       - scratch; holds the length difference (cnt1 - cnt2).
//   result     - difference of the first mismatching characters, or the
//                length difference if the strings agree up to the shorter
//                length. Negated for UL.
//   ae         - argument encoding (StrIntrinsicNode::LL/UU/LU/UL).
// R0 is used as an additional scratch register.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    Register tmp1, Register result, int ae) {
  const Register tmp0 = R0,
                 diff = tmp1;

  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
  Label Ldone, Lslow, Lloop, Lreturn_diff;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend.

  if (ae == StrIntrinsicNode::UU) {
    srwi(cnt1, cnt1, 1);
  } else {
    clrldi(cnt1, cnt1, 32);
  }

  if (ae != StrIntrinsicNode::LL) {
    srwi(cnt2, cnt2, 1);
  } else {
    clrldi(cnt2, cnt2, 32);
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.
  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
  // if (diff > 0) { cnt1 = cnt2; }
  if (VM_Version::has_isel()) {
    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
  } else {
    // Copying also in the diff == 0 case is harmless: cnt1 == cnt2 then.
    Label Lskip;
    blt(CCR0, Lskip);
    mr(cnt1, cnt2);
    bind(Lskip);
  }

  // Rename registers
  Register chr1 = result;
  Register chr2 = tmp0;

  // Compare multiple characters in fast loop (only implemented for same encoding).
  int stride1 = 8, stride2 = 8;
  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
    Label Lfastloop, Lskipfast;

    srwi_(tmp0, cnt1, log2_chars_per_iter);
    beq(CCR0, Lskipfast);
    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
    mtctr(tmp0);

    bind(Lfastloop);
    ld(chr1, 0, str1);
    ld(chr2, 0, str2);
    cmpd(CCR0, chr1, chr2);
    bne(CCR0, Lslow); // Mismatch inside this 8 byte chunk: locate it character by character.
    addi(str1, str1, stride1);
    addi(str2, str2, stride2);
    bdnz(Lfastloop);
    mr(cnt1, cnt2); // Remaining characters.
    bind(Lskipfast);
  }

  // Loop which searches the first difference character by character.
  cmpwi(CCR0, cnt1, 0);
  beq(CCR0, Lreturn_diff);
  bind(Lslow);
  mtctr(cnt1);

  // From here on the strides are the element sizes of the two strings.
  switch (ae) {
    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
    case StrIntrinsicNode::UL: // fallthru (see comment above)
    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
    default: ShouldNotReachHere(); break;
  }

  bind(Lloop);
  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
  subf_(result, chr2, chr1); // result = chr1 - chr2
  bne(CCR0, Ldone);
  addi(str1, str1, stride1);
  addi(str2, str2, stride2);
  bdnz(Lloop);

  // If strings are equal up to min length, return the length difference.
  bind(Lreturn_diff);
  mr(result, diff);

  // Otherwise, return the difference between the first mismatched chars.
  bind(Ldone);
  if (ae == StrIntrinsicNode::UL) {
    neg(result, result); // Negate result (see note above).
  }
}
|
||||
|
||||
void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp1, Register result, bool is_byte) {
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
|
||||
Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
|
||||
bool limit_needs_shift = false;
|
||||
|
||||
if (is_array_equ) {
|
||||
const int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
|
||||
|
||||
// Return true if the same array.
|
||||
cmpd(CCR0, ary1, ary2);
|
||||
beq(CCR0, Lskiploop);
|
||||
|
||||
// Return false if one of them is NULL.
|
||||
cmpdi(CCR0, ary1, 0);
|
||||
cmpdi(CCR1, ary2, 0);
|
||||
li(result, 0);
|
||||
cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
|
||||
beq(CCR0, Ldone);
|
||||
|
||||
// Load the lengths of arrays.
|
||||
lwz(limit, length_offset, ary1);
|
||||
lwz(tmp0, length_offset, ary2);
|
||||
|
||||
// Return false if the two arrays are not equal length.
|
||||
cmpw(CCR0, limit, tmp0);
|
||||
bne(CCR0, Ldone);
|
||||
|
||||
// Load array addresses.
|
||||
addi(ary1, ary1, base_offset);
|
||||
addi(ary2, ary2, base_offset);
|
||||
} else {
|
||||
limit_needs_shift = !is_byte;
|
||||
li(result, 0); // Assume not equal.
|
||||
}
|
||||
|
||||
// Rename registers
|
||||
Register chr1 = tmp0;
|
||||
Register chr2 = tmp1;
|
||||
|
||||
// Compare 8 bytes per iteration in fast loop.
|
||||
const int log2_chars_per_iter = is_byte ? 3 : 2;
|
||||
|
||||
srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
|
||||
beq(CCR0, Lskipfast);
|
||||
mtctr(tmp0);
|
||||
|
||||
bind(Lfastloop);
|
||||
ld(chr1, 0, ary1);
|
||||
ld(chr2, 0, ary2);
|
||||
addi(ary1, ary1, 8);
|
||||
addi(ary2, ary2, 8);
|
||||
cmpd(CCR0, chr1, chr2);
|
||||
bne(CCR0, Ldone);
|
||||
bdnz(Lfastloop);
|
||||
|
||||
bind(Lskipfast);
|
||||
rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
|
||||
beq(CCR0, Lskiploop);
|
||||
mtctr(limit);
|
||||
|
||||
// Character by character.
|
||||
bind(Lloop);
|
||||
if (is_byte) {
|
||||
lbz(chr1, 0, ary1);
|
||||
lbz(chr2, 0, ary2);
|
||||
addi(ary1, ary1, 1);
|
||||
addi(ary2, ary2, 1);
|
||||
} else {
|
||||
lhz(chr1, 0, ary1);
|
||||
lhz(chr2, 0, ary2);
|
||||
addi(ary1, ary1, 2);
|
||||
addi(ary2, ary2, 2);
|
||||
}
|
||||
cmpw(CCR0, chr1, chr2);
|
||||
bne(CCR0, Ldone);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lskiploop);
|
||||
li(result, 1); // All characters are equal.
|
||||
bind(Ldone);
|
||||
}
|
||||
|
||||
void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
|
||||
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
|
||||
|
||||
// Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
|
||||
Label L_TooShort, L_Found, L_NotFound, L_End;
|
||||
Register last_addr = haycnt, // Kill haycnt at the beginning.
|
||||
addr = tmp1,
|
||||
n_start = tmp2,
|
||||
ch1 = tmp3,
|
||||
ch2 = R0;
|
||||
|
||||
assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
|
||||
const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
|
||||
const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
|
||||
|
||||
// **************************************************************************************************
|
||||
// Prepare for main loop: optimized for needle count >=2, bail out otherwise.
|
||||
// **************************************************************************************************
|
||||
|
||||
// Compute last haystack addr to use if no match gets found.
|
||||
clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value.
|
||||
addi(addr, haystack, -h_csize); // Accesses use pre-increment.
|
||||
if (needlecntval == 0) { // variable needlecnt
|
||||
cmpwi(CCR6, needlecnt, 2);
|
||||
clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
|
||||
blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
|
||||
}
|
||||
|
||||
if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
|
||||
|
||||
if (needlecntval == 0) { // variable needlecnt
|
||||
subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
|
||||
addi(needlecnt, needlecnt, -2); // Rest of needle.
|
||||
} else { // constant needlecnt
|
||||
guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
|
||||
assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
|
||||
addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
|
||||
if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
|
||||
}
|
||||
|
||||
if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
|
||||
|
||||
if (ae ==StrIntrinsicNode::UL) {
|
||||
srwi(tmp4, n_start, 1*8); // ___0
|
||||
rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
|
||||
}
|
||||
|
||||
add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
|
||||
|
||||
// Main Loop (now we have at least 2 characters).
|
||||
Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
|
||||
bind(L_OuterLoop); // Search for 1st 2 characters.
|
||||
Register addr_diff = tmp4;
|
||||
subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
|
||||
addi(addr, addr, h_csize); // This is the new address we want to use for comparing.
|
||||
srdi_(ch2, addr_diff, h_csize);
|
||||
beq(CCR0, L_FinalCheck); // 2 characters left?
|
||||
mtctr(ch2); // num of characters / 2
|
||||
bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
|
||||
if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment).
|
||||
lwz(ch1, 0, addr);
|
||||
lwz(ch2, 2, addr);
|
||||
} else {
|
||||
lhz(ch1, 0, addr);
|
||||
lhz(ch2, 1, addr);
|
||||
}
|
||||
cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
|
||||
cmpw(CCR1, ch2, n_start);
|
||||
beq(CCR0, L_Comp1); // Did we find the needle start?
|
||||
beq(CCR1, L_Comp2);
|
||||
addi(addr, addr, 2 * h_csize);
|
||||
bdnz(L_InnerLoop);
|
||||
bind(L_FinalCheck);
|
||||
andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
|
||||
beq(CCR0, L_NotFound);
|
||||
if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
|
||||
cmpw(CCR1, ch1, n_start);
|
||||
beq(CCR1, L_Comp1);
|
||||
bind(L_NotFound);
|
||||
li(result, -1); // not found
|
||||
b(L_End);
|
||||
|
||||
// **************************************************************************************************
|
||||
// Special Case: unfortunately, the variable needle case can be called with needlecnt<2
|
||||
// **************************************************************************************************
|
||||
if (needlecntval == 0) { // We have to handle these cases separately.
|
||||
Label L_OneCharLoop;
|
||||
bind(L_TooShort);
|
||||
mtctr(haycnt);
|
||||
if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
|
||||
bind(L_OneCharLoop);
|
||||
if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
|
||||
cmpw(CCR1, ch1, n_start);
|
||||
beq(CCR1, L_Found); // Did we find the one character needle?
|
||||
bdnz(L_OneCharLoop);
|
||||
li(result, -1); // Not found.
|
||||
b(L_End);
|
||||
}
|
||||
|
||||
// **************************************************************************************************
|
||||
// Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
|
||||
// **************************************************************************************************
|
||||
|
||||
// Compare the rest
|
||||
bind(L_Comp2);
|
||||
addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit.
|
||||
bind(L_Comp1); // Addr points to possible needle start.
|
||||
if (needlecntval != 2) { // Const needlecnt==2?
|
||||
if (needlecntval != 3) {
|
||||
if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
|
||||
Register n_ind = tmp4,
|
||||
h_ind = n_ind;
|
||||
li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2.
|
||||
mtctr(needlecnt); // Decremented by 2, still > 0.
|
||||
Label L_CompLoop;
|
||||
bind(L_CompLoop);
|
||||
if (ae ==StrIntrinsicNode::UL) {
|
||||
h_ind = ch1;
|
||||
sldi(h_ind, n_ind, 1);
|
||||
}
|
||||
if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
|
||||
if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
|
||||
cmpw(CCR1, ch1, ch2);
|
||||
bne(CCR1, L_OuterLoop);
|
||||
addi(n_ind, n_ind, n_csize);
|
||||
bdnz(L_CompLoop);
|
||||
} else { // No loop required if there's only one needle character left.
|
||||
if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
|
||||
if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
|
||||
cmpw(CCR1, ch1, ch2);
|
||||
bne(CCR1, L_OuterLoop);
|
||||
}
|
||||
}
|
||||
// Return index ...
|
||||
bind(L_Found);
|
||||
subf(result, haystack, addr); // relative to haystack, ...
|
||||
if (h_csize == 2) { srdi(result, result, 1); } // in characters.
|
||||
bind(L_End);
|
||||
} // string_indexof
|
||||
|
||||
// Emits code that searches a haystack for a single character.
//
// result      - receives the index (in characters) of the first match, or -1 if there is none.
// haystack    - address of the first haystack character; only read.
// haycnt      - number of haystack characters; only read.
// needle      - register holding the character to find; if this is R0 the
//               immediate needleChar is compared against instead.
// needleChar  - character to find when needle == R0.
// tmp1, tmp2  - scratch registers (used as running address and loaded character).
// is_byte     - true: haystack characters are 1 byte wide; false: 2 bytes wide.
//
// Also overwrites R0, CTR, CCR0 and CCR1.
void MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
|
||||
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
|
||||
assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
|
||||
|
||||
Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
|
||||
// addr walks through the haystack; ch1/ch2 hold the two characters examined per iteration.
Register addr = tmp1,
|
||||
ch1 = tmp2,
|
||||
ch2 = R0;
|
||||
|
||||
// Width of one haystack character in bytes.
const int h_csize = is_byte ? 1 : 2;
|
||||
|
||||
//4:
|
||||
// tmp2 = haycnt / 2 = number of 2x unrolled iterations; the record form's
// result is tested by the beq(CCR0, ...) below (mr does not touch the condition register).
srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
|
||||
mr(addr, haystack);
|
||||
// Fewer than 2 characters: skip the unrolled loop entirely.
beq(CCR0, L_FinalCheck);
|
||||
mtctr(tmp2); // Move to count register.
|
||||
//8:
|
||||
bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
|
||||
// Load two adjacent haystack characters (this overwrites tmp2 via ch1; the count already lives in CTR).
if (!is_byte) {
|
||||
lhz(ch1, 0, addr);
|
||||
|
||||
lhz(ch2, 2, addr);
|
||||
|
||||
} else {
|
||||
lbz(ch1, 0, addr);
|
||||
|
||||
lbz(ch2, 1, addr);
|
||||
|
||||
}
|
||||
// Compare both characters before branching: register needle if given, immediate needleChar otherwise.
(needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
|
||||
(needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
|
||||
beq(CCR0, L_Found1); // Did we find the needle?
|
||||
beq(CCR1, L_Found2);
|
||||
addi(addr, addr, 2 * h_csize);
|
||||
bdnz(L_InnerLoop);
|
||||
//16:
|
||||
bind(L_FinalCheck);
|
||||
// Odd haycnt leaves exactly one character unchecked; R0 is only a throw-away target here.
andi_(R0, haycnt, 1);
|
||||
beq(CCR0, L_NotFound);
|
||||
if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
|
||||
(needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
|
||||
beq(CCR1, L_Found1);
|
||||
//21:
|
||||
bind(L_NotFound);
|
||||
li(result, -1); // Not found.
|
||||
b(L_End);
|
||||
|
||||
bind(L_Found2);
|
||||
// The second character of the pair matched: advance addr to it before computing the index.
addi(addr, addr, h_csize);
|
||||
//24:
|
||||
bind(L_Found1); // Return index ...
|
||||
subf(result, haystack, addr); // relative to haystack, ...
|
||||
if (!is_byte) { srdi(result, result, 1); } // in characters.
|
||||
bind(L_End);
|
||||
} // string_indexof_char
|
||||
|
||||
|
||||
// Emits code that checks whether any of the cnt bytes starting at src has its
// most significant bit (0x80) set.
//
// src         - address of the first byte; advanced while scanning (clobbered).
// cnt         - number of bytes to check; only read.
// result      - set to 1 if a byte with bit 0x80 set was found, 0 otherwise.
// tmp1, tmp2  - scratch registers (tmp1 holds the 0x80 byte mask in the fast loop).
//
// Also overwrites R0 (tmp0), CTR and CCR0.
void MacroAssembler::has_negatives(Register src, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2) {
|
||||
const Register tmp0 = R0;
|
||||
assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
|
||||
Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
|
||||
|
||||
// Check if cnt >= 16, i.e. whether there is at least one 16-byte chunk (tmp2 = cnt >> 4).
|
||||
// The mask is built in three steps (lis, ori, rldimi); the comment below states its final value.
lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080
|
||||
srwi_(tmp2, cnt, 4);
|
||||
li(result, 1); // Assume there's a negative byte.
|
||||
// No full chunk: go to the byte loop, which does not use the (still incomplete) mask in tmp1.
beq(CCR0, Lslow);
|
||||
ori(tmp1, tmp1, 0x8080);
|
||||
rldimi(tmp1, tmp1, 32, 0);
|
||||
mtctr(tmp2);
|
||||
|
||||
// 2x unrolled loop
|
||||
bind(Lfastloop);
|
||||
// Load 16 bytes as two doublewords (this overwrites tmp2; the chunk count already lives in CTR).
ld(tmp2, 0, src);
|
||||
ld(tmp0, 8, src);
|
||||
|
||||
// OR both doublewords so that a single mask test covers all 16 bytes.
orr(tmp0, tmp2, tmp0);
|
||||
|
||||
and_(tmp0, tmp0, tmp1);
|
||||
bne(CCR0, Ldone); // Found negative byte.
|
||||
addi(src, src, 16);
|
||||
|
||||
bdnz(Lfastloop);
|
||||
|
||||
bind(Lslow); // Fallback to slow version
|
||||
// tmp0 = cnt & 15: number of bytes left over after the 16-byte chunks.
rldicl_(tmp0, cnt, 0, 64-4);
|
||||
beq(CCR0, Lnoneg);
|
||||
mtctr(tmp0);
|
||||
bind(Lloop);
|
||||
lbz(tmp0, 0, src);
|
||||
addi(src, src, 1);
|
||||
andi_(tmp0, tmp0, 0x80);
|
||||
bne(CCR0, Ldone); // Found negative byte.
|
||||
bdnz(Lloop);
|
||||
bind(Lnoneg);
|
||||
// All bytes checked without a hit: replace the pessimistic 1 set above.
li(result, 0);
|
||||
|
||||
bind(Ldone);
|
||||
}
|
||||
|
||||
#endif // Compiler2
|
||||
|
||||
// Helpers for Intrinsic Emitters
|
||||
//
|
||||
// Revert the byte order of a 32bit value in a register
|
||||
|
||||
@ -761,39 +761,6 @@ class MacroAssembler: public Assembler {
|
||||
void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0);
|
||||
void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1);
|
||||
|
||||
#ifdef COMPILER2
|
||||
// Intrinsics for CompactStrings
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once.
|
||||
void string_compress_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
|
||||
Label& Lfailure);
|
||||
|
||||
// Compress char[] to byte[]. cnt must be positive int.
|
||||
void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
|
||||
void string_inflate_16(Register src, Register dst, Register cnt,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
|
||||
|
||||
// Inflate byte[] to char[]. cnt must be positive int.
|
||||
void string_inflate(Register src, Register dst, Register cnt, Register tmp);
|
||||
|
||||
void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
|
||||
Register tmp1, Register result, int ae);
|
||||
|
||||
void array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp1, Register result, bool is_byte);
|
||||
|
||||
void string_indexof(Register result, Register haystack, Register haycnt,
|
||||
Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
|
||||
|
||||
// Find a single character in haystack (haycnt characters of 1 byte if is_byte, else 2 bytes).
// The character is taken from register needle, or from needleChar if needle is R0.
// result = index of the first match in characters, or -1. Kills tmp1, tmp2, R0 and CTR.
void string_indexof_char(Register result, Register haystack, Register haycnt,
|
||||
Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
|
||||
|
||||
// result = 1 if any of the cnt bytes at src has bit 0x80 set, 0 otherwise.
// Advances src; kills tmp1, tmp2, R0 and CTR.
void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
|
||||
#endif
|
||||
|
||||
// Emitters for BigInteger.multiplyToLen intrinsic.
|
||||
inline void multiply64(Register dest_hi, Register dest_lo,
|
||||
Register x, Register y);
|
||||
|
||||
@ -1144,7 +1144,7 @@ bool SafePointNode::needs_polling_address_input() {
|
||||
|
||||
// Emit an interrupt that is caught by the debugger (for debugging compiler).
|
||||
void emit_break(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ illtrap();
|
||||
}
|
||||
|
||||
@ -1165,7 +1165,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
|
||||
//=============================================================================
|
||||
|
||||
void emit_nop(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ nop();
|
||||
}
|
||||
|
||||
@ -1184,12 +1184,14 @@ source_hpp %{ // Header information of the source block.
|
||||
//---< Used for optimization in Compile::Shorten_branches >---
|
||||
//--------------------------------------------------------------
|
||||
|
||||
class C2_MacroAssembler;
|
||||
|
||||
class CallStubImpl {
|
||||
|
||||
public:
|
||||
|
||||
// Emit call stub, compiled java to interpreter.
|
||||
static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
|
||||
static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
|
||||
|
||||
// Size of call trampoline stub.
|
||||
// This doesn't need to be accurate to the byte, but it
|
||||
@ -1220,7 +1222,7 @@ source %{
|
||||
// load the call target from the constant pool
|
||||
// branch via CTR (LR/link still points to the call-site above)
|
||||
|
||||
void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
|
||||
void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
|
||||
address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
|
||||
if (stub == NULL) {
|
||||
ciEnv::current()->record_out_of_memory_failure();
|
||||
@ -1251,7 +1253,7 @@ typedef struct {
|
||||
// - Add a relocation at the branch-and-link instruction.
|
||||
// - Emit a branch-and-link.
|
||||
// - Remember the return pc offset.
|
||||
EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
|
||||
EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
|
||||
EmitCallOffsets offsets = { -1, -1 };
|
||||
const int start_offset = __ offset();
|
||||
offsets.insts_call_instruction_offset = __ offset();
|
||||
@ -1379,7 +1381,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
|
||||
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
const long framesize = C->output()->frame_size_in_bytes();
|
||||
assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
|
||||
@ -1571,7 +1573,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
|
||||
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
|
||||
assert(framesize >= 0, "negative frame-size?");
|
||||
@ -1637,7 +1639,7 @@ int MachEpilogNode::safepoint_offset() const {
|
||||
|
||||
#if 0 // TODO: PPC port
|
||||
void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
if (LoadPollAddressFromThread) {
|
||||
_masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread);
|
||||
} else {
|
||||
@ -1754,7 +1756,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
int src_offset = ra_->reg2offset(src_lo);
|
||||
int dst_offset = ra_->reg2offset(dst_lo);
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
__ ld(R0, src_offset, R1_SP);
|
||||
__ std(R0, dst_offset, R1_SP);
|
||||
__ ld(R0, src_offset+8, R1_SP);
|
||||
@ -1767,7 +1769,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
|
||||
int dst_offset = ra_->reg2offset(dst_lo);
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
__ addi(R0, R1_SP, dst_offset);
|
||||
__ stxvd2x(Rsrc, R0);
|
||||
}
|
||||
@ -1778,7 +1780,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
|
||||
int src_offset = ra_->reg2offset(src_lo);
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
__ addi(R0, R1_SP, src_offset);
|
||||
__ lxvd2x(Rdst, R0);
|
||||
}
|
||||
@ -1789,7 +1791,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
|
||||
VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
__ xxlor(Rdst, Rsrc, Rsrc);
|
||||
}
|
||||
size += 4;
|
||||
@ -1833,7 +1835,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
size = (Rsrc != Rdst) ? 4 : 0;
|
||||
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
if (size) {
|
||||
__ mr(Rdst, Rsrc);
|
||||
}
|
||||
@ -1879,7 +1881,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
// Check for float reg-reg copy.
|
||||
if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
|
||||
FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
|
||||
__ fmr(Rdst, Rsrc);
|
||||
@ -2049,7 +2051,7 @@ void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
// _count contains the number of nops needed for padding.
|
||||
for (int i = 0; i < _count; i++) {
|
||||
__ nop();
|
||||
@ -2070,7 +2072,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
#endif
|
||||
|
||||
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
||||
int reg = ra_->get_encode(this);
|
||||
@ -2096,7 +2098,7 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
|
||||
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
// This is the unverified entry point.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
// Inline_cache contains a klass.
|
||||
Register ic_klass = as_Register(Matcher::inline_cache_reg_encode());
|
||||
@ -2179,7 +2181,7 @@ class HandlerImpl {
|
||||
source %{
|
||||
|
||||
int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_exception_handler());
|
||||
if (base == NULL) return 0; // CodeBuffer::expand failed
|
||||
@ -2196,7 +2198,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
|
||||
// The deopt_handler is like the exception handler, but it calls to
|
||||
// the deoptimization blob instead of jumping to the exception blob.
|
||||
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_deopt_handler());
|
||||
if (base == NULL) return 0; // CodeBuffer::expand failed
|
||||
@ -2660,14 +2662,14 @@ const bool Matcher::convi2l_type_required = true;
|
||||
encode %{
|
||||
enc_class enc_unimplemented %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ unimplemented("Unimplemented mach node encoding in AD file.", 13);
|
||||
%}
|
||||
|
||||
enc_class enc_untested %{
|
||||
#ifdef ASSERT
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ untested("Untested mach node encoding in AD file.");
|
||||
#else
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_none);
|
||||
@ -2676,7 +2678,7 @@ encode %{
|
||||
|
||||
enc_class enc_lbz(iRegIdst dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_lbz);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lbz($dst$$Register, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
@ -2684,7 +2686,7 @@ encode %{
|
||||
// Load acquire.
|
||||
enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lbz($dst$$Register, Idisp, $mem$$base$$Register);
|
||||
__ twi_0($dst$$Register);
|
||||
@ -2694,7 +2696,7 @@ encode %{
|
||||
enc_class enc_lhz(iRegIdst dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_lhz);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lhz($dst$$Register, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
@ -2703,7 +2705,7 @@ encode %{
|
||||
enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lhz($dst$$Register, Idisp, $mem$$base$$Register);
|
||||
__ twi_0($dst$$Register);
|
||||
@ -2713,7 +2715,7 @@ encode %{
|
||||
enc_class enc_lwz(iRegIdst dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_lwz);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lwz($dst$$Register, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
@ -2722,7 +2724,7 @@ encode %{
|
||||
enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lwz($dst$$Register, Idisp, $mem$$base$$Register);
|
||||
__ twi_0($dst$$Register);
|
||||
@ -2731,7 +2733,7 @@ encode %{
|
||||
|
||||
enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_ld);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
// Operand 'ds' requires 4-alignment.
|
||||
assert((Idisp & 0x3) == 0, "unaligned offset");
|
||||
@ -2741,7 +2743,7 @@ encode %{
|
||||
// Load acquire.
|
||||
enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
// Operand 'ds' requires 4-alignment.
|
||||
assert((Idisp & 0x3) == 0, "unaligned offset");
|
||||
@ -2752,7 +2754,7 @@ encode %{
|
||||
|
||||
enc_class enc_lfd(RegF dst, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_lfd);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
@ -2760,7 +2762,7 @@ encode %{
|
||||
enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_ld);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int toc_offset = 0;
|
||||
|
||||
address const_toc_addr;
|
||||
@ -2784,7 +2786,7 @@ encode %{
|
||||
enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_addis);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
if (!ra_->C->output()->in_scratch_emit_size()) {
|
||||
address const_toc_addr;
|
||||
@ -3019,7 +3021,7 @@ encode %{
|
||||
enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_ld);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int toc_offset = 0;
|
||||
|
||||
intptr_t val = $src$$constant;
|
||||
@ -3052,7 +3054,7 @@ encode %{
|
||||
enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_addis);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
if (!ra_->C->output()->in_scratch_emit_size()) {
|
||||
intptr_t val = $src$$constant;
|
||||
relocInfo::relocType constant_reloc = $src->constant_reloc(); // src
|
||||
@ -3186,14 +3188,14 @@ encode %{
|
||||
|
||||
enc_class enc_stw(iRegIsrc src, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_stw);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ stw($src$$Register, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
|
||||
enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_std);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
// Operand 'ds' requires 4-alignment.
|
||||
assert((Idisp & 0x3) == 0, "unaligned offset");
|
||||
@ -3202,14 +3204,14 @@ encode %{
|
||||
|
||||
enc_class enc_stfs(RegF src, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_stfs);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
|
||||
enc_class enc_stfd(RegF src, memory mem) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_stfd);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
__ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
|
||||
%}
|
||||
@ -3230,7 +3232,7 @@ encode %{
|
||||
// __ bind(skip_release);
|
||||
// __ stb(card mark);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label skip_storestore;
|
||||
|
||||
#if 0 // TODO: PPC port
|
||||
@ -3451,7 +3453,7 @@ encode %{
|
||||
enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int cc = $cmp$$cmpcode;
|
||||
int flags_reg = $crx$$reg;
|
||||
Label done;
|
||||
@ -3466,7 +3468,7 @@ encode %{
|
||||
enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label done;
|
||||
assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
|
||||
// Branch if not (cmp crx).
|
||||
@ -3480,14 +3482,14 @@ encode %{
|
||||
// input mapping for latency computation.
|
||||
enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_andc);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ andc($dst$$Register, $src1$$Register, $src2$$Register);
|
||||
%}
|
||||
|
||||
enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Label done;
|
||||
__ cmpwi($crx$$CondRegister, $src$$Register, 0);
|
||||
@ -3500,7 +3502,7 @@ encode %{
|
||||
enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Label done;
|
||||
__ cmpdi($crx$$CondRegister, $src$$Register, 0);
|
||||
@ -3513,7 +3515,7 @@ encode %{
|
||||
enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
|
||||
Label done;
|
||||
__ bso($crx$$CondRegister, done);
|
||||
@ -3525,7 +3527,7 @@ encode %{
|
||||
enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_cmove);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label done;
|
||||
__ bso($crx$$CondRegister, done);
|
||||
__ mffprd($dst$$Register, $src$$FloatRegister);
|
||||
@ -3536,7 +3538,7 @@ encode %{
|
||||
enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label d; // dummy
|
||||
__ bind(d);
|
||||
Label* p = ($lbl$$label);
|
||||
@ -3566,7 +3568,7 @@ encode %{
|
||||
// to ppc64Opcode_bc in order to hide this detail from the scheduler.
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label d; // dummy
|
||||
__ bind(d);
|
||||
Label* p = ($lbl$$label);
|
||||
@ -3598,7 +3600,7 @@ encode %{
|
||||
// to ppc64Opcode_bc in order to hide this detail from the scheduler.
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_bc);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label d; // dummy
|
||||
__ bind(d);
|
||||
Label* p = ($lbl$$label);
|
||||
@ -3683,7 +3685,7 @@ encode %{
|
||||
// Fake operand dst needed for PPC scheduler.
|
||||
assert($dst$$constant == 0x0, "dst must be 0x0");
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
// Mark the code position where the load from the safepoint
|
||||
// polling page was emitted as relocInfo::poll_type.
|
||||
__ relocate(relocInfo::poll_type);
|
||||
@ -3739,7 +3741,7 @@ encode %{
|
||||
enc_class enc_java_static_call(method meth) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_bl);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address entry_point = (address)$meth$$method;
|
||||
|
||||
if (!_method) {
|
||||
@ -3789,7 +3791,7 @@ encode %{
|
||||
enc_class enc_java_dynamic_call_sched(method meth) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_bl);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
if (!ra_->C->output()->in_scratch_emit_size()) {
|
||||
// Create a call trampoline stub for the given method.
|
||||
@ -3892,7 +3894,7 @@ encode %{
|
||||
// In the code we have to use $constanttablebase.
|
||||
enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int start_offset = __ offset();
|
||||
|
||||
Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
|
||||
@ -3951,7 +3953,7 @@ encode %{
|
||||
enc_class enc_java_to_runtime_call (method meth) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
const address start_pc = __ pc();
|
||||
|
||||
#if defined(ABI_ELFv2)
|
||||
@ -3984,7 +3986,7 @@ encode %{
|
||||
// input mapping for latency computation.
|
||||
enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
|
||||
// TODO: PPC port $archOpcode(ppc64Opcode_mtctr);
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mtctr($src$$Register);
|
||||
%}
|
||||
|
||||
|
||||
1281
src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
Normal file
1281
src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
Normal file
File diff suppressed because it is too large
Load Diff
76
src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
Normal file
76
src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp
Normal file
@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CPU_S390_C2_MACROASSEMBLER_S390_HPP
|
||||
#define CPU_S390_C2_MACROASSEMBLER_S390_HPP
|
||||
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
//-------------------------------------------
|
||||
// Special String Intrinsics Implementation.
|
||||
//-------------------------------------------
|
||||
// Intrinsics for CompactStrings
|
||||
// Restores: src, dst
|
||||
// Uses: cnt
|
||||
// Kills: tmp, Z_R0, Z_R1.
|
||||
// Early clobber: result.
|
||||
// Boolean precise controls accuracy of result value.
|
||||
unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
|
||||
Register tmp, bool precise);
|
||||
|
||||
// Inflate byte[] to char[].
|
||||
unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
|
||||
|
||||
// Inflate byte[] to char[].
|
||||
// Restores: src, dst
|
||||
// Uses: cnt
|
||||
// Kills: tmp, Z_R0, Z_R1.
|
||||
unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp);
|
||||
|
||||
// Inflate byte[] to char[], length known at compile time.
|
||||
// Restores: src, dst
|
||||
// Kills: tmp, Z_R0, Z_R1.
|
||||
// Note:
|
||||
// len is signed int. Counts # characters, not bytes.
|
||||
unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
|
||||
|
||||
// Kills src.
|
||||
unsigned int has_negatives(Register result, Register src, Register cnt,
|
||||
Register odd_reg, Register even_reg, Register tmp);
|
||||
|
||||
unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
|
||||
Register odd_reg, Register even_reg, Register result, int ae);
|
||||
|
||||
unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
|
||||
Register odd_reg, Register even_reg, Register result, bool is_byte);
|
||||
|
||||
unsigned int string_indexof(Register result, Register haystack, Register haycnt,
|
||||
Register needle, Register needlecnt, int needlecntval,
|
||||
Register odd_reg, Register even_reg, int ae);
|
||||
|
||||
unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
|
||||
Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
|
||||
|
||||
#endif // CPU_S390_C2_MACROASSEMBLER_S390_HPP
|
||||
File diff suppressed because it is too large
Load Diff
@ -841,52 +841,6 @@ class MacroAssembler: public Assembler {
|
||||
Register cnt_reg,
|
||||
Register tmp1_reg, Register tmp2_reg);
|
||||
|
||||
//-------------------------------------------
|
||||
// Special String Intrinsics Implementation.
|
||||
//-------------------------------------------
|
||||
// Intrinsics for CompactStrings
|
||||
// Restores: src, dst
|
||||
// Uses: cnt
|
||||
// Kills: tmp, Z_R0, Z_R1.
|
||||
// Early clobber: result.
|
||||
// Boolean precise controls accuracy of result value.
|
||||
#ifdef COMPILER2
|
||||
unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
|
||||
Register tmp, bool precise);
|
||||
|
||||
// Inflate byte[] to char[].
|
||||
unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);
|
||||
|
||||
// Inflate byte[] to char[].
|
||||
// Restores: src, dst
|
||||
// Uses: cnt
|
||||
// Kills: tmp, Z_R0, Z_R1.
|
||||
unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp);
|
||||
|
||||
// Inflate byte[] to char[], length known at compile time.
|
||||
// Restores: src, dst
|
||||
// Kills: tmp, Z_R0, Z_R1.
|
||||
// Note:
|
||||
// len is signed int. Counts # characters, not bytes.
|
||||
unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len);
|
||||
|
||||
// Kills src.
|
||||
unsigned int has_negatives(Register result, Register src, Register cnt,
|
||||
Register odd_reg, Register even_reg, Register tmp);
|
||||
|
||||
unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
|
||||
Register odd_reg, Register even_reg, Register result, int ae);
|
||||
|
||||
unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
|
||||
Register odd_reg, Register even_reg, Register result, bool is_byte);
|
||||
|
||||
unsigned int string_indexof(Register result, Register haystack, Register haycnt,
|
||||
Register needle, Register needlecnt, int needlecntval,
|
||||
Register odd_reg, Register even_reg, int ae);
|
||||
|
||||
unsigned int string_indexof_char(Register result, Register haystack, Register haycnt,
|
||||
Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte);
|
||||
#endif
|
||||
|
||||
// Emit an oop const to the constant pool and set a relocation info
|
||||
// with address current_pc. Return the TOC offset of the constant.
|
||||
|
||||
@ -664,13 +664,13 @@ bool SafePointNode::needs_polling_address_input() {
|
||||
}
|
||||
|
||||
void emit_nop(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ z_nop();
|
||||
}
|
||||
|
||||
// Emit an interrupt that is caught by the debugger (for debugging compiler).
|
||||
void emit_break(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ z_illtrap();
|
||||
}
|
||||
|
||||
@ -735,7 +735,7 @@ static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
|
||||
|
||||
// Check effective address (at runtime) for required alignment.
|
||||
static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
__ z_lay(Z_R0, disp, index, base);
|
||||
__ z_nill(Z_R0, alignment-1);
|
||||
@ -743,7 +743,7 @@ static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index,
|
||||
__ z_illtrap();
|
||||
}
|
||||
|
||||
int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
|
||||
int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
|
||||
PhaseRegAlloc* ra_, bool is_native_call = false) {
|
||||
__ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
|
||||
address old_mark = __ inst_mark();
|
||||
@ -774,7 +774,7 @@ int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relo
|
||||
return (ret_off - start_off);
|
||||
}
|
||||
|
||||
static int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
|
||||
static int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
|
||||
__ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
|
||||
address old_mark = __ inst_mark();
|
||||
unsigned int start_off = __ offset();
|
||||
@ -807,7 +807,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
|
||||
// Even with PC-relative TOC addressing, we still need this node.
|
||||
// Float loads/stores do not support PC-relative addresses.
|
||||
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rtoc = as_Register(ra_->get_encode(this));
|
||||
__ load_toc(Rtoc);
|
||||
}
|
||||
@ -858,7 +858,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
||||
|
||||
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
__ verify_thread();
|
||||
|
||||
@ -932,7 +932,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
|
||||
#endif
|
||||
|
||||
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Compile* C = ra_->C;
|
||||
__ verify_thread();
|
||||
|
||||
@ -1034,7 +1034,7 @@ static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigne
|
||||
|
||||
static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
__ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP);
|
||||
}
|
||||
|
||||
@ -1108,7 +1108,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
// Check for integer reg-reg copy.
|
||||
if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
|
||||
Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
|
||||
__ z_lgr(Rdst, Rsrc);
|
||||
@ -1155,7 +1155,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
|
||||
// Check for float reg-reg copy.
|
||||
if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
|
||||
FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
|
||||
__ z_ldr(Rdst, Rsrc);
|
||||
@ -1254,7 +1254,7 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const {
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
int rem_space = 0;
|
||||
if (!(ra_->C->output()->in_scratch_emit_size())) {
|
||||
@ -1294,7 +1294,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
|
||||
|
||||
// Take care of the size function, if you make changes here!
|
||||
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
||||
int reg = ra_->get_encode(this);
|
||||
@ -1360,7 +1360,7 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
|
||||
#endif
|
||||
|
||||
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
const int ic_miss_offset = 2;
|
||||
|
||||
// Inline_cache contains a klass.
|
||||
@ -1455,7 +1455,7 @@ source %{
|
||||
// aware of the exception.
|
||||
int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
|
||||
Register temp_reg = Z_R1;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_exception_handler());
|
||||
if (base == NULL) {
|
||||
@ -1476,7 +1476,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
|
||||
|
||||
// Emit deopt handler code.
|
||||
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address base = __ start_a_stub(size_deopt_handler());
|
||||
|
||||
if (base == NULL) {
|
||||
@ -1841,13 +1841,13 @@ void Compile::reshape_address(AddPNode* addp) {
|
||||
// needs for encoding need to be specified.
|
||||
encode %{
|
||||
enc_class enc_unimplemented %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ unimplemented("Unimplemented mach node encoding in AD file.", 13);
|
||||
%}
|
||||
|
||||
enc_class enc_untested %{
|
||||
#ifdef ASSERT
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ untested("Untested mach node encoding in AD file.");
|
||||
#endif
|
||||
%}
|
||||
@ -2033,7 +2033,7 @@ encode %{
|
||||
Assembler::reg(Ridx, 12, 48) |
|
||||
Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
|
||||
} else {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ load_const_optimized(Z_R1_scratch, $mem$$disp);
|
||||
if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); }
|
||||
z_emit_inst(cbuf, $secondary |
|
||||
@ -2045,7 +2045,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_brul(Label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2058,7 +2058,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_bru(Label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2071,7 +2071,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2084,7 +2084,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2097,7 +2097,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2121,7 +2121,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2147,7 +2147,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2172,7 +2172,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* p = $lbl$$label;
|
||||
|
||||
// 'p' is `NULL' when this encoding class is used only to
|
||||
@ -2199,7 +2199,7 @@ encode %{
|
||||
|
||||
// Call from Java to runtime.
|
||||
enc_class z_enc_java_to_runtime_call(method meth) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
// Save return pc before call to the place where we need it, since
|
||||
// callee doesn't.
|
||||
@ -2227,7 +2227,7 @@ encode %{
|
||||
enc_class z_enc_java_static_call(method meth) %{
|
||||
// Call to fixup routine. Fixup routine uses ScopeDesc info to determine
|
||||
// whom we intended to call.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int ret_offset = 0;
|
||||
|
||||
if (!_method) {
|
||||
@ -2256,7 +2256,7 @@ encode %{
|
||||
|
||||
// Java dynamic call
|
||||
enc_class z_enc_java_dynamic_call(method meth) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
unsigned int start_off = __ offset();
|
||||
|
||||
int vtable_index = this->_vtable_index;
|
||||
@ -2311,7 +2311,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdst = reg_to_register_object($dst$$reg);
|
||||
Register Rsrc = reg_to_register_object($src$$reg);
|
||||
|
||||
@ -2332,7 +2332,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdst = reg_to_register_object($dst$$reg);
|
||||
int Csrc = $src$$constant;
|
||||
Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
|
||||
@ -2349,7 +2349,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_cctobool(iRegI res) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rres = reg_to_register_object($res$$reg);
|
||||
|
||||
if (VM_Version::has_LoadStoreConditional()) {
|
||||
@ -2366,7 +2366,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rcomp = reg_to_register_object($compare_value$$reg);
|
||||
Register Rnew = reg_to_register_object($exchange_value$$reg);
|
||||
Register Raddr = reg_to_register_object($addr_ptr$$reg);
|
||||
@ -2375,7 +2375,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rcomp = reg_to_register_object($compare_value$$reg);
|
||||
Register Rnew = reg_to_register_object($exchange_value$$reg);
|
||||
Register Raddr = reg_to_register_object($addr_ptr$$reg);
|
||||
@ -2384,7 +2384,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdst = reg_to_register_object($dst$$reg);
|
||||
Register Rtmp = reg_to_register_object($tmp$$reg);
|
||||
guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
|
||||
@ -2400,7 +2400,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdst = reg_to_register_object($dst$$reg);
|
||||
Register Rtmp = reg_to_register_object($tmp$$reg);
|
||||
guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
|
||||
|
||||
526
src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp
Normal file
526
src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.cpp
Normal file
@ -0,0 +1,526 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "oops/arrayOop.hpp"
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) /* nothing */
|
||||
#define STOP(error) stop(error)
|
||||
#else
|
||||
#define BLOCK_COMMENT(str) block_comment(str)
|
||||
#define STOP(error) block_comment(error); stop(error)
|
||||
#endif
|
||||
|
||||
// Vectorized fast path for compressing char[] to byte[]: each loop iteration
// reads 16 source bytes (8 chars) and writes 8 destination bytes.
//
// What the emitted code does, as visible in this body:
//  - copies cnt into result (delay slot of the first branch, not annulled),
//  - skips to the end (Lslow) when cnt < 8 or when (src | dst) is not
//    8-byte aligned,
//  - when any char in the current 16 bytes has a non-zero high byte, sets
//    result to 0 and branches to Ldone,
//  - otherwise advances src by 16 and dst by 8, decrements cnt by 8, and
//    repeats while cnt >= 8.
// Ldone is supplied by the caller and is not bound here. Lslow is bound as the
// very last thing emitted, so whatever the caller emits next acts as the slow
// path for the elements still counted in cnt.
// Clobbers tmp1, tmp2, tmp3, tmp4 and ftmp1, ftmp2, ftmp3. Requires UseVIS >= 3.
// Return 0 on failure.
|
||||
void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
|
||||
Label Lloop, Lslow;
|
||||
assert(UseVIS >= 3, "VIS3 is required");
|
||||
assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
|
||||
assert_different_registers(ftmp1, ftmp2, ftmp3);
|
||||
|
||||
// Check if cnt >= 8 (= 16 bytes)
|
||||
cmp(cnt, 8);
|
||||
// The annul flag is false here, so the delay slot below runs on both paths:
|
||||
// result holds the original count whether or not the fast path is taken.
|
||||
br(Assembler::less, false, Assembler::pn, Lslow);
|
||||
delayed()->mov(cnt, result); // copy count
|
||||
|
||||
// Check for 8-byte alignment of src and dst
|
||||
or3(src, dst, tmp1);
|
||||
andcc(tmp1, 7, G0);
|
||||
br(Assembler::notZero, false, Assembler::pn, Lslow);
|
||||
delayed()->nop();
|
||||
|
||||
// Set mask for bshuffle instruction
|
||||
// NOTE(review): the selector nibbles 1,3,5,...,f presumably pick the low-order
|
||||
// byte of each of the 8 chars in the ftmp1:ftmp2 pair - confirm against the
|
||||
// VIS bmask/bshuffle definition.
|
||||
Register mask = tmp4;
|
||||
set(0x13579bdf, mask);
|
||||
bmask(mask, G0, G0);
|
||||
|
||||
// Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
|
||||
// (tmp4 is reused: the bshuffle selector has already been consumed by bmask above)
|
||||
Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
|
||||
add(mask, 0x300, mask); // mask = 0x0000 0000 ff00 ff00
|
||||
sllx(mask, 32, tmp1); // tmp1 = 0xff00 ff00 0000 0000
|
||||
or3(mask, tmp1, mask); // mask = 0xff00 ff00 ff00 ff00
|
||||
|
||||
// Load first 8 bytes
|
||||
// (for later iterations tmp1 is reloaded in the delay slot of the loop branch)
|
||||
ldx(src, 0, tmp1);
|
||||
|
||||
bind(Lloop);
|
||||
// Load next 8 bytes
|
||||
ldx(src, 8, tmp2);
|
||||
|
||||
// Check for non-latin1 character by testing if the most significant byte of a char is set.
|
||||
// Although we have to move the data between integer and floating point registers, this is
|
||||
// still faster than the corresponding VIS instructions (ford/fand/fcmpd).
|
||||
or3(tmp1, tmp2, tmp3);
|
||||
btst(tmp3, mask);
|
||||
// annul zeroing if branch is not taken to preserve original count
|
||||
brx(Assembler::notZero, true, Assembler::pn, Ldone);
|
||||
delayed()->mov(G0, result); // 0 - failed
|
||||
|
||||
// Move bytes into float register
|
||||
movxtod(tmp1, ftmp1);
|
||||
movxtod(tmp2, ftmp2);
|
||||
|
||||
// Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
|
||||
bshuffle(ftmp1, ftmp2, ftmp3);
|
||||
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
|
||||
|
||||
// Increment addresses and decrement count
|
||||
inc(src, 16);
|
||||
inc(dst, 8);
|
||||
dec(cnt, 8);
|
||||
|
||||
cmp(cnt, 8);
|
||||
// annul LDX if branch is not taken to prevent access past end of string
|
||||
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
|
||||
delayed()->ldx(src, 0, tmp1);
|
||||
|
||||
// Fallback to slow version
|
||||
// (nothing is emitted after this label; execution continues with the caller's next code)
|
||||
bind(Lslow);
|
||||
}
|
||||
|
||||
// Scalar loop compressing char[] to byte[] one element at a time.
// Each iteration takes a halfword loaded from src, branches to Ldone with
// result set to 0 if its value is greater than 0xff, and otherwise stores its
// low byte to dst. The loop ends when cnt reaches zero.
// Advances src and dst and clobbers cnt and tmp. result is written only on the
// failure path; on success it is left as the caller set it.
// Ldone is supplied by the caller and is not bound here.
// NOTE(review): the first lduh and the first deccc happen before any test of
// cnt, so this presumably relies on cnt > 0 on entry - confirm at the call sites.
// Return 0 on failure.
|
||||
void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
|
||||
Label Lloop;
|
||||
assert_different_registers(src, dst, cnt, tmp, result);
|
||||
|
||||
// Load the first char; later chars are loaded in the delay slot of the loop branch.
|
||||
lduh(src, 0, tmp);
|
||||
|
||||
bind(Lloop);
|
||||
// src already points at the next char while tmp still holds the current one.
|
||||
inc(src, sizeof(jchar));
|
||||
cmp(tmp, 0xff);
|
||||
// annul zeroing if branch is not taken to preserve original count
|
||||
br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
|
||||
delayed()->mov(G0, result); // 0 - failed
|
||||
// deccc sets the condition codes tested by the loop branch below.
|
||||
deccc(cnt);
|
||||
stb(tmp, dst, 0);
|
||||
inc(dst);
|
||||
// annul LDUH if branch is not taken to prevent access past end of string
|
||||
br(Assembler::notZero, true, Assembler::pt, Lloop);
|
||||
delayed()->lduh(src, 0, tmp); // hoisted
|
||||
}
|
||||
|
||||
// Vectorized fast path for inflating byte[] to char[]: each loop iteration
// reads 8 source bytes and writes 16 destination bytes.
// Skips straight to the end (Lslow) when cnt < 8 or when (src | dst) is not
// 8-byte aligned. Otherwise it consumes cnt in steps of 8, advancing src by 8
// and dst by 16 per iteration, and falls through at Lslow with the remainder
// still in cnt. Lslow is bound as the very last thing emitted, so whatever the
// caller emits next handles the remaining elements.
// Clobbers tmp and ftmp1, ftmp2, ftmp3, ftmp4. Requires UseVIS >= 3.
// Ldone is accepted but not referenced anywhere in this body.
|
||||
void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
|
||||
Label Lloop, Lslow;
|
||||
assert(UseVIS >= 3, "VIS3 is required");
|
||||
assert_different_registers(src, dst, cnt, tmp);
|
||||
assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
|
||||
|
||||
// Check if cnt >= 8 (= 16 bytes)
|
||||
cmp(cnt, 8);
|
||||
br(Assembler::less, false, Assembler::pn, Lslow);
|
||||
delayed()->nop();
|
||||
|
||||
// Check for 8-byte alignment of src and dst
|
||||
or3(src, dst, tmp);
|
||||
andcc(tmp, 7, G0);
|
||||
// The annul flag is false, so the fzero in the delay slot runs on both paths.
|
||||
br(Assembler::notZero, false, Assembler::pn, Lslow);
|
||||
// Initialize float register to zero
|
||||
FloatRegister zerof = ftmp4;
|
||||
delayed()->fzero(FloatRegisterImpl::D, zerof);
|
||||
|
||||
// Load first 8 bytes
|
||||
// (for later iterations ftmp1 is reloaded in the delay slot of the loop branch)
|
||||
ldf(FloatRegisterImpl::D, src, 0, ftmp1);
|
||||
|
||||
bind(Lloop);
|
||||
inc(src, 8);
|
||||
dec(cnt, 8);
|
||||
|
||||
// Inflate the string by interleaving each byte from the source array
|
||||
// with a zero byte and storing the result in the destination array.
|
||||
// NOTE(review): ftmp1->successor() is presumably the second 32-bit half of the
|
||||
// double loaded into ftmp1, which is why its result goes to dst + 8 - confirm.
|
||||
fpmerge(zerof, ftmp1->successor(), ftmp2);
|
||||
stf(FloatRegisterImpl::D, ftmp2, dst, 8);
|
||||
fpmerge(zerof, ftmp1, ftmp3);
|
||||
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
|
||||
|
||||
inc(dst, 16);
|
||||
|
||||
cmp(cnt, 8);
|
||||
// annul LDF if branch is not taken to prevent access past end of string
|
||||
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
|
||||
delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
|
||||
|
||||
// Fallback to slow version
|
||||
// (nothing is emitted after this label; execution continues with the caller's next code)
|
||||
bind(Lslow);
|
||||
}
|
||||
|
||||
// Scalar loop inflating byte[] to char[] one element at a time: each byte is
// loaded with ldub and stored as a halfword with sth until cnt reaches zero.
// Advances src and dst and clobbers cnt and tmp.
// Ldone is accepted but not referenced anywhere in this body.
// NOTE(review): the first ldub and the first deccc happen before any test of
// cnt, so this presumably relies on cnt > 0 on entry - confirm at the call sites.
|
||||
void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
|
||||
Label Loop;
|
||||
assert_different_registers(src, dst, cnt, tmp);
|
||||
|
||||
// Load the first byte; later bytes are loaded in the delay slot of the loop branch.
|
||||
ldub(src, 0, tmp);
|
||||
bind(Loop);
|
||||
inc(src);
|
||||
// deccc sets the condition codes tested by the loop branch below.
|
||||
deccc(cnt);
|
||||
sth(tmp, dst, 0);
|
||||
inc(dst, sizeof(jchar));
|
||||
// annul LDUB if branch is not taken to prevent access past end of string
|
||||
br(Assembler::notZero, true, Assembler::pt, Loop);
|
||||
delayed()->ldub(src, 0, tmp); // hoisted
|
||||
}
|
||||
|
||||
// Emits code that compares two strings and leaves the comparison value in result.
//   str1, str2 - addresses of the first elements; both are advanced (clobbered).
//   cnt1, cnt2 - lengths; cnt1 is reused as limit1 and cnt2 as chr2 (both clobbered).
//   tmp1       - holds cnt1 - cnt2 ("diff"), used as the tie-breaker.
//   tmp2       - only used (as limit2) when ae is LU or UL.
//   result     - doubles as chr1 while comparing.
//   ae         - StrIntrinsicNode encoding pair (LL, UU, LU or UL).
// result ends up as the difference of the first mismatching elements, or as the
// length difference (diff, halved for UU) when there is no mismatch within the
// shorter length. It is negated at the very end for UL.
// NOTE(review): the halving for UU and the srl on cnt2 for LU/UL suggest that
// UTF-16 lengths arrive as byte counts - confirm against the callers.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2,
|
||||
Register result, int ae) {
|
||||
Label Ldone, Lloop;
|
||||
// tmp2 is deliberately not part of this check; it is only checked in the LU/UL case below.
|
||||
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
|
||||
int stride1, stride2;
|
||||
|
||||
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
|
||||
// we interchange str1 and str2 in the UL case and negate the result.
|
||||
// Like this, str1 is always latin1 encoded, except for the UU case.
|
||||
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
srl(cnt2, 1, cnt2);
|
||||
}
|
||||
|
||||
// See if the lengths are different, and calculate min in cnt1.
|
||||
// Save diff in case we need it for a tie-breaker.
|
||||
Label Lskip;
|
||||
Register diff = tmp1;
|
||||
subcc(cnt1, cnt2, diff);
|
||||
// Annulled delay slot: the mov only runs when the branch is taken (cnt1 > cnt2).
|
||||
br(Assembler::greater, true, Assembler::pt, Lskip);
|
||||
// cnt2 is shorter, so use its count:
|
||||
delayed()->mov(cnt2, cnt1);
|
||||
bind(Lskip);
|
||||
|
||||
// Rename registers
|
||||
Register limit1 = cnt1;
|
||||
Register limit2 = limit1;
|
||||
Register chr1 = result;
|
||||
Register chr2 = cnt2;
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
// We need an additional register to keep track of two limits
|
||||
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
|
||||
limit2 = tmp2;
|
||||
}
|
||||
|
||||
// Is the minimum length zero?
|
||||
cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
|
||||
br(Assembler::equal, true, Assembler::pn, Ldone);
|
||||
// result is difference in lengths
|
||||
// (exactly one of the two instructions below is emitted into the delay slot)
|
||||
if (ae == StrIntrinsicNode::UU) {
|
||||
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
|
||||
} else {
|
||||
delayed()->mov(diff, result);
|
||||
}
|
||||
|
||||
// Load first characters
|
||||
// (also fixes the per-string element strides used for the rest of the function)
|
||||
if (ae == StrIntrinsicNode::LL) {
|
||||
stride1 = stride2 = sizeof(jbyte);
|
||||
ldub(str1, 0, chr1);
|
||||
ldub(str2, 0, chr2);
|
||||
} else if (ae == StrIntrinsicNode::UU) {
|
||||
stride1 = stride2 = sizeof(jchar);
|
||||
lduh(str1, 0, chr1);
|
||||
lduh(str2, 0, chr2);
|
||||
} else {
|
||||
stride1 = sizeof(jbyte);
|
||||
stride2 = sizeof(jchar);
|
||||
ldub(str1, 0, chr1);
|
||||
lduh(str2, 0, chr2);
|
||||
}
|
||||
|
||||
// Compare first characters
|
||||
// (chr1 aliases result, so on a mismatch the difference is already in place)
|
||||
subcc(chr1, chr2, chr1);
|
||||
br(Assembler::notZero, false, Assembler::pt, Ldone);
|
||||
assert(chr1 == result, "result must be pre-placed");
|
||||
delayed()->nop();
|
||||
|
||||
// Check if the strings start at same location
|
||||
cmp(str1, str2);
|
||||
brx(Assembler::equal, true, Assembler::pn, Ldone);
|
||||
delayed()->mov(G0, result); // result is zero
|
||||
|
||||
// We have no guarantee that on 64 bit the higher half of limit is 0
|
||||
signx(limit1);
|
||||
|
||||
// Get limit
|
||||
// (chr1 and chr2 temporarily hold limit - stride; they are negated into the limits below)
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
sll(limit1, 1, limit2);
|
||||
subcc(limit2, stride2, chr2);
|
||||
}
|
||||
subcc(limit1, stride1, chr1);
|
||||
// Zero here means only one element was to be compared, and it already matched above.
|
||||
br(Assembler::zero, true, Assembler::pn, Ldone);
|
||||
// result is difference in lengths
|
||||
if (ae == StrIntrinsicNode::UU) {
|
||||
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
|
||||
} else {
|
||||
delayed()->mov(diff, result);
|
||||
}
|
||||
|
||||
// Shift str1 and str2 to the end of the arrays, negate limit
|
||||
add(str1, limit1, str1);
|
||||
add(str2, limit2, str2);
|
||||
neg(chr1, limit1); // limit1 = -(limit1-stride1)
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
neg(chr2, limit2); // limit2 = -(limit2-stride2)
|
||||
}
|
||||
|
||||
// Compare the rest of the characters
|
||||
// (limit1/limit2 are now negative offsets from the ends, counting up towards zero)
|
||||
load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
|
||||
|
||||
bind(Lloop);
|
||||
load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
|
||||
|
||||
subcc(chr1, chr2, chr1);
|
||||
br(Assembler::notZero, false, Assembler::pt, Ldone);
|
||||
assert(chr1 == result, "result must be pre-placed");
|
||||
delayed()->inccc(limit1, stride1);
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
// In the mixed-encoding case this is the last flag-setting instruction
|
||||
// before the loop branch, so the branch below tests limit2 rather than limit1.
|
||||
inccc(limit2, stride2);
|
||||
}
|
||||
|
||||
// annul the load if branch is not taken to prevent access past end of string
|
||||
br(Assembler::notZero, true, Assembler::pt, Lloop);
|
||||
delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
|
||||
|
||||
// If strings are equal up to min length, return the length difference.
|
||||
if (ae == StrIntrinsicNode::UU) {
|
||||
// Divide by 2 to get number of chars
|
||||
sra(diff, 1, result);
|
||||
} else {
|
||||
mov(diff, result);
|
||||
}
|
||||
|
||||
// Otherwise, return the difference between the first mismatched chars.
|
||||
bind(Ldone);
|
||||
if(ae == StrIntrinsicNode::UL) {
|
||||
// Negate result (see note above)
|
||||
neg(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that compares two arrays (is_array_equ == true) or two raw
// element ranges (is_array_equ == false), 8 bytes per loop iteration.
//   is_array_equ  true:  identity, null and length checks are emitted for
//                        ary1/ary2, the length is loaded into 'limit', and both
//                        registers are advanced by the array base offset.
//                 false: ary1/ary2 are used as given and 'limit' is only
//                        sign-extended (presumably the caller passes the first
//                        element addresses and a byte count -- TODO confirm).
//   is_byte       selects T_BYTE vs. T_CHAR for the base offset and, in the
//                 is_array_equ case, whether 'limit' is scaled by sizeof(jchar).
//   result        set to 1 if equal, 0 otherwise.
// The emitted code overwrites ary1, ary2, limit and tmp.
void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp, Register result, bool is_byte) {
|
||||
Label Ldone, Lloop, Lremaining;
|
||||
assert_different_registers(ary1, ary2, limit, tmp, result);
|
||||
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
|
||||
assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
|
||||
|
||||
if (is_array_equ) {
|
||||
// return true if the same array
|
||||
cmp(ary1, ary2);
|
||||
brx(Assembler::equal, true, Assembler::pn, Ldone);
|
||||
delayed()->mov(1, result); // equal
|
||||
|
||||
br_null(ary1, true, Assembler::pn, Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
|
||||
br_null(ary2, true, Assembler::pn, Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
|
||||
// load the lengths of arrays
|
||||
ld(Address(ary1, length_offset), limit);
|
||||
ld(Address(ary2, length_offset), tmp);
|
||||
|
||||
// return false if the two arrays are not equal length
|
||||
cmp(limit, tmp);
|
||||
br(Assembler::notEqual, true, Assembler::pn, Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
}
|
||||
|
||||
cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
|
||||
delayed()->mov(1, result); // zero-length arrays are equal
|
||||
|
||||
if (is_array_equ) {
|
||||
// load array addresses
|
||||
add(ary1, base_offset, ary1);
|
||||
add(ary2, base_offset, ary2);
|
||||
// set byte count
|
||||
if (!is_byte) {
|
||||
sll(limit, exact_log2(sizeof(jchar)), limit);
|
||||
}
|
||||
} else {
|
||||
// We have no guarantee that on 64 bit the higher half of limit is 0
|
||||
signx(limit);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
|
||||
// Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
|
||||
Label Laligned;
|
||||
or3(ary1, ary2, tmp);
|
||||
andcc(tmp, 7, tmp);
|
||||
br_null_short(tmp, Assembler::pn, Laligned);
|
||||
STOP("First array element is not 8-byte aligned.");
|
||||
should_not_reach_here();
|
||||
bind(Laligned);
|
||||
#endif
|
||||
|
||||
// Shift ary1 and ary2 to the end of the arrays, negate limit
|
||||
// (from here on 'limit' is a negative offset that counts up towards zero)
|
||||
add(ary1, limit, ary1);
|
||||
add(ary2, limit, ary2);
|
||||
neg(limit, limit);
|
||||
|
||||
// MAIN LOOP
|
||||
// Load and compare array elements of size 'byte_width' until the elements are not
|
||||
// equal or we reached the end of the arrays. If the size of the arrays is not a
|
||||
// multiple of 'byte_width', we simply read over the end of the array, bail out and
|
||||
// compare the remaining bytes below by skipping the garbage bytes.
|
||||
// The first load of ary1 is done ahead of the loop; later ones sit in the
|
||||
// delay slot of the loop branch.
ldx(ary1, limit, result);
|
||||
bind(Lloop);
|
||||
ldx(ary2, limit, tmp);
|
||||
inccc(limit, 8);
|
||||
// Bail out if we reached the end (but still do the comparison)
|
||||
br(Assembler::positive, false, Assembler::pn, Lremaining);
|
||||
delayed()->cmp(result, tmp);
|
||||
// Check equality of elements
|
||||
brx(Assembler::equal, false, Assembler::pt, target(Lloop));
|
||||
delayed()->ldx(ary1, limit, result);
|
||||
|
||||
ba(Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
|
||||
// TAIL COMPARISON
|
||||
// We got here because we reached the end of the arrays. 'limit' is the number of
|
||||
// garbage bytes we may have compared by reading over the end of the arrays. Shift
|
||||
// out the garbage and compare the remaining elements.
|
||||
bind(Lremaining);
|
||||
// Optimistic shortcut: elements potentially including garbage are equal
|
||||
// (condition codes still come from the cmp in the delay slot above)
|
||||
brx(Assembler::equal, true, Assembler::pt, target(Ldone));
|
||||
delayed()->mov(1, result); // equal
|
||||
// Shift 'limit' bytes to the right and compare
|
||||
sll(limit, 3, limit); // bytes to bits
|
||||
srlx(result, limit, result);
|
||||
|
||||
srlx(tmp, limit, tmp);
|
||||
cmp(result, tmp);
|
||||
// clr does not touch the condition codes consumed by the movcc below
|
||||
// NOTE(review): relies on clr being a flag-preserving move -- confirm
clr(result);
|
||||
movcc(Assembler::equal, false, xcc, 1, result);
|
||||
|
||||
bind(Ldone);
|
||||
}
|
||||
|
||||
// Emits code that tests whether any byte of an input string has its high bit set.
//   inp     address of the first byte; need not be 8-byte aligned
//   size    number of bytes in the string
//   result  1 if a byte with the high bit set was found, 0 otherwise; doubles
//           as the running index 'i' until the very end
//   t2      holds the high-bit mask 0x8080808080808080 ('lmask')
//   t3-t5   scratch
// The string is scanned in three phases: an optional unaligned head, an aligned
// 8-byte core loop, and an aligned tail of fewer than 8 bytes. Bytes outside the
// string that come in with the 8-byte loads are shifted out before testing.
void C2_MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
|
||||
|
||||
// test for negative bytes in input string of a given size
|
||||
// result 1 if found, 0 otherwise.
|
||||
|
||||
Label Lcore, Ltail, Lreturn, Lcore_rpt;
|
||||
|
||||
assert_different_registers(inp, size, t2, t3, t4, t5, result);
|
||||
|
||||
Register i = result; // result used as integer index i until very end
|
||||
Register lmask = t2; // t2 is aliased to lmask
|
||||
|
||||
// INITIALIZATION
|
||||
// ===========================================================
|
||||
// initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
|
||||
// compute unaligned offset -> i
|
||||
// compute core end index -> t5
|
||||
Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
|
||||
add(t2, 0x80, t2);
|
||||
sllx(t2, 32, t3);
|
||||
or3(t3, t2, lmask); // 0x8080808080808080 -> lmask
|
||||
// NOTE(review): sra by 0 presumably sign-extends the low 32 bits of size -- confirm
sra(size,0,size);
|
||||
andcc(inp, 0x7, i); // unaligned offset -> i
|
||||
br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
|
||||
delayed()->add(size, -8, t5); // (annulled) core end index -> t5
|
||||
|
||||
// ===========================================================
|
||||
|
||||
// UNALIGNED HEAD
|
||||
// ===========================================================
|
||||
// * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
|
||||
// * obliterate (ignore) bytes outside string by shifting off reg ends
|
||||
// * compare with bitmask, short circuit return true if one or more high
|
||||
// bits set.
|
||||
cmp(size, 0);
|
||||
br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
|
||||
delayed()->mov(0,result); // annulled so i not clobbered for following
|
||||
neg(i, t4);
|
||||
add(i, size, t5);
|
||||
ldx(inp, t4, t3); // raw aligned 8B containing unaligned head -> t3
|
||||
mov(8, t4);
|
||||
sub(t4, t5, t4);
|
||||
// the next two instructions clamp t4 at zero from below: t5 = max(t4, 0)
|
||||
sra(t4, 31, t5);
|
||||
andn(t4, t5, t5);
|
||||
add(i, t5, t4);
|
||||
sll(t5, 3, t5);
|
||||
sll(t4, 3, t4); // # bits to shift right, left -> t5,t4
|
||||
srlx(t3, t5, t3);
|
||||
sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
|
||||
andcc(lmask, t3, G0);
|
||||
brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
|
||||
delayed()->mov(1,result); // annulled so i not clobbered for following
|
||||
add(size, -8, t5); // core end index -> t5
|
||||
mov(8, t4);
|
||||
sub(t4, i, i); // # bytes examined in unaligned head (<8) -> i
|
||||
// ===========================================================
|
||||
|
||||
// ALIGNED CORE
|
||||
// ===========================================================
|
||||
// * iterate index i over aligned 8B sections of core, comparing with
|
||||
// bitmask, short circuit return true if one or more high bits set
|
||||
// t5 contains core end index/loop limit which is the index
|
||||
// of the MSB of last (unaligned) 8B fully contained in the string.
|
||||
// inp contains address of first byte in string/array
|
||||
// lmask contains 8B high bit mask for comparison
|
||||
// i contains next index to be processed (adr. inp+i is on 8B boundary)
|
||||
bind(Lcore);
|
||||
cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
|
||||
bind(Lcore_rpt);
|
||||
ldx(inp, i, t3);
|
||||
andcc(t3, lmask, G0);
|
||||
brx(Assembler::notZero, true, Assembler::pn, Lreturn);
|
||||
delayed()->mov(1, result); // annulled so i not clobbered for following
|
||||
add(i, 8, i);
|
||||
cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
|
||||
// ===========================================================
|
||||
|
||||
// ALIGNED TAIL (<8B)
|
||||
// ===========================================================
|
||||
// handle aligned tail of 7B or less as complete 8B, obliterating end of
|
||||
// string bytes by shifting them off end, compare what's left with bitmask
|
||||
// inp contains address of first byte in string/array
|
||||
// lmask contains 8B high bit mask for comparison
|
||||
// i contains next index to be processed (adr. inp+i is on 8B boundary)
|
||||
bind(Ltail);
|
||||
subcc(size, i, t4); // # of remaining bytes in string -> t4
|
||||
// return 0 if no more remaining bytes
|
||||
br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
|
||||
delayed()->mov(0, result); // annulled so i not clobbered for following
|
||||
ldx(inp, i, t3); // load final 8B (aligned) containing tail -> t3
|
||||
mov(8, t5);
|
||||
sub(t5, t4, t4);
|
||||
mov(0, result); // ** i clobbered at this point
|
||||
sll(t4, 3, t4); // bits beyond end of string -> t4
|
||||
srlx(t3, t4, t3); // bytes beyond end now obliterated -> t3
|
||||
andcc(lmask, t3, G0);
|
||||
movcc(Assembler::notZero, false, xcc, 1, result);
|
||||
bind(Lreturn);
|
||||
}
|
||||
|
||||
58
src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp
Normal file
58
src/hotspot/cpu/sparc/c2_MacroAssembler_sparc.hpp
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
|
||||
#define CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
|
||||
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
// The definition in this change asserts UseVIS >= 3, copies cnt into result,
// and falls through without compressing when cnt < 8 or src/dst are not both
// 8-byte aligned. On a non-latin1 char it branches to Ldone with result = 0.
|
||||
void string_compress_16(Register src, Register dst, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
|
||||
|
||||
// Compress char[] to byte[]. Return 0 on failure.
// Parameter names follow the definition: the 4th register is 'result' (zeroed
// before branching to Ldone when a char > 0xff is seen) and the 5th is 'tmp'
// (scratch for the char being copied). 'result' is not written on success.
|
||||
void string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone);
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
// The definition in this change asserts UseVIS >= 3 and falls through without
// inflating when cnt < 8 or src/dst are not both 8-byte aligned; it does not
// branch to Ldone.
|
||||
void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
|
||||
|
||||
// Inflate byte[] to char[].
// Emits a loop that loads one byte from src and stores it as a halfword at dst
// until cnt reaches zero; tmp holds the byte being copied.
// NOTE(review): the loop decrements cnt before testing it, so cnt is presumably
// expected to be non-zero on entry -- confirm against the caller.
|
||||
void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
|
||||
|
||||
// Compare two strings. 'ae' is the StrIntrinsicNode argument encoding
// (LL, UU, LU or UL). 'result' receives the difference between the first
// mismatching characters or, if the strings are equal up to the shorter
// length, the difference in lengths. tmp2 is only required to be a distinct
// register for the mixed encodings (LU/UL).
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2,
|
||||
Register result, int ae);
|
||||
|
||||
// Compare two arrays (is_array_equ) or two raw element ranges, 8 bytes at a
// time; result is 1 if equal, 0 otherwise. is_byte selects byte vs. char
// elements. ary1, ary2, limit and tmp are overwritten by the emitted code.
void array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp, Register result, bool is_byte);
|
||||
// Test for negative bytes in an input string of a given size: result is 1 if
// any byte has its high bit set, 0 if none. t2-t5 are scratch registers.
|
||||
void has_negatives(Register inp, Register size, Register result,
|
||||
Register t2, Register t3, Register t4,
|
||||
Register t5);
|
||||
|
||||
#endif // CPU_SPARC_C2_MACROASSEMBLER_SPARC_HPP
|
||||
@ -49,9 +49,6 @@
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
#ifdef COMPILER2
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) /* nothing */
|
||||
@ -3436,498 +3433,6 @@ void MacroAssembler::reinit_heapbase() {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
|
||||
void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
|
||||
Label Lloop, Lslow;
|
||||
assert(UseVIS >= 3, "VIS3 is required");
|
||||
assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
|
||||
assert_different_registers(ftmp1, ftmp2, ftmp3);
|
||||
|
||||
// Check if cnt >= 8 (= 16 bytes)
|
||||
cmp(cnt, 8);
|
||||
br(Assembler::less, false, Assembler::pn, Lslow);
|
||||
delayed()->mov(cnt, result); // copy count
|
||||
|
||||
// Check for 8-byte alignment of src and dst
|
||||
or3(src, dst, tmp1);
|
||||
andcc(tmp1, 7, G0);
|
||||
br(Assembler::notZero, false, Assembler::pn, Lslow);
|
||||
delayed()->nop();
|
||||
|
||||
// Set mask for bshuffle instruction
|
||||
Register mask = tmp4;
|
||||
set(0x13579bdf, mask);
|
||||
bmask(mask, G0, G0);
|
||||
|
||||
// Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
|
||||
Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
|
||||
add(mask, 0x300, mask); // mask = 0x0000 0000 ff00 ff00
|
||||
sllx(mask, 32, tmp1); // tmp1 = 0xff00 ff00 0000 0000
|
||||
or3(mask, tmp1, mask); // mask = 0xff00 ff00 ff00 ff00
|
||||
|
||||
// Load first 8 bytes
|
||||
ldx(src, 0, tmp1);
|
||||
|
||||
bind(Lloop);
|
||||
// Load next 8 bytes
|
||||
ldx(src, 8, tmp2);
|
||||
|
||||
// Check for non-latin1 character by testing if the most significant byte of a char is set.
|
||||
// Although we have to move the data between integer and floating point registers, this is
|
||||
// still faster than the corresponding VIS instructions (ford/fand/fcmpd).
|
||||
or3(tmp1, tmp2, tmp3);
|
||||
btst(tmp3, mask);
|
||||
// annul zeroing if branch is not taken to preserve original count
|
||||
brx(Assembler::notZero, true, Assembler::pn, Ldone);
|
||||
delayed()->mov(G0, result); // 0 - failed
|
||||
|
||||
// Move bytes into float register
|
||||
movxtod(tmp1, ftmp1);
|
||||
movxtod(tmp2, ftmp2);
|
||||
|
||||
// Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
|
||||
bshuffle(ftmp1, ftmp2, ftmp3);
|
||||
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
|
||||
|
||||
// Increment addresses and decrement count
|
||||
inc(src, 16);
|
||||
inc(dst, 8);
|
||||
dec(cnt, 8);
|
||||
|
||||
cmp(cnt, 8);
|
||||
// annul LDX if branch is not taken to prevent access past end of string
|
||||
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
|
||||
delayed()->ldx(src, 0, tmp1);
|
||||
|
||||
// Fallback to slow version
|
||||
bind(Lslow);
|
||||
}
|
||||
|
||||
// Compress char[] to byte[]. Return 0 on failure.
|
||||
void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
|
||||
Label Lloop;
|
||||
assert_different_registers(src, dst, cnt, tmp, result);
|
||||
|
||||
lduh(src, 0, tmp);
|
||||
|
||||
bind(Lloop);
|
||||
inc(src, sizeof(jchar));
|
||||
cmp(tmp, 0xff);
|
||||
// annul zeroing if branch is not taken to preserve original count
|
||||
br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
|
||||
delayed()->mov(G0, result); // 0 - failed
|
||||
deccc(cnt);
|
||||
stb(tmp, dst, 0);
|
||||
inc(dst);
|
||||
// annul LDUH if branch is not taken to prevent access past end of string
|
||||
br(Assembler::notZero, true, Assembler::pt, Lloop);
|
||||
delayed()->lduh(src, 0, tmp); // hoisted
|
||||
}
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
|
||||
void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
|
||||
Label Lloop, Lslow;
|
||||
assert(UseVIS >= 3, "VIS3 is required");
|
||||
assert_different_registers(src, dst, cnt, tmp);
|
||||
assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
|
||||
|
||||
// Check if cnt >= 8 (= 16 bytes)
|
||||
cmp(cnt, 8);
|
||||
br(Assembler::less, false, Assembler::pn, Lslow);
|
||||
delayed()->nop();
|
||||
|
||||
// Check for 8-byte alignment of src and dst
|
||||
or3(src, dst, tmp);
|
||||
andcc(tmp, 7, G0);
|
||||
br(Assembler::notZero, false, Assembler::pn, Lslow);
|
||||
// Initialize float register to zero
|
||||
FloatRegister zerof = ftmp4;
|
||||
delayed()->fzero(FloatRegisterImpl::D, zerof);
|
||||
|
||||
// Load first 8 bytes
|
||||
ldf(FloatRegisterImpl::D, src, 0, ftmp1);
|
||||
|
||||
bind(Lloop);
|
||||
inc(src, 8);
|
||||
dec(cnt, 8);
|
||||
|
||||
// Inflate the string by interleaving each byte from the source array
|
||||
// with a zero byte and storing the result in the destination array.
|
||||
fpmerge(zerof, ftmp1->successor(), ftmp2);
|
||||
stf(FloatRegisterImpl::D, ftmp2, dst, 8);
|
||||
fpmerge(zerof, ftmp1, ftmp3);
|
||||
stf(FloatRegisterImpl::D, ftmp3, dst, 0);
|
||||
|
||||
inc(dst, 16);
|
||||
|
||||
cmp(cnt, 8);
|
||||
// annul LDX if branch is not taken to prevent access past end of string
|
||||
br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
|
||||
delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
|
||||
|
||||
// Fallback to slow version
|
||||
bind(Lslow);
|
||||
}
|
||||
|
||||
// Inflate byte[] to char[].
|
||||
void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
|
||||
Label Loop;
|
||||
assert_different_registers(src, dst, cnt, tmp);
|
||||
|
||||
ldub(src, 0, tmp);
|
||||
bind(Loop);
|
||||
inc(src);
|
||||
deccc(cnt);
|
||||
sth(tmp, dst, 0);
|
||||
inc(dst, sizeof(jchar));
|
||||
// annul LDUB if branch is not taken to prevent access past end of string
|
||||
br(Assembler::notZero, true, Assembler::pt, Loop);
|
||||
delayed()->ldub(src, 0, tmp); // hoisted
|
||||
}
|
||||
|
||||
void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2,
|
||||
Register result, int ae) {
|
||||
Label Ldone, Lloop;
|
||||
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
|
||||
int stride1, stride2;
|
||||
|
||||
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
|
||||
// we interchange str1 and str2 in the UL case and negate the result.
|
||||
// Like this, str1 is always latin1 encoded, expect for the UU case.
|
||||
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
srl(cnt2, 1, cnt2);
|
||||
}
|
||||
|
||||
// See if the lengths are different, and calculate min in cnt1.
|
||||
// Save diff in case we need it for a tie-breaker.
|
||||
Label Lskip;
|
||||
Register diff = tmp1;
|
||||
subcc(cnt1, cnt2, diff);
|
||||
br(Assembler::greater, true, Assembler::pt, Lskip);
|
||||
// cnt2 is shorter, so use its count:
|
||||
delayed()->mov(cnt2, cnt1);
|
||||
bind(Lskip);
|
||||
|
||||
// Rename registers
|
||||
Register limit1 = cnt1;
|
||||
Register limit2 = limit1;
|
||||
Register chr1 = result;
|
||||
Register chr2 = cnt2;
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
// We need an additional register to keep track of two limits
|
||||
assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
|
||||
limit2 = tmp2;
|
||||
}
|
||||
|
||||
// Is the minimum length zero?
|
||||
cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
|
||||
br(Assembler::equal, true, Assembler::pn, Ldone);
|
||||
// result is difference in lengths
|
||||
if (ae == StrIntrinsicNode::UU) {
|
||||
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
|
||||
} else {
|
||||
delayed()->mov(diff, result);
|
||||
}
|
||||
|
||||
// Load first characters
|
||||
if (ae == StrIntrinsicNode::LL) {
|
||||
stride1 = stride2 = sizeof(jbyte);
|
||||
ldub(str1, 0, chr1);
|
||||
ldub(str2, 0, chr2);
|
||||
} else if (ae == StrIntrinsicNode::UU) {
|
||||
stride1 = stride2 = sizeof(jchar);
|
||||
lduh(str1, 0, chr1);
|
||||
lduh(str2, 0, chr2);
|
||||
} else {
|
||||
stride1 = sizeof(jbyte);
|
||||
stride2 = sizeof(jchar);
|
||||
ldub(str1, 0, chr1);
|
||||
lduh(str2, 0, chr2);
|
||||
}
|
||||
|
||||
// Compare first characters
|
||||
subcc(chr1, chr2, chr1);
|
||||
br(Assembler::notZero, false, Assembler::pt, Ldone);
|
||||
assert(chr1 == result, "result must be pre-placed");
|
||||
delayed()->nop();
|
||||
|
||||
// Check if the strings start at same location
|
||||
cmp(str1, str2);
|
||||
brx(Assembler::equal, true, Assembler::pn, Ldone);
|
||||
delayed()->mov(G0, result); // result is zero
|
||||
|
||||
// We have no guarantee that on 64 bit the higher half of limit is 0
|
||||
signx(limit1);
|
||||
|
||||
// Get limit
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
sll(limit1, 1, limit2);
|
||||
subcc(limit2, stride2, chr2);
|
||||
}
|
||||
subcc(limit1, stride1, chr1);
|
||||
br(Assembler::zero, true, Assembler::pn, Ldone);
|
||||
// result is difference in lengths
|
||||
if (ae == StrIntrinsicNode::UU) {
|
||||
delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
|
||||
} else {
|
||||
delayed()->mov(diff, result);
|
||||
}
|
||||
|
||||
// Shift str1 and str2 to the end of the arrays, negate limit
|
||||
add(str1, limit1, str1);
|
||||
add(str2, limit2, str2);
|
||||
neg(chr1, limit1); // limit1 = -(limit1-stride1)
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
neg(chr2, limit2); // limit2 = -(limit2-stride2)
|
||||
}
|
||||
|
||||
// Compare the rest of the characters
|
||||
load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
|
||||
|
||||
bind(Lloop);
|
||||
load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
|
||||
|
||||
subcc(chr1, chr2, chr1);
|
||||
br(Assembler::notZero, false, Assembler::pt, Ldone);
|
||||
assert(chr1 == result, "result must be pre-placed");
|
||||
delayed()->inccc(limit1, stride1);
|
||||
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
|
||||
inccc(limit2, stride2);
|
||||
}
|
||||
|
||||
// annul LDUB if branch is not taken to prevent access past end of string
|
||||
br(Assembler::notZero, true, Assembler::pt, Lloop);
|
||||
delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
|
||||
|
||||
// If strings are equal up to min length, return the length difference.
|
||||
if (ae == StrIntrinsicNode::UU) {
|
||||
// Divide by 2 to get number of chars
|
||||
sra(diff, 1, result);
|
||||
} else {
|
||||
mov(diff, result);
|
||||
}
|
||||
|
||||
// Otherwise, return the difference between the first mismatched chars.
|
||||
bind(Ldone);
|
||||
if(ae == StrIntrinsicNode::UL) {
|
||||
// Negate result (see note above)
|
||||
neg(result);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp, Register result, bool is_byte) {
|
||||
Label Ldone, Lloop, Lremaining;
|
||||
assert_different_registers(ary1, ary2, limit, tmp, result);
|
||||
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
|
||||
assert(base_offset % 8 == 0, "Base offset must be 8-byte aligned");
|
||||
|
||||
if (is_array_equ) {
|
||||
// return true if the same array
|
||||
cmp(ary1, ary2);
|
||||
brx(Assembler::equal, true, Assembler::pn, Ldone);
|
||||
delayed()->mov(1, result); // equal
|
||||
|
||||
br_null(ary1, true, Assembler::pn, Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
|
||||
br_null(ary2, true, Assembler::pn, Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
|
||||
// load the lengths of arrays
|
||||
ld(Address(ary1, length_offset), limit);
|
||||
ld(Address(ary2, length_offset), tmp);
|
||||
|
||||
// return false if the two arrays are not equal length
|
||||
cmp(limit, tmp);
|
||||
br(Assembler::notEqual, true, Assembler::pn, Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
}
|
||||
|
||||
cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
|
||||
delayed()->mov(1, result); // zero-length arrays are equal
|
||||
|
||||
if (is_array_equ) {
|
||||
// load array addresses
|
||||
add(ary1, base_offset, ary1);
|
||||
add(ary2, base_offset, ary2);
|
||||
// set byte count
|
||||
if (!is_byte) {
|
||||
sll(limit, exact_log2(sizeof(jchar)), limit);
|
||||
}
|
||||
} else {
|
||||
// We have no guarantee that on 64 bit the higher half of limit is 0
|
||||
signx(limit);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Sanity check for doubleword (8-byte) alignment of ary1 and ary2.
|
||||
// Guaranteed on 64-bit systems (see arrayOopDesc::header_size_in_bytes()).
|
||||
Label Laligned;
|
||||
or3(ary1, ary2, tmp);
|
||||
andcc(tmp, 7, tmp);
|
||||
br_null_short(tmp, Assembler::pn, Laligned);
|
||||
STOP("First array element is not 8-byte aligned.");
|
||||
should_not_reach_here();
|
||||
bind(Laligned);
|
||||
#endif
|
||||
|
||||
// Shift ary1 and ary2 to the end of the arrays, negate limit
|
||||
add(ary1, limit, ary1);
|
||||
add(ary2, limit, ary2);
|
||||
neg(limit, limit);
|
||||
|
||||
// MAIN LOOP
|
||||
// Load and compare array elements of size 'byte_width' until the elements are not
|
||||
// equal or we reached the end of the arrays. If the size of the arrays is not a
|
||||
// multiple of 'byte_width', we simply read over the end of the array, bail out and
|
||||
// compare the remaining bytes below by skipping the garbage bytes.
|
||||
ldx(ary1, limit, result);
|
||||
bind(Lloop);
|
||||
ldx(ary2, limit, tmp);
|
||||
inccc(limit, 8);
|
||||
// Bail out if we reached the end (but still do the comparison)
|
||||
br(Assembler::positive, false, Assembler::pn, Lremaining);
|
||||
delayed()->cmp(result, tmp);
|
||||
// Check equality of elements
|
||||
brx(Assembler::equal, false, Assembler::pt, target(Lloop));
|
||||
delayed()->ldx(ary1, limit, result);
|
||||
|
||||
ba(Ldone);
|
||||
delayed()->clr(result); // not equal
|
||||
|
||||
// TAIL COMPARISON
|
||||
// We got here because we reached the end of the arrays. 'limit' is the number of
|
||||
// garbage bytes we may have compared by reading over the end of the arrays. Shift
|
||||
// out the garbage and compare the remaining elements.
|
||||
bind(Lremaining);
|
||||
// Optimistic shortcut: elements potentially including garbage are equal
|
||||
brx(Assembler::equal, true, Assembler::pt, target(Ldone));
|
||||
delayed()->mov(1, result); // equal
|
||||
// Shift 'limit' bytes to the right and compare
|
||||
sll(limit, 3, limit); // bytes to bits
|
||||
srlx(result, limit, result);
|
||||
srlx(tmp, limit, tmp);
|
||||
cmp(result, tmp);
|
||||
clr(result);
|
||||
movcc(Assembler::equal, false, xcc, 1, result);
|
||||
|
||||
bind(Ldone);
|
||||
}
|
||||
|
||||
void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
|
||||
|
||||
// test for negative bytes in input string of a given size
|
||||
// result 1 if found, 0 otherwise.
|
||||
|
||||
Label Lcore, Ltail, Lreturn, Lcore_rpt;
|
||||
|
||||
assert_different_registers(inp, size, t2, t3, t4, t5, result);
|
||||
|
||||
Register i = result; // result used as integer index i until very end
|
||||
Register lmask = t2; // t2 is aliased to lmask
|
||||
|
||||
// INITIALIZATION
|
||||
// ===========================================================
|
||||
// initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
|
||||
// compute unaligned offset -> i
|
||||
// compute core end index -> t5
|
||||
Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
|
||||
add(t2, 0x80, t2);
|
||||
sllx(t2, 32, t3);
|
||||
or3(t3, t2, lmask); // 0x8080808080808080 -> lmask
|
||||
sra(size,0,size);
|
||||
andcc(inp, 0x7, i); // unaligned offset -> i
|
||||
br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
|
||||
delayed()->add(size, -8, t5); // (annuled) core end index -> t5
|
||||
|
||||
// ===========================================================
|
||||
|
||||
// UNALIGNED HEAD
|
||||
// ===========================================================
|
||||
// * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
|
||||
// * obliterate (ignore) bytes outside string by shifting off reg ends
|
||||
// * compare with bitmask, short circuit return true if one or more high
|
||||
// bits set.
|
||||
cmp(size, 0);
|
||||
br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
|
||||
delayed()->mov(0,result); // annuled so i not clobbered for following
|
||||
neg(i, t4);
|
||||
add(i, size, t5);
|
||||
ldx(inp, t4, t3); // raw aligned 8B containing unaligned head -> t3
|
||||
mov(8, t4);
|
||||
sub(t4, t5, t4);
|
||||
sra(t4, 31, t5);
|
||||
andn(t4, t5, t5);
|
||||
add(i, t5, t4);
|
||||
sll(t5, 3, t5);
|
||||
sll(t4, 3, t4); // # bits to shift right, left -> t5,t4
|
||||
srlx(t3, t5, t3);
|
||||
sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
|
||||
andcc(lmask, t3, G0);
|
||||
brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
|
||||
delayed()->mov(1,result); // annuled so i not clobbered for following
|
||||
add(size, -8, t5); // core end index -> t5
|
||||
mov(8, t4);
|
||||
sub(t4, i, i); // # bytes examined in unalgn head (<8) -> i
|
||||
// ===========================================================
|
||||
|
||||
// ALIGNED CORE
|
||||
// ===========================================================
|
||||
// * iterate index i over aligned 8B sections of core, comparing with
|
||||
// bitmask, short circuit return true if one or more high bits set
|
||||
// t5 contains core end index/loop limit which is the index
|
||||
// of the MSB of last (unaligned) 8B fully contained in the string.
|
||||
// inp contains address of first byte in string/array
|
||||
// lmask contains 8B high bit mask for comparison
|
||||
// i contains next index to be processed (adr. inp+i is on 8B boundary)
|
||||
bind(Lcore);
|
||||
cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
|
||||
bind(Lcore_rpt);
|
||||
ldx(inp, i, t3);
|
||||
andcc(t3, lmask, G0);
|
||||
brx(Assembler::notZero, true, Assembler::pn, Lreturn);
|
||||
delayed()->mov(1, result); // annuled so i not clobbered for following
|
||||
add(i, 8, i);
|
||||
cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
|
||||
// ===========================================================
|
||||
|
||||
// ALIGNED TAIL (<8B)
|
||||
// ===========================================================
|
||||
// handle aligned tail of 7B or less as complete 8B, obliterating end of
|
||||
// string bytes by shifting them off end, compare what's left with bitmask
|
||||
// inp contains address of first byte in string/array
|
||||
// lmask contains 8B high bit mask for comparison
|
||||
// i contains next index to be processed (adr. inp+i is on 8B boundary)
|
||||
bind(Ltail);
|
||||
subcc(size, i, t4); // # of remaining bytes in string -> t4
|
||||
// return 0 if no more remaining bytes
|
||||
br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
|
||||
delayed()->mov(0, result); // annulled so i not clobbered for following
|
||||
ldx(inp, i, t3); // load final 8B (aligned) containing tail -> t3
|
||||
mov(8, t5);
|
||||
sub(t5, t4, t4);
|
||||
mov(0, result); // ** i clobbered at this point
|
||||
sll(t4, 3, t4); // bits beyond end of string -> t4
|
||||
srlx(t3, t4, t3); // bytes beyond end now obliterated -> t3
|
||||
andcc(lmask, t3, G0);
|
||||
movcc(Assembler::notZero, false, xcc, 1, result);
|
||||
bind(Lreturn);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Use BIS for zeroing (count is in bytes).
|
||||
void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
|
||||
assert(UseBlockZeroing && VM_Version::has_blk_zeroing(), "only works with BIS zeroing");
|
||||
|
||||
@ -1301,36 +1301,6 @@ public:
|
||||
void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
|
||||
void inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2);
|
||||
|
||||
#ifdef COMPILER2
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
|
||||
void string_compress_16(Register src, Register dst, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
|
||||
|
||||
// Compress char[] to byte[]. Return 0 on failure.
|
||||
void string_compress(Register src, Register dst, Register cnt, Register tmp, Register result, Label& Ldone);
|
||||
|
||||
// Inflate byte[] to char[] by inflating 16 bytes at once.
|
||||
void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
|
||||
FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
|
||||
|
||||
// Inflate byte[] to char[].
|
||||
void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
|
||||
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2,
|
||||
Register result, int ae);
|
||||
|
||||
void array_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register tmp, Register result, bool is_byte);
|
||||
// test for negative bytes in input string of a given size, result 0 if none
|
||||
void has_negatives(Register inp, Register size, Register result,
|
||||
Register t2, Register t3, Register t4,
|
||||
Register t5);
|
||||
|
||||
#endif
|
||||
|
||||
// Use BIS for zeroing
|
||||
void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
|
||||
|
||||
|
||||
@ -592,7 +592,7 @@ bool SafePointNode::needs_polling_address_input() {
|
||||
|
||||
// emit an interrupt that is caught by the debugger (for debugging compiler)
|
||||
void emit_break(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ breakpoint_trap();
|
||||
}
|
||||
|
||||
@ -612,7 +612,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
|
||||
|
||||
// Traceable jump
|
||||
void emit_jmpl(CodeBuffer &cbuf, int jump_target) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register rdest = reg_to_register_object(jump_target);
|
||||
__ JMP(rdest, 0);
|
||||
__ delayed()->nop();
|
||||
@ -620,19 +620,19 @@ void emit_jmpl(CodeBuffer &cbuf, int jump_target) {
|
||||
|
||||
// Traceable jump and set exception pc
|
||||
void emit_jmpl_set_exception_pc(CodeBuffer &cbuf, int jump_target) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register rdest = reg_to_register_object(jump_target);
|
||||
__ JMP(rdest, 0);
|
||||
__ delayed()->add(O7, frame::pc_return_offset, Oissuing_pc );
|
||||
}
|
||||
|
||||
void emit_nop(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ nop();
|
||||
}
|
||||
|
||||
void emit_illtrap(CodeBuffer &cbuf) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ illtrap(0);
|
||||
}
|
||||
|
||||
@ -908,7 +908,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
|
||||
disp += STACK_BIAS;
|
||||
// Check that stack offset fits, load into O7 if not
|
||||
if (!Assembler::is_simm13(disp)) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ set(disp, O7);
|
||||
if (index != R_G0_enc) {
|
||||
__ add(O7, reg_to_register_object(index), O7);
|
||||
@ -932,7 +932,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
|
||||
|
||||
#ifdef ASSERT
|
||||
if (VerifyOops) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
if (is_verified_oop_base) {
|
||||
__ verify_oop(reg_to_register_object(src1_enc));
|
||||
}
|
||||
@ -960,7 +960,7 @@ void emit_call_reloc(CodeBuffer &cbuf, intptr_t entry_point, RelocationHolder co
|
||||
// putting the "mov" instruction in the delay slot but the problem
|
||||
// may bite us again at some other point and a cleaner/generic
|
||||
// solution using relocations would be needed.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ set_inst_mark();
|
||||
|
||||
// We flush the current window just so that there is a valid stack copy
|
||||
@ -1024,7 +1024,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
|
||||
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
ConstantTable& constant_table = C->output()->constant_table();
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register r = as_Register(ra_->get_encode(this));
|
||||
CodeSection* consts_section = __ code()->consts();
|
||||
@ -1153,7 +1153,7 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
|
||||
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
for (int i = 0; i < OptoPrologueNops; i++) {
|
||||
__ nop();
|
||||
@ -1226,7 +1226,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Compile* C = ra_->C;
|
||||
|
||||
__ verify_thread();
|
||||
@ -1534,7 +1534,7 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const {
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
for (int i = 0; i < _count; i += 1) {
|
||||
__ nop();
|
||||
}
|
||||
@ -1555,7 +1555,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()) + STACK_BIAS;
|
||||
int reg = ra_->get_encode(this);
|
||||
|
||||
@ -1599,7 +1599,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
|
||||
#endif
|
||||
|
||||
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
|
||||
Register temp_reg = G3;
|
||||
assert( G5_ic_reg != temp_reg, "conflicting registers" );
|
||||
@ -1624,7 +1624,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
|
||||
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
|
||||
Register temp_reg = G3;
|
||||
AddressLiteral exception_blob(OptoRuntime::exception_blob()->entry_point());
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_exception_handler());
|
||||
if (base == NULL) {
|
||||
@ -1649,7 +1649,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
|
||||
// at a poll and everything (including G3) can be live.
|
||||
Register temp_reg = L0;
|
||||
AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack());
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
address base = __ start_a_stub(size_deopt_handler());
|
||||
if (base == NULL) {
|
||||
@ -2007,7 +2007,7 @@ void Compile::reshape_address(AddPNode* addp) {
|
||||
encode %{
|
||||
enc_class enc_untested %{
|
||||
#ifdef ASSERT
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ untested("encoding");
|
||||
#endif
|
||||
%}
|
||||
@ -2142,7 +2142,7 @@ encode %{
|
||||
|
||||
/* %%% merge with enc_to_bool */
|
||||
enc_class enc_convP2B( iRegI dst, iRegP src ) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register src_reg = reg_to_register_object($src$$reg);
|
||||
Register dst_reg = reg_to_register_object($dst$$reg);
|
||||
@ -2151,7 +2151,7 @@ encode %{
|
||||
|
||||
enc_class enc_cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp ) %{
|
||||
// (Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)))
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register p_reg = reg_to_register_object($p$$reg);
|
||||
Register q_reg = reg_to_register_object($q$$reg);
|
||||
@ -2284,13 +2284,13 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class enc_PartialSubtypeCheck() %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type);
|
||||
__ delayed()->nop();
|
||||
%}
|
||||
|
||||
enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* L = $labl$$label;
|
||||
Assembler::Predict predict_taken =
|
||||
cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
|
||||
@ -2300,7 +2300,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label* L = $labl$$label;
|
||||
Assembler::Predict predict_taken =
|
||||
cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
|
||||
@ -2419,13 +2419,13 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class Set32( immI src, iRegI rd ) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ set($src$$constant, reg_to_register_object($rd$$reg));
|
||||
%}
|
||||
|
||||
enc_class call_epilog %{
|
||||
if( VerifyStackAtCalls ) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
int framesize = ra_->C->output()->frame_size_in_bytes();
|
||||
Register temp_reg = G3;
|
||||
__ add(SP, framesize, temp_reg);
|
||||
@ -2447,12 +2447,12 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class preserve_SP %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(SP, L7_mh_SP_save);
|
||||
%}
|
||||
|
||||
enc_class restore_SP %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(L7_mh_SP_save, SP);
|
||||
%}
|
||||
|
||||
@ -2477,7 +2477,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ set_inst_mark();
|
||||
int vtable_index = this->_vtable_index;
|
||||
// MachCallDynamicJavaNode::ret_addr_offset uses this same test
|
||||
@ -2526,7 +2526,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
|
||||
Register temp_reg = G3; // caller must kill G3! We cannot reuse G5_ic_reg here because
|
||||
@ -2543,7 +2543,7 @@ encode %{
|
||||
%}
|
||||
|
||||
enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdividend = reg_to_register_object($src1$$reg);
|
||||
Register Rdivisor = reg_to_register_object($src2$$reg);
|
||||
Register Rresult = reg_to_register_object($dst$$reg);
|
||||
@ -2554,7 +2554,7 @@ enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
|
||||
%}
|
||||
|
||||
enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register Rdividend = reg_to_register_object($src1$$reg);
|
||||
int divisor = $imm$$constant;
|
||||
@ -2565,7 +2565,7 @@ enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
|
||||
%}
|
||||
|
||||
enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rsrc1 = reg_to_register_object($src1$$reg);
|
||||
Register Rsrc2 = reg_to_register_object($src2$$reg);
|
||||
Register Rdst = reg_to_register_object($dst$$reg);
|
||||
@ -2577,7 +2577,7 @@ enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
|
||||
%}
|
||||
|
||||
enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdividend = reg_to_register_object($src1$$reg);
|
||||
Register Rdivisor = reg_to_register_object($src2$$reg);
|
||||
Register Rresult = reg_to_register_object($dst$$reg);
|
||||
@ -2594,7 +2594,7 @@ enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch
|
||||
%}
|
||||
|
||||
enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register Rdividend = reg_to_register_object($src1$$reg);
|
||||
int divisor = $imm$$constant;
|
||||
@ -2610,7 +2610,7 @@ enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
|
||||
%}
|
||||
|
||||
enc_class fabss (sflt_reg dst, sflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
|
||||
@ -2619,7 +2619,7 @@ enc_class fabss (sflt_reg dst, sflt_reg src) %{
|
||||
%}
|
||||
|
||||
enc_class fabsd (dflt_reg dst, dflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
|
||||
@ -2628,7 +2628,7 @@ enc_class fabsd (dflt_reg dst, dflt_reg src) %{
|
||||
%}
|
||||
|
||||
enc_class fnegd (dflt_reg dst, dflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
|
||||
@ -2637,7 +2637,7 @@ enc_class fnegd (dflt_reg dst, dflt_reg src) %{
|
||||
%}
|
||||
|
||||
enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
|
||||
@ -2646,7 +2646,7 @@ enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
|
||||
%}
|
||||
|
||||
enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
|
||||
@ -2656,7 +2656,7 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
|
||||
|
||||
|
||||
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
|
||||
@ -2667,7 +2667,7 @@ enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
|
||||
@ -2678,7 +2678,7 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
|
||||
@ -2689,7 +2689,7 @@ enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
|
||||
@ -2700,7 +2700,7 @@ enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
|
||||
@ -2711,7 +2711,7 @@ enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
|
||||
@ -2722,7 +2722,7 @@ enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
|
||||
@ -2733,7 +2733,7 @@ enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
|
||||
%}
|
||||
|
||||
enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
|
||||
@ -2745,7 +2745,7 @@ enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
|
||||
|
||||
|
||||
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_SingleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_SingleFloatRegister_object($src$$reg);
|
||||
@ -2754,7 +2754,7 @@ enc_class fmovs (dflt_reg dst, dflt_reg src) %{
|
||||
%}
|
||||
|
||||
enc_class fmovd (dflt_reg dst, dflt_reg src) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
FloatRegister Fdst = reg_to_DoubleFloatRegister_object($dst$$reg);
|
||||
FloatRegister Fsrc = reg_to_DoubleFloatRegister_object($src$$reg);
|
||||
@ -2763,7 +2763,7 @@ enc_class fmovd (dflt_reg dst, dflt_reg src) %{
|
||||
%}
|
||||
|
||||
enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register Roop = reg_to_register_object($oop$$reg);
|
||||
Register Rbox = reg_to_register_object($box$$reg);
|
||||
@ -2779,7 +2779,7 @@ enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
%}
|
||||
|
||||
enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
|
||||
Register Roop = reg_to_register_object($oop$$reg);
|
||||
Register Rbox = reg_to_register_object($box$$reg);
|
||||
@ -2795,7 +2795,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
%}
|
||||
|
||||
enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rmem = reg_to_register_object($mem$$reg);
|
||||
Register Rold = reg_to_register_object($old$$reg);
|
||||
Register Rnew = reg_to_register_object($new$$reg);
|
||||
@ -2809,7 +2809,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
Register Rold = reg_to_register_object($old$$reg);
|
||||
Register Rnew = reg_to_register_object($new$$reg);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(Rnew, O7);
|
||||
__ casx(Rmem, Rold, O7);
|
||||
__ cmp( Rold, O7 );
|
||||
@ -2821,7 +2821,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
Register Rold = reg_to_register_object($old$$reg);
|
||||
Register Rnew = reg_to_register_object($new$$reg);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(Rnew, O7);
|
||||
__ cas(Rmem, Rold, O7);
|
||||
__ cmp( Rold, O7 );
|
||||
@ -2833,7 +2833,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
Register Rold = reg_to_register_object($old$$reg);
|
||||
Register Rnew = reg_to_register_object($new$$reg);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ cas(Rmem, Rold, Rnew);
|
||||
%}
|
||||
|
||||
@ -2843,14 +2843,14 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
Register Rold = reg_to_register_object($old$$reg);
|
||||
Register Rnew = reg_to_register_object($new$$reg);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ casx(Rmem, Rold, Rnew);
|
||||
%}
|
||||
|
||||
enc_class enc_lflags_ne_to_boolean( iRegI res ) %{
|
||||
Register Rres = reg_to_register_object($res$$reg);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(1, Rres);
|
||||
__ movcc( Assembler::notEqual, false, Assembler::xcc, G0, Rres );
|
||||
%}
|
||||
@ -2858,13 +2858,13 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
enc_class enc_iflags_ne_to_boolean( iRegI res ) %{
|
||||
Register Rres = reg_to_register_object($res$$reg);
|
||||
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ mov(1, Rres);
|
||||
__ movcc( Assembler::notEqual, false, Assembler::icc, G0, Rres );
|
||||
%}
|
||||
|
||||
enc_class floating_cmp ( iRegP dst, regF src1, regF src2 ) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Register Rdst = reg_to_register_object($dst$$reg);
|
||||
FloatRegister Fsrc1 = $primary ? reg_to_SingleFloatRegister_object($src1$$reg)
|
||||
: reg_to_DoubleFloatRegister_object($src1$$reg);
|
||||
@ -2880,7 +2880,7 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
Register temp_reg = G3;
|
||||
AddressLiteral rethrow_stub(OptoRuntime::rethrow_stub());
|
||||
assert(temp_reg != reg_to_register_object(R_I0_num), "temp must not break oop_reg");
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
#ifdef ASSERT
|
||||
__ save_frame(0);
|
||||
AddressLiteral last_rethrow_addrlit(&last_rethrow);
|
||||
@ -2911,17 +2911,17 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
||||
%}
|
||||
|
||||
enc_class enc_membar_acquire %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::LoadLoad) );
|
||||
%}
|
||||
|
||||
enc_class enc_membar_release %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ membar( Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore) );
|
||||
%}
|
||||
|
||||
enc_class enc_membar_volatile %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
|
||||
%}
|
||||
|
||||
|
||||
2594
src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Normal file
2594
src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Normal file
File diff suppressed because it is too large
Load Diff
166
src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Normal file
166
src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Normal file
@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
#define CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// special instructions for EVEX
|
||||
void setvectmask(Register dst, Register src);
|
||||
void restorevectmask();
|
||||
|
||||
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
|
||||
// See full description in macroAssembler_x86.cpp.
|
||||
void fast_lock(Register obj, Register box, Register tmp,
|
||||
Register scr, Register cx1, Register cx2,
|
||||
BiasedLockingCounters* counters,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data,
|
||||
bool use_rtm, bool profile_rtm);
|
||||
void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
void rtm_counters_update(Register abort_status, Register rtm_counters);
|
||||
void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
|
||||
void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data);
|
||||
void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
|
||||
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
|
||||
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
|
||||
void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
|
||||
void rtm_stack_locking(Register obj, Register tmp, Register scr,
|
||||
Register retry_on_abort_count,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL, Label& IsInflated);
|
||||
void rtm_inflated_locking(Register obj, Register box, Register tmp,
|
||||
Register scr, Register retry_on_busy_count,
|
||||
Register retry_on_abort_count,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL);
|
||||
#endif
|
||||
|
||||
// Generic instructions support for use in .ad files C2 code generation
|
||||
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
|
||||
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
|
||||
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
|
||||
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
|
||||
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
// Reductions for vectors of ints, longs, floats, and doubles.
|
||||
|
||||
// dst = src1 + reduce(op, src2) using vtmp as temps
|
||||
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#ifdef _LP64
|
||||
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#endif // _LP64
|
||||
|
||||
// dst = reduce(op, src2) using vtmp as temps
|
||||
void reduce_fp(int opcode, int vlen,
|
||||
XMMRegister dst, XMMRegister src,
|
||||
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
|
||||
private:
|
||||
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
#ifdef _LP64
|
||||
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#endif // _LP64
|
||||
|
||||
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
|
||||
public:
|
||||
|
||||
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
||||
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
|
||||
|
||||
// IndexOf strings.
|
||||
// Small strings are loaded through stack if they cross page boundary.
|
||||
void string_indexof(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
int int_cnt2, Register result,
|
||||
XMMRegister vec, Register tmp,
|
||||
int ae);
|
||||
|
||||
// IndexOf for constant substrings with size >= 8 elements
|
||||
// which don't need to be loaded through stack.
|
||||
void string_indexofC8(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
int int_cnt2, Register result,
|
||||
XMMRegister vec, Register tmp,
|
||||
int ae);
|
||||
|
||||
// Smallest code: we don't need to load through stack,
|
||||
// check string tail.
|
||||
|
||||
// helper function for string_compare
|
||||
void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
|
||||
Address::ScaleFactor scale, Address::ScaleFactor scale1,
|
||||
Address::ScaleFactor scale2, Register index, int ae);
|
||||
// Compare strings.
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
XMMRegister vec1, int ae);
|
||||
|
||||
// Search for Non-ASCII character (Negative byte value) in a byte array,
|
||||
// return true if it has any and false otherwise.
|
||||
void has_negatives(Register ary1, Register len,
|
||||
Register result, Register tmp1,
|
||||
XMMRegister vec1, XMMRegister vec2);
|
||||
|
||||
// Compare char[] or byte[] arrays.
|
||||
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register result, Register chr,
|
||||
XMMRegister vec1, XMMRegister vec2, bool is_char);
|
||||
|
||||
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
File diff suppressed because it is too large
Load Diff
@ -158,12 +158,6 @@ class MacroAssembler: public Assembler {
|
||||
void incrementq(Register reg, int value = 1);
|
||||
void incrementq(Address dst, int value = 1);
|
||||
|
||||
#ifdef COMPILER2
|
||||
// special instructions for EVEX
|
||||
void setvectmask(Register dst, Register src);
|
||||
void restorevectmask();
|
||||
#endif
|
||||
|
||||
// Support optimal SSE move instructions.
|
||||
void movflt(XMMRegister dst, XMMRegister src) {
|
||||
if (dst-> encoding() == src->encoding()) return;
|
||||
@ -681,40 +675,6 @@ class MacroAssembler: public Assembler {
|
||||
Label& done, Label* slow_case = NULL,
|
||||
BiasedLockingCounters* counters = NULL);
|
||||
void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
|
||||
#ifdef COMPILER2
|
||||
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
|
||||
// See full description in macroAssembler_x86.cpp.
|
||||
void fast_lock(Register obj, Register box, Register tmp,
|
||||
Register scr, Register cx1, Register cx2,
|
||||
BiasedLockingCounters* counters,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data,
|
||||
bool use_rtm, bool profile_rtm);
|
||||
void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
|
||||
#if INCLUDE_RTM_OPT
|
||||
void rtm_counters_update(Register abort_status, Register rtm_counters);
|
||||
void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
|
||||
void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data);
|
||||
void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
|
||||
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
|
||||
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
|
||||
void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
|
||||
void rtm_stack_locking(Register obj, Register tmp, Register scr,
|
||||
Register retry_on_abort_count,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL, Label& IsInflated);
|
||||
void rtm_inflated_locking(Register obj, Register box, Register tmp,
|
||||
Register scr, Register retry_on_busy_count,
|
||||
Register retry_on_abort_count,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Condition negate_condition(Condition cond);
|
||||
|
||||
@ -1635,60 +1595,6 @@ public:
|
||||
void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
|
||||
void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
|
||||
|
||||
#ifdef COMPILER2
|
||||
// Generic instructions support for use in .ad files C2 code generation
|
||||
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
|
||||
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
|
||||
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
|
||||
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
|
||||
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
// Reductions for vectors of ints, longs, floats, and doubles.
|
||||
|
||||
// dst = src1 + reduce(op, src2) using vtmp as temps
|
||||
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#ifdef _LP64
|
||||
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#endif // _LP64
|
||||
|
||||
// dst = reduce(op, src2) using vtmp as temps
|
||||
void reduce_fp(int opcode, int vlen,
|
||||
XMMRegister dst, XMMRegister src,
|
||||
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
|
||||
private:
|
||||
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
#ifdef _LP64
|
||||
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#endif // _LP64
|
||||
|
||||
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
#endif
|
||||
|
||||
public:
|
||||
// C2 compiled method's prolog code.
|
||||
@ -1701,51 +1607,6 @@ public:
|
||||
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
|
||||
void xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp);
|
||||
|
||||
#ifdef COMPILER2
|
||||
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
||||
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
|
||||
|
||||
// IndexOf strings.
|
||||
// Small strings are loaded through stack if they cross page boundary.
|
||||
void string_indexof(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
int int_cnt2, Register result,
|
||||
XMMRegister vec, Register tmp,
|
||||
int ae);
|
||||
|
||||
// IndexOf for constant substrings with size >= 8 elements
|
||||
// which don't need to be loaded through stack.
|
||||
void string_indexofC8(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2,
|
||||
int int_cnt2, Register result,
|
||||
XMMRegister vec, Register tmp,
|
||||
int ae);
|
||||
|
||||
// Smallest code: we don't need to load through stack,
|
||||
// check string tail.
|
||||
|
||||
// helper function for string_compare
|
||||
void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
|
||||
Address::ScaleFactor scale, Address::ScaleFactor scale1,
|
||||
Address::ScaleFactor scale2, Register index, int ae);
|
||||
// Compare strings.
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
XMMRegister vec1, int ae);
|
||||
|
||||
// Search for Non-ASCII character (Negative byte value) in a byte array,
|
||||
// return true if it has any and false otherwise.
|
||||
void has_negatives(Register ary1, Register len,
|
||||
Register result, Register tmp1,
|
||||
XMMRegister vec1, XMMRegister vec2);
|
||||
|
||||
// Compare char[] or byte[] arrays.
|
||||
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register result, Register chr,
|
||||
XMMRegister vec1, XMMRegister vec2, bool is_char);
|
||||
|
||||
#endif
|
||||
|
||||
// Fill primitive arrays
|
||||
void generate_fill(BasicType t, bool aligned,
|
||||
Register to, Register value, Register count,
|
||||
|
||||
@ -1177,7 +1177,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
|
||||
|
||||
// Note that the code buffer's insts_mark is always relative to insts.
|
||||
// That's why we must use the macroassembler to generate a handler.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address base = __ start_a_stub(size_exception_handler());
|
||||
if (base == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
@ -1195,7 +1195,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
|
||||
|
||||
// Note that the code buffer's insts_mark is always relative to insts.
|
||||
// That's why we must use the macroassembler to generate a handler.
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
address base = __ start_a_stub(size_deopt_handler());
|
||||
if (base == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
@ -1716,7 +1716,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
|
||||
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
|
||||
"no non-adjacent vector moves" );
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
int offset = __ offset();
|
||||
switch (ireg) {
|
||||
case Op_VecS: // copy whole register
|
||||
@ -1782,7 +1782,7 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
||||
// into scratch buffer is used to get size in 64-bit VM.
|
||||
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
C2_MacroAssembler _masm(cbuf);
|
||||
int offset = __ offset();
|
||||
if (is_load) {
|
||||
switch (ireg) {
|
||||
@ -1985,7 +1985,7 @@ static inline jlong replicate8_imm(int con, int width) {
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ nop(_count);
|
||||
}
|
||||
|
||||
@ -2000,7 +2000,7 @@ static inline jlong replicate8_imm(int con, int width) {
|
||||
#endif
|
||||
|
||||
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
__ int3();
|
||||
}
|
||||
|
||||
@ -2016,7 +2016,7 @@ encode %{
|
||||
if (VerifyStackAtCalls) {
|
||||
// Check that stack depth is unchanged: find magic cookie on stack
|
||||
int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
|
||||
MacroAssembler _masm(&cbuf);
|
||||
C2_MacroAssembler _masm(&cbuf);
|
||||
Label L;
|
||||
__ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
|
||||
__ jccb(Assembler::equal, L);
|
||||
|
||||
@ -2881,7 +2881,7 @@ void ADLParser::ins_encode_parse_block(InstructForm& inst) {
|
||||
// name is chosen to match the __ idiom used for assembly in other
|
||||
// parts of hotspot and assumes the existence of the standard
|
||||
// #define __ _masm.
|
||||
encoding->add_code(" MacroAssembler _masm(&cbuf);\n");
|
||||
encoding->add_code(" C2_MacroAssembler _masm(&cbuf);\n");
|
||||
}
|
||||
|
||||
// Parse the following %{ }% block
|
||||
@ -3004,9 +3004,9 @@ void ADLParser::ins_encode_parse_block_impl(InstructForm& inst, EncClass* encodi
|
||||
// which synthesizes a new encoding class taking the same arguments as
|
||||
// the InstructForm, and automatically prefixes the definition with:
|
||||
//
|
||||
// MacroAssembler _masm(&cbuf);
|
||||
// C2_MacroAssembler _masm(&cbuf);
|
||||
//
|
||||
// making it more compact to take advantage of the MacroAssembler and
|
||||
// making it more compact to take advantage of the C2_MacroAssembler and
|
||||
// placing the assembly closer to its use by instructions.
|
||||
void ADLParser::ins_encode_parse(InstructForm& inst) {
|
||||
|
||||
|
||||
@ -211,7 +211,6 @@ int main(int argc, char *argv[])
|
||||
AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._VM_file._name));
|
||||
AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._HPP_file._name));
|
||||
AD.addInclude(AD._CPP_file, "memory/allocation.inline.hpp");
|
||||
AD.addInclude(AD._CPP_file, "asm/macroAssembler.inline.hpp");
|
||||
AD.addInclude(AD._CPP_file, "code/compiledIC.hpp");
|
||||
AD.addInclude(AD._CPP_file, "code/nativeInst.hpp");
|
||||
AD.addInclude(AD._CPP_file, "code/vmreg.inline.hpp");
|
||||
@ -221,6 +220,7 @@ int main(int argc, char *argv[])
|
||||
AD.addInclude(AD._CPP_file, "oops/markWord.hpp");
|
||||
AD.addInclude(AD._CPP_file, "oops/method.hpp");
|
||||
AD.addInclude(AD._CPP_file, "oops/oop.inline.hpp");
|
||||
AD.addInclude(AD._CPP_file, "opto/c2_MacroAssembler.hpp");
|
||||
AD.addInclude(AD._CPP_file, "opto/cfgnode.hpp");
|
||||
AD.addInclude(AD._CPP_file, "opto/intrinsicnode.hpp");
|
||||
AD.addInclude(AD._CPP_file, "opto/locknode.hpp");
|
||||
|
||||
41
src/hotspot/share/opto/c2_MacroAssembler.hpp
Normal file
41
src/hotspot/share/opto/c2_MacroAssembler.hpp
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_OPTO_C2_MACROASSEMBLER_HPP
|
||||
#define SHARE_OPTO_C2_MACROASSEMBLER_HPP
|
||||
|
||||
#include "asm/macroAssembler.hpp"
|
||||
#include "asm/macroAssembler.inline.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// MacroAssembler subclass for C2 code generation. It derives publicly from
// MacroAssembler and declares nothing here except a constructor; its
// remaining members come from the header included inside the class body
// below.
class C2_MacroAssembler: public MacroAssembler {
|
||||
public:
|
||||
// creation
// The constructor only forwards `code` to the MacroAssembler constructor.
|
||||
C2_MacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
|
||||
|
||||
// Included inside the class body, so the declarations in that header become
// members of C2_MacroAssembler.
// NOTE(review): CPU_HEADER presumably resolves to the per-CPU
// c2_MacroAssembler_<arch>.hpp file - confirm against utilities/macros.hpp.
#include CPU_HEADER(c2_MacroAssembler)
|
||||
|
||||
};
|
||||
|
||||
#endif // SHARE_OPTO_C2_MACROASSEMBLER_HPP
|
||||
Loading…
x
Reference in New Issue
Block a user