8207343: Automate vtable/itable stub size calculation

Reviewed-by: kvn, mdoerr
Lutz Schmidt 2018-09-03 09:43:08 +02:00
parent d08c6c824d
commit f36874335d
10 changed files with 717 additions and 679 deletions
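Each CPU-specific stub generator in this change follows the same structure: it requests a size budget from the shared VtableStubs code via code_size_limit(), measures every instruction sequence whose length can vary against a local estimate, accumulates the unused bytes as slop, and reports the result through the shared bookkeeping() call instead of relying on the hand-maintained pd_code_size_limit() that the change removes. The outline below is a condensed, platform-neutral sketch assembled from the per-CPU diffs that follow; the "..." elisions stand for the platform-specific instruction selection, and sequence_estimate is a placeholder name for the per-sequence constant each port defines (it is not a real identifier in the sources).

  VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
    // The size budget comes from shared code; see "A word on VtableStub sizing" in share/code/vtableStubs.hpp.
    const int stub_code_length = code_size_limit(true);
    VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
    if (s == NULL) {               // no free space in the code cache
      return NULL;
    }

    address start_pc;
    int     slop_bytes = 0;        // unused bytes, accumulated over all variable-size sequences
    int     slop_delta = 0;        // per-sequence estimate minus actual emitted size

    ResourceMark rm;
    CodeBuffer cb(s->entry_point(), stub_code_length);
    MacroAssembler* masm = new MacroAssembler(&cb);

    address npe_addr = __ pc();    // loading the receiver klass doubles as the implicit null check
    // ... load receiver klass ...

    start_pc = __ pc();
    // ... emit a variable-length sequence, e.g. the vtable lookup ...
    slop_delta  = sequence_estimate - (int)(__ pc() - start_pc);
    slop_bytes += slop_delta;
    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);

    address ame_addr = __ pc();    // abstract method error if the resolved entry is NULL
    // ... load Method::from_compiled_offset() and jump ...

    masm->flush();
    // Report measured usage; the last argument is the index-dependent slop (non-zero on SPARC and x86).
    bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
    return s;
  }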

View File

@ -44,24 +44,30 @@
#define __ masm->
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread,
oop receiver,
int index);
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
#endif
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const int aarch64_code_length = VtableStub::pd_code_size_limit(true);
VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
ResourceMark rm;
CodeBuffer cb(s->entry_point(), aarch64_code_length);
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ lea(r16, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
__ incrementw(Address(r16));
@ -78,21 +84,35 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
#ifndef PRODUCT
if (DebugVtables) {
Label L;
// TODO: find upper bound for this debug code.
start_pc = __ pc();
// check offset vs vtable length
__ ldrw(rscratch1, Address(r16, Klass::vtable_length_offset()));
__ cmpw(rscratch1, vtable_index * vtableEntry::size());
__ br(Assembler::GT, L);
__ enter();
__ mov(r2, vtable_index);
__ call_VM(noreg,
CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
const ptrdiff_t estimate = 256;
const ptrdiff_t codesize = __ pc() - start_pc;
slop_delta = estimate - codesize; // call_VM varies in length, depending on data
slop_bytes += slop_delta;
assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize);
__ leave();
__ bind(L);
}
#endif // PRODUCT
start_pc = __ pc();
__ lookup_virtual_method(r16, vtable_index, rmethod);
slop_delta = 8 - (int)(__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
Label L;
__ cbz(rmethod, L);
@ -101,6 +121,8 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
__ stop("Vtable entry is NULL");
__ bind(L);
}
#endif // PRODUCT
// r0: receiver klass
// rmethod: Method*
// r2: receiver
@ -108,43 +130,46 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
__ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
__ br(rscratch1);
__ flush();
masm->flush();
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
vtable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// Note well: pd_code_size_limit is the absolute minimum we can get
// away with. If you add code here, bump the code stub size
// returned by pd_code_size_limit!
const int code_length = VtableStub::pd_code_size_limit(false);
VtableStub* s = new(code_length) VtableStub(false, itable_index);
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
__ incrementw(Address(r10));
}
#endif
// get receiver (need to skip return address on top of stack)
assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
// Entry arguments:
// rscratch2: CompiledICHolder
// j_rarg0: Receiver
// Most registers are in use; we'll use r16, rmethod, r10, r11
const Register recv_klass_reg = r10;
const Register holder_klass_reg = r16; // declaring interface klass (DECC)
@ -157,8 +182,8 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
__ ldr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ ldr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
// get receiver (need to skip return address on top of stack)
assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
start_pc = __ pc();
// get receiver klass (also an implicit null-check)
address npe_addr = __ pc();
__ load_klass(recv_klass_reg, j_rarg0);
@ -172,16 +197,25 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
L_no_such_interface,
/*return_method=*/false);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get selected method from declaring class and itable index
__ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg
__ lookup_interface_method(// inputs: rec. class, interface, itable index
recv_klass_reg, holder_klass_reg, itable_index,
// outputs: method, scan temp. reg
rmethod, temp_reg,
L_no_such_interface);
recv_klass_reg, holder_klass_reg, itable_index,
// outputs: method, scan temp. reg
rmethod, temp_reg,
L_no_such_interface);
// method (rmethod): Method*
// j_rarg0: receiver
const ptrdiff_t lookupSize = __ pc() - start_pc;
// Reduce "estimate" such that "padding" does not drop below 8.
const ptrdiff_t estimate = 152;
const ptrdiff_t codesize = typecheckSize + lookupSize;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
#ifdef ASSERT
if (DebugVtables) {
@ -206,92 +240,17 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// We force resolving of the call site by jumping to the "handle
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
__ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
__ flush();
masm->flush();
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
itable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
int size = DebugVtables ? 216 : 0;
if (CountCompiledCalls)
size += 6 * 4;
// FIXME: vtable stubs only need 36 bytes
if (is_vtable_stub)
size += 52;
else
size += 176;
return size;
// In order to tune these parameters, run the JVM with VM options
// +PrintMiscellaneous and +WizardMode to see information about
// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
//
// If Universe::narrow_klass_base is nonzero, decoding a compressed
// class can take several instructions.
//
// The JVM98 app. _202_jess has a megamorphic interface call.
// The itable code looks like this:
// ldr xmethod, [xscratch2,#CompiledICHolder::holder_klass_offset]
// ldr x0, [xscratch2]
// ldr w10, [x1,#oopDesc::klass_offset_in_bytes]
// mov xheapbase, #0x3c000000 // #narrow_klass_base
// movk xheapbase, #0x3f7, lsl #32
// add x10, xheapbase, x10
// mov xheapbase, #0xe7ff0000 // #heapbase
// movk xheapbase, #0x3f7, lsl #32
// ldr w11, [x10,#vtable_length_offset]
// add x11, x10, x11, uxtx #3
// add x11, x11, #itableMethodEntry::method_offset_in_bytes
// ldr x10, [x11]
// cmp xmethod, x10
// b.eq found_method
// search:
// cbz x10, no_such_interface
// add x11, x11, #0x10
// ldr x10, [x11]
// cmp xmethod, x10
// b.ne search
// found_method:
// ldr w10, [x1,#oopDesc::klass_offset_in_bytes]
// mov xheapbase, #0x3c000000 // #narrow_klass_base
// movk xheapbase, #0x3f7, lsl #32
// add x10, xheapbase, x10
// mov xheapbase, #0xe7ff0000 // #heapbase
// movk xheapbase, #0x3f7, lsl #32
// ldr w11, [x10,#vtable_length_offset]
// add x11, x10, x11, uxtx #3
// add x11, x11, #itableMethodEntry::method_offset_in_bytes
// add x10, x10, #itentry_off
// ldr xmethod, [x11]
// cmp x0, xmethod
// b.eq found_method2
// search2:
// cbz xmethod, 0x000003ffa872e6cc
// add x11, x11, #0x10
// ldr xmethod, [x11]
// cmp x0, xmethod
// b.ne search2
// found_method2:
// ldr w11, [x11,#itableOffsetEntry::offset_offset_in_bytes]
// ldr xmethod, [x10,w11,uxtw]
// ldr xscratch1, [xmethod,#Method::from_compiled_offset]
// br xscratch1
// no_such_interface:
// b throw_ICCE_entry
int VtableStub::pd_code_alignment() {
// aarch64 cache line size is not an architected constant. We just align on 4 bytes (instruction size).
const unsigned int icache_line_size = 4;
return icache_line_size;
}
int VtableStub::pd_code_alignment() { return 4; }
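For a concrete feel of the slop arithmetic used above, the tiny standalone program below replays it with hypothetical byte counts (illustrative only, not measured from a real stub); the per-sequence constant plays the role of, e.g., the 8-byte estimate for lookup_virtual_method or the 256-byte estimate for the DebugVtables call_VM block:

  #include <cassert>
  #include <cstdio>

  // Mirrors the per-sequence accounting: estimate minus measured size is credited as slop.
  int account(int estimate, int actual, int& slop_bytes) {
    int slop_delta = estimate - actual;  // unused bytes of this sequence
    slop_bytes    += slop_delta;         // accumulated and later passed to bookkeeping()
    assert(slop_delta >= 0 && "negative slop encountered, adjust code size estimate!");
    return slop_delta;
  }

  int main() {
    int slop_bytes = 0;
    account(8, 4, slop_bytes);      // lookup emitted one 4-byte instruction: 4 bytes of slop
    account(256, 200, slop_bytes);  // a debug call_VM block shorter than its estimate: 56 bytes
    std::printf("slop_bytes reported to bookkeeping(): %d\n", slop_bytes); // prints 60
    // account(8, 12, slop_bytes);  // would fire the assert: the 8-byte estimate must be raised
    return 0;
  }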

View File

@ -48,17 +48,31 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int
#endif
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const int code_length = VtableStub::pd_code_size_limit(true);
VtableStub* s = new(code_length) VtableStub(true, vtable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
// Implementation required?
}
#endif
assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
const Register tmp = Rtemp; // Rtemp OK, should be free at call sites
@ -66,17 +80,33 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
address npe_addr = __ pc();
__ load_klass(tmp, R0);
{
int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
#ifndef PRODUCT
if (DebugVtables) {
// Implementation required?
}
#endif
assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
if (method_offset & ~offset_mask) {
__ add(tmp, tmp, method_offset & ~offset_mask);
start_pc = __ pc();
{ // lookup virtual method
int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
if (method_offset & ~offset_mask) {
__ add(tmp, tmp, method_offset & ~offset_mask);
}
__ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
}
__ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
slop_delta = 8 - (int)(__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
// Implementation required?
}
#endif
address ame_addr = __ pc();
#ifdef AARCH64
@ -87,35 +117,36 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
#endif // AARCH64
masm->flush();
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
vtable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// FIXME ARM: need correct 'slop' - below is x86 code
// shut the door on sizing bugs
//int slop = 8; // 32-bit offset is this much larger than a 13-bit one
//assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
const int code_length = VtableStub::pd_code_size_limit(false);
VtableStub* s = new(code_length) VtableStub(false, itable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
// Implementation required?
}
#endif
assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
// R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled
@ -123,15 +154,16 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
const Register Rintf = AARCH64_ONLY(R10) NOT_AARCH64(R5);
const Register Rscan = AARCH64_ONLY(R11) NOT_AARCH64(R6);
Label L_no_such_interface;
assert_different_registers(Ricklass, Rclass, Rintf, Rscan, Rtemp);
// Calculate the start of itable (itable goes after vtable)
const int scale = exact_log2(vtableEntry::size_in_bytes());
start_pc = __ pc();
// get receiver klass (also an implicit null-check)
address npe_addr = __ pc();
__ load_klass(Rclass, R0);
Label L_no_such_interface;
// Receiver subtype check against REFC.
__ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@ -140,6 +172,9 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
noreg, Rscan, Rtemp,
L_no_such_interface);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get Method* and entry point for compiler
__ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@ -148,6 +183,21 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
Rmethod, Rscan, Rtemp,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;
// Reduce "estimate" such that "padding" does not drop below 8.
const ptrdiff_t estimate = 140;
const ptrdiff_t codesize = typecheckSize + lookupSize;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
#ifndef PRODUCT
if (DebugVtables) {
// Implementation required?
}
#endif
address ame_addr = __ pc();
#ifdef AARCH64
@ -158,7 +208,6 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
#endif // AARCH64
__ bind(L_no_such_interface);
// Handle IncompatibleClassChangeError in itable stubs.
// More detailed error message.
// We force resolving of the call site by jumping to the "handle
@ -168,43 +217,13 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
__ jump(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type, Rtemp);
masm->flush();
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
itable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// FIXME ARM: need correct 'slop' - below is x86 code
// shut the door on sizing bugs
//int slop = 8; // 32-bit offset is this much larger than a 13-bit one
//assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
int instr_count;
if (is_vtable_stub) {
// vtable stub size
instr_count = NOT_AARCH64(4) AARCH64_ONLY(5);
} else {
// itable stub size
instr_count = NOT_AARCH64(31) AARCH64_ONLY(31);
}
#ifdef AARCH64
if (UseCompressedClassPointers) {
instr_count += MacroAssembler::instr_count_for_decode_klass_not_null();
}
#endif // AARCH64
return instr_count * Assembler::InstructionSize;
}
int VtableStub::pd_code_alignment() {
return 8;
// ARM32 cache line size is not an architected constant. We just align on word size.
const unsigned int icache_line_size = wordSize;
return icache_line_size;
}

View File

@ -39,36 +39,39 @@
#define __ masm->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
#endif
// Used by compiler only; may use only caller saved, non-argument
// registers.
// Used by compiler only; may use only caller saved, non-argument registers.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// PPC port: use fixed size.
const int code_length = VtableStub::pd_code_size_limit(true);
VtableStub* s = new (code_length) VtableStub(true, vtable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 8; // just a two-instruction safety net
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
start_pc = __ pc();
int load_const_maxLen = 5*BytesPerInstWord; // load_const generates 5 instructions. Assume that as max size for load_const_optimized
int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true);
slop_delta = load_const_maxLen - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ lwz(R12_scratch2, offs, R11_scratch1);
__ addi(R12_scratch2, R12_scratch2, 1);
__ stw(R12_scratch2, offs, R11_scratch1);
@ -77,17 +80,13 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1");
// Get receiver klass.
const Register rcvr_klass = R11_scratch1;
// We might implicit NULL fault here.
address npe_addr = __ pc(); // npe = null pointer exception
// check if we must do an explicit check (implicit checks disabled, offset too large).
__ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
// Get receiver klass.
__ load_klass(rcvr_klass, R3);
// Set method (in case of interpreted method), and destination address.
int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index*vtableEntry::size_in_bytes();
#ifndef PRODUCT
if (DebugVtables) {
Label L;
@ -102,7 +101,9 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
}
#endif
int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
int entry_offset = in_bytes(Klass::vtable_start_offset()) +
vtable_index*vtableEntry::size_in_bytes();
int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
__ ld(R19_method, (RegisterOrConstant)v_off, rcvr_klass);
@ -116,40 +117,48 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
}
#endif
// If the vtable entry is null, the method is abstract.
address ame_addr = __ pc(); // ame = abstract method error
// if the vtable entry is null, the method is abstract
// NOTE: for vtable dispatches, the vtable entry will never be null.
__ null_check(R19_method, in_bytes(Method::from_compiled_offset()), /*implicit only*/NULL);
__ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
masm->flush();
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
s->set_exception_points(npe_addr, ame_addr);
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// PPC port: use fixed size.
const int code_length = VtableStub::pd_code_size_limit(false);
VtableStub* s = new (code_length) VtableStub(false, itable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 8; // just a two-instruction safety net
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
address start_pc;
int load_const_maxLen = 5*BytesPerInstWord; // load_const generates 5 instructions. Assume that as max size for load_const_optimized
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
start_pc = __ pc();
int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true);
slop_delta = load_const_maxLen - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ lwz(R12_scratch2, offs, R11_scratch1);
__ addi(R12_scratch2, R12_scratch2, 1);
__ stw(R12_scratch2, offs, R11_scratch1);
@ -209,33 +218,22 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
__ bind(L_no_such_interface);
start_pc = __ pc();
__ load_const_optimized(R11_scratch1, SharedRuntime::get_handle_wrong_method_stub(), R12_scratch2);
slop_delta = load_const_maxLen - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ mtctr(R11_scratch1);
__ bctr();
masm->flush();
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
if (DebugVtables || CountCompiledCalls || VerifyOops) {
return 1000;
}
int size = is_vtable_stub ? 20 + 8 : 164 + 20; // Plain + safety
if (UseCompressedClassPointers) {
size += MacroAssembler::instr_size_for_decode_klass_not_null();
}
if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
size += is_vtable_stub ? 8 : 12;
}
return size;
}
int VtableStub::pd_code_alignment() {
// Power cache line size is 128 bytes, but we want to limit alignment loss.
const unsigned int icache_line_size = 32;
return icache_line_size;
}
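On PPC the estimate for a constant load is the fixed worst case of load_const (5 instructions, see load_const_maxLen above), while the stub actually emits load_const_optimized, whose length depends on the constant; the s390 file that follows does the same with __ load_const_size() versus __ load_const_optimized_rtn_len(). The standalone sketch below illustrates that budgeting with a deliberately simplified instruction-count model; the real load_const_optimized selection logic is more involved, so the per-constant counts are assumptions, only the 5-instruction worst case is taken from the code above.

  #include <cstdint>
  #include <cstdio>

  const int BytesPerInstWord = 4;

  // Simplified model of how many instructions a PPC64 constant materialization might take.
  int insts_for_constant(int64_t c) {
    if (c >= -32768 && c <= 32767)        return 1; // fits a 16-bit immediate (li)
    if (c >= INT32_MIN && c <= INT32_MAX) return 2; // 32-bit value (lis + ori)
    return 5;                                       // full 64-bit value: the worst case budgeted above
  }

  int main() {
    const int load_const_maxLen = 5 * BytesPerInstWord; // worst-case budget, as in the stub code
    int64_t samples[] = { 100, 0x12345678, 0x123456789abcdef0LL };
    int slop_bytes = 0;
    for (int64_t c : samples) {
      int actual     = insts_for_constant(c) * BytesPerInstWord;
      int slop_delta = load_const_maxLen - actual;    // credited exactly as in the stubs
      slop_bytes    += slop_delta;
      std::printf("constant %#llx: %d bytes emitted, %d bytes slop\n",
                  (unsigned long long)c, actual, slop_delta);
    }
    std::printf("total slop: %d\n", slop_bytes);
    return 0;
  }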

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -37,9 +37,6 @@
#include "opto/runtime.hpp"
#endif
// Machine-dependent part of VtableStubs: create vtableStub of correct
// size and initialize its code.
#define __ masm->
#ifndef PRODUCT
@ -48,123 +45,140 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int
// Used by compiler only; may use only caller saved, non-argument registers.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const int code_length = VtableStub::pd_code_size_limit(true);
VtableStub *s = new(code_length) VtableStub(true, vtable_index);
if (s == NULL) { // Indicates OOM in the code cache.
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
MacroAssembler *masm = new MacroAssembler(&cb);
int padding_bytes = 0;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
// Count unused bytes
// worst case actual size
padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
// worst case actual size
slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
// Use generic emitter for direct memory increment.
// Abuse Z_method as scratch register for generic emitter.
// It is loaded further down anyway before it is first used.
// No dynamic code size variance here, increment is 1, always.
__ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
}
#endif
assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
// Get receiver klass.
// Must do an explicit check if implicit checks are disabled.
address npe_addr = __ pc(); // npe == NULL ptr exception
const Register rcvr_klass = Z_R1_scratch;
address npe_addr = __ pc(); // npe == NULL ptr exception
// check if we must do an explicit check (implicit checks disabled, offset too large).
__ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
const Register rcvr_klass = Z_R1_scratch;
// Get receiver klass.
__ load_klass(rcvr_klass, Z_ARG1);
// Set method (in case of interpreted method), and destination address.
int entry_offset = in_bytes(Klass::vtable_start_offset()) +
vtable_index * vtableEntry::size_in_bytes();
#ifndef PRODUCT
if (DebugVtables) {
Label L;
NearLabel L;
// Check offset vs vtable length.
const Register vtable_idx = Z_R0_scratch;
// Count unused bytes.
// worst case actual size
padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true);
// worst case actual size
slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size(), true);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
assert(Immediate::is_uimm12(in_bytes(Klass::vtable_length_offset())), "disp too large");
assert(Displacement::is_shortDisp(in_bytes(Klass::vtable_length_offset())), "disp too large");
__ z_cl(vtable_idx, in_bytes(Klass::vtable_length_offset()), rcvr_klass);
__ z_brl(L);
__ z_lghi(Z_ARG3, vtable_index); // Debug code, don't optimize.
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false);
// Count unused bytes (assume worst case here).
padding_bytes += 12;
slop_bytes += 12;
__ bind(L);
}
#endif
int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
int entry_offset = in_bytes(Klass::vtable_start_offset()) +
vtable_index * vtableEntry::size_in_bytes();
int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
// Set method (in case of interpreted method), and destination address.
// Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC.
if (Displacement::is_validDisp(v_off)) {
__ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/);
// Account for the load_const in the else path.
padding_bytes += __ load_const_size();
slop_delta = __ load_const_size();
} else {
// Worst case, offset does not fit in displacement field.
__ load_const(Z_method, v_off); // Z_method temporarily holds the offset value.
// worst case actual size
slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_method, v_off, true);
__ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/);
}
slop_bytes += slop_delta;
#ifndef PRODUCT
if (DebugVtables) {
Label L;
NearLabel L;
__ z_ltgr(Z_method, Z_method);
__ z_brne(L);
__ stop("Vtable entry is ZERO",102);
__ stop("Vtable entry is ZERO", 102);
__ bind(L);
}
#endif
address ame_addr = __ pc(); // ame = abstract method error
// Must do an explicit check if implicit checks are disabled.
// Must do an explicit check if offset too large or implicit checks are disabled.
address ame_addr = __ pc();
__ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset()));
__ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
__ z_br(Z_R1_scratch);
masm->flush();
s->set_exception_points(npe_addr, ame_addr);
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
const int code_length = VtableStub::pd_code_size_limit(false);
VtableStub *s = new(code_length) VtableStub(false, itable_index);
if (s == NULL) { // Indicates OOM in the code cache.
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), code_length);
MacroAssembler *masm = new MacroAssembler(&cb);
int padding_bytes = 0;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
// Count unused bytes
// worst case actual size
padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
// worst case actual size
slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
// Use generic emitter for direct memory increment.
// Use Z_tmp_1 as scratch register for generic emitter.
__ add2mem_32((Z_R1_scratch), 1, Z_tmp_1);
// Abuse Z_method as scratch register for generic emitter.
// It is loaded further down anyway before it is first used.
// No dynamic code size variance here, increment is 1, always.
__ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
}
#endif
@ -178,7 +192,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
interface = Z_tmp_2;
// Get receiver klass.
// Must do an explicit check if implicit checks are disabled.
// Must do an explicit check if offset too large or implicit checks are disabled.
address npe_addr = __ pc(); // npe == NULL ptr exception
__ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
__ load_klass(rcvr_klass, Z_ARG1);
@ -195,10 +209,10 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
#ifndef PRODUCT
if (DebugVtables) {
Label ok1;
NearLabel ok1;
__ z_ltgr(Z_method, Z_method);
__ z_brne(ok1);
__ stop("method is null",103);
__ stop("method is null", 103);
__ bind(ok1);
}
#endif
@ -213,39 +227,24 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// Handle IncompatibleClassChangeError in itable stubs.
__ bind(no_such_interface);
// Count unused bytes
// worst case actual size
// We force resolving of the call site by jumping to
// the "handle wrong method" stub, and so let the
// more detailed IncompatibleClassChangeError
// we force re-resolving of the call site by jumping to
// the "handle wrong method" stub, thus letting the
// interpreter runtime do all the dirty work.
padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
// worst case actual size
slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ z_br(Z_R1_scratch);
masm->flush();
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
s->set_exception_points(npe_addr, ame_addr);
return s;
}
// In order to tune these parameters, run the JVM with VM options
// +PrintMiscellaneous and +WizardMode to see information about
// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
int size = DebugVtables ? 216 : 0;
if (CountCompiledCalls) {
size += 6 * 4;
}
size += is_vtable_stub ? 36 : 140;
if (UseCompressedClassPointers) {
size += MacroAssembler::instr_size_for_decode_klass_not_null();
}
if (!ImplicitNullChecks) {
size += 36;
}
return size;
}
int VtableStub::pd_code_alignment() {
// System z cache line size is 256 bytes, but octoword-alignment is quite ok.
const unsigned int icache_line_size = 32;
return icache_line_size;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -41,32 +41,38 @@
#define __ masm->
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
#endif
// Used by compiler only; may use only caller saved, non-argument registers
// NOTE: %%%% if any change is made to this stub make sure that the function
// pd_code_size_limit is changed to ensure the correct size for VtableStub
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const int sparc_code_length = VtableStub::pd_code_size_limit(true);
VtableStub* s = new(sparc_code_length) VtableStub(true, vtable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
ResourceMark rm;
CodeBuffer cb(s->entry_point(), sparc_code_length);
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
const int index_dependent_slop = ((vtable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?).
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G5, G3_scratch);
}
#endif /* PRODUCT */
#endif // PRODUCT
assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0");
@ -74,20 +80,33 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
address npe_addr = __ pc();
__ load_klass(O0, G3_scratch);
// set Method* (in case of interpreted method), and destination address
#ifndef PRODUCT
if (DebugVtables) {
Label L;
// check offset vs vtable length
__ ld(G3_scratch, in_bytes(Klass::vtable_length_offset()), G5);
__ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L);
// set generates 8 instructions (worst case), 1 instruction (best case)
start_pc = __ pc();
__ set(vtable_index, O2);
slop_delta = __ worst_case_insts_for_set()*BytesPerInstWord - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
// there is no variance in call_VM() emitted code.
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2);
__ bind(L);
}
#endif
// set Method* (in case of interpreted method), and destination address
start_pc = __ pc();
__ lookup_virtual_method(G3_scratch, vtable_index, G5_method);
// lookup_virtual_method generates 3 instructions (worst case), 1 instruction (best case)
slop_delta = 3*BytesPerInstWord - (int)(__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
@ -109,37 +128,41 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
__ delayed()->nop();
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
vtable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// shut the door on sizing bugs
int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one
assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
// NOTE: %%%% if any change is made to this stub make sure that the function
// pd_code_size_limit is changed to ensure the correct size for VtableStub
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
const int sparc_code_length = VtableStub::pd_code_size_limit(false);
VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
const int index_dependent_slop = ((itable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?).
ResourceMark rm;
CodeBuffer cb(s->entry_point(), sparc_code_length);
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
// Use G3_scratch, G4_scratch as work regs for inc_counter.
// These are defined before use further down.
__ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G3_scratch, G4_scratch);
}
#endif // PRODUCT
Register G3_Klass = G3_scratch;
Register G5_icholder = G5; // Passed in as an argument
Register G4_interface = G4_scratch;
@ -160,16 +183,11 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// and so those registers are not available here.
__ save(SP,-frame::register_save_words*wordSize,SP);
#ifndef PRODUCT
if (CountCompiledCalls) {
__ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), L0, L1);
}
#endif /* PRODUCT */
Label L_no_such_interface;
Label L_no_such_interface;
Register L5_method = L5;
start_pc = __ pc();
// Receiver subtype check against REFC.
__ ld_ptr(G5_icholder, CompiledICHolder::holder_klass_offset(), G4_interface);
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@ -179,6 +197,9 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
L_no_such_interface,
/*return_method=*/ false);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get Method* and entrypoint for compiler
__ ld_ptr(G5_icholder, CompiledICHolder::holder_metadata_offset(), G4_interface);
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@ -187,6 +208,19 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
L5_method, L2, L3,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;
// Reduce "estimate" such that "padding" does not drop below 8.
// Do not target a left-over number of zero, because a very
// large vtable or itable offset (> 4K) will require an extra
// sethi/or pair of instructions.
// Found typecheck(60) + lookup(72) to exceed previous estimate (32*4).
const ptrdiff_t estimate = 36*BytesPerInstWord;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
#ifndef PRODUCT
if (DebugVtables) {
Label L01;
@ -222,88 +256,12 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
__ delayed()->restore();
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
itable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// shut the door on sizing bugs
int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one
assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
if (DebugVtables || CountCompiledCalls || VerifyOops) return 1000;
else {
const int slop = 2*BytesPerInstWord; // sethi;add (needed for long offsets)
if (is_vtable_stub) {
// ld;ld;ld,jmp,nop
const int basic = 5*BytesPerInstWord +
// shift;add for load_klass (only shift with zero heap based)
(UseCompressedClassPointers ?
MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
return basic + slop;
} else {
const int basic = 54 * BytesPerInstWord +
// shift;add for load_klass (only shift with zero heap based)
(UseCompressedClassPointers ?
MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
return (basic + slop);
}
}
// In order to tune these parameters, run the JVM with VM options
// +PrintMiscellaneous and +WizardMode to see information about
// actual itable stubs. Look for lines like this:
// itable #1 at 0x5551212[116] left over: 8
// Reduce the constants so that the "left over" number is 8
// Do not aim at a left-over number of zero, because a very
// large vtable or itable offset (> 4K) will require an extra
// sethi/or pair of instructions.
//
// The JVM98 app. _202_jess has a megamorphic interface call.
// The itable code looks like this:
// Decoding VtableStub itbl[1]@16
// ld [ %o0 + 4 ], %g3
// save %sp, -64, %sp
// ld [ %g3 + 0xe8 ], %l2
// sll %l2, 2, %l2
// add %l2, 0x134, %l2
// add %g3, %l2, %l2
// add %g3, 4, %g3
// ld [ %l2 ], %l5
// brz,pn %l5, throw_icce
// cmp %l5, %g5
// be %icc, success
// add %l2, 8, %l2
// loop:
// ld [ %l2 ], %l5
// brz,pn %l5, throw_icce
// cmp %l5, %g5
// bne,pn %icc, loop
// add %l2, 8, %l2
// success:
// ld [ %l2 + -4 ], %l2
// ld [ %g3 + %l2 ], %l5
// restore %l5, 0, %g5
// ld [ %g5 + 0x44 ], %g3
// jmp %g3
// nop
// throw_icce:
// sethi %hi(throw_ICCE_entry), %g3
// ! 5 more instructions here, LP64_ONLY
// jmp %g3 + %lo(throw_ICCE_entry)
// restore
}
int VtableStub::pd_code_alignment() {
// UltraSPARC cache line size is 8 instructions:
const unsigned int icache_line_size = 32;
return icache_line_size;
}
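The index_dependent_slop terms in the SPARC stubs above budget for the sethi/or pair (2*BytesPerInstWord) that large vtable/itable indices need to build their entry offset once it no longer fits a 13-bit signed immediate; small indices do not emit that pair, so the two instructions are credited as slop. The "@index == 512?" in the source marks the approximate crossover, which depends on where the vtable starts inside Klass. The toy calculation below uses a made-up vtable_start_offset purely for illustration; the x86_32 file that follows applies the same idea at the 8-bit/32-bit displacement boundary near index 32.

  #include <cstdio>
  #include <initializer_list>

  const int vtable_start_offset     = 400; // hypothetical placeholder; the real value depends on the Klass layout
  const int vtable_entry_size_bytes = 8;   // one word per entry on a 64-bit VM
  const int BytesPerInstWord        = 4;

  // SPARC: does the entry offset still fit a 13-bit signed immediate (simm13)?
  bool fits_simm13(int offset) { return offset >= -4096 && offset <= 4095; }

  int main() {
    for (int index : {10, 400, 512, 1000}) {
      int offset = vtable_start_offset + index * vtable_entry_size_bytes;
      // Offsets beyond simm13 need an extra sethi/or pair to build a 32-bit constant first.
      int extra  = fits_simm13(offset) ? 0 : 2 * BytesPerInstWord;
      std::printf("index %4d: offset %5d, extra code %d bytes\n", index, offset, extra);
    }
    return 0;
  }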

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -55,25 +55,34 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int
// Available now, but may become callee-save at some point:
// rsi, rdi
// Note that rax and rdx are also used for return values.
//
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const int i486_code_length = VtableStub::pd_code_size_limit(true);
VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
ResourceMark rm;
CodeBuffer cb(s->entry_point(), i486_code_length);
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
// No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
const int index_dependent_slop = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
#endif /* PRODUCT */
#endif
// get receiver (need to skip return address on top of stack)
assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
@ -85,11 +94,21 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
#ifndef PRODUCT
if (DebugVtables) {
Label L;
start_pc = __ pc();
// check offset vs vtable length
__ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
slop_delta = 6 - (__ pc() - start_pc); // cmpl varies in length, depending on data
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ jcc(Assembler::greater, L);
__ movl(rbx, vtable_index);
// VTABLE TODO: find upper bound for call_VM length.
start_pc = __ pc();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
slop_delta = 480 - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ bind(L);
}
#endif // PRODUCT
@ -97,8 +116,13 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const Register method = rbx;
// load Method* and target address
start_pc = __ pc();
__ lookup_virtual_method(rax, vtable_index, method);
slop_delta = 6 - (int)(__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
Label L;
__ cmpptr(method, (int32_t)NULL_WORD);
@ -108,55 +132,53 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
__ stop("Vtable entry is NULL");
__ bind(L);
}
#endif // PRODUCT
// rax,: receiver klass
// rax: receiver klass
// method (rbx): Method*
// rcx: receiver
address ame_addr = __ pc();
__ jmp( Address(method, Method::from_compiled_offset()));
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
vtable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// shut the door on sizing bugs
int slop = 3; // 32-bit offset is this much larger than an 8-bit one
assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// Note well: pd_code_size_limit is the absolute minimum we can get away with. If you
// add code here, bump the code stub size returned by pd_code_size_limit!
const int i486_code_length = VtableStub::pd_code_size_limit(false);
VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32).
(itable_index < 32) ? 3 : 0; // index == 0 generates even shorter code.
ResourceMark rm;
CodeBuffer cb(s->entry_point(), i486_code_length);
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
// Entry arguments:
// rax: CompiledICHolder
// rcx: Receiver
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
#endif /* PRODUCT */
// Entry arguments:
// rax: CompiledICHolder
// rcx: Receiver
// Most registers are in use; we'll use rax, rbx, rsi, rdi
// (If we need to make rsi, rdi callee-save, do a push/pop here.)
const Register recv_klass_reg = rsi;
@ -171,10 +193,12 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
Label L_no_such_interface;
// get receiver klass (also an implicit null-check)
address npe_addr = __ pc();
assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
address npe_addr = __ pc();
__ load_klass(recv_klass_reg, rcx);
start_pc = __ pc();
// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
@ -184,6 +208,9 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
L_no_such_interface,
/*return_method=*/false);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, rcx); // restore recv_klass_reg
@ -193,19 +220,30 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
method, temp_reg,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;
// We expect we need index_dependent_slop extra bytes. Reason:
// The emitted code in lookup_interface_method changes when itable_index exceeds 31.
// For windows, a narrow estimate was found to be 104. Other OSes not tested.
const ptrdiff_t estimate = 104;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
// method (rbx): Method*
// rcx: receiver
#ifdef ASSERT
if (DebugVtables) {
Label L1;
__ cmpptr(method, (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L1);
__ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::notZero, L1);
__ stop("Method* is null");
__ bind(L1);
}
Label L1;
__ cmpptr(method, (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L1);
__ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::notZero, L1);
__ stop("Method* is null");
__ bind(L1);
}
#endif // ASSERT
address ame_addr = __ pc();
@ -219,70 +257,15 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// dirty work.
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
__ flush();
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
itable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// shut the door on sizing bugs
int slop = 3; // 32-bit offset is this much larger than an 8-bit one
assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
if (is_vtable_stub) {
// Vtable stub size
return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
} else {
// Itable stub size
return (DebugVtables ? 256 : 110) + (CountCompiledCalls ? 6 : 0);
}
// In order to tune these parameters, run the JVM with VM options
// +PrintMiscellaneous and +WizardMode to see information about
// actual itable stubs. Look for lines like this:
// itable #1 at 0x5551212[65] left over: 3
// Reduce the constants so that the "left over" number is >=3
// for the common cases.
// Do not aim at a left-over number of zero, because a
// large vtable or itable index (> 16) will require a 32-bit
// immediate displacement instead of an 8-bit one.
//
// The JVM98 app. _202_jess has a megamorphic interface call.
// The itable code looks like this:
// Decoding VtableStub itbl[1]@1
// mov 0x4(%ecx),%esi
// mov 0xe8(%esi),%edi
// lea 0x130(%esi,%edi,4),%edi
// add $0x7,%edi
// and $0xfffffff8,%edi
// lea 0x4(%esi),%esi
// mov (%edi),%ebx
// cmp %ebx,%eax
// je success
// loop:
// test %ebx,%ebx
// je throw_icce
// add $0x8,%edi
// mov (%edi),%ebx
// cmp %ebx,%eax
// jne loop
// success:
// mov 0x4(%edi),%edi
// mov (%esi,%edi,1),%ebx
// jmp *0x44(%ebx)
// throw_icce:
// jmp throw_ICCE_entry
}
int VtableStub::pd_code_alignment() {
return wordSize;
// x86 cache line size is 64 bytes, but we want to limit alignment loss.
const unsigned int icache_line_size = wordSize;
return icache_line_size;
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,24 +42,32 @@
#define __ masm->
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread,
oop receiver,
int index);
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
#endif
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
const int amd64_code_length = VtableStub::pd_code_size_limit(true);
VtableStub* s = new(amd64_code_length) VtableStub(true, vtable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
ResourceMark rm;
CodeBuffer cb(s->entry_point(), amd64_code_length);
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
// No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
const int index_dependent_slop = 0;
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
@ -77,22 +85,35 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
#ifndef PRODUCT
if (DebugVtables) {
Label L;
start_pc = __ pc();
// check offset vs vtable length
__ cmpl(Address(rax, Klass::vtable_length_offset()),
vtable_index * vtableEntry::size());
__ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
slop_delta = 12 - (__ pc() - start_pc); // cmpl varies in length, depending on data
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ jcc(Assembler::greater, L);
__ movl(rbx, vtable_index);
__ call_VM(noreg,
CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx);
// VTABLE TODO: find upper bound for call_VM length.
start_pc = __ pc();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx);
slop_delta = 480 - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ bind(L);
}
#endif // PRODUCT
// load Method* and target address
const Register method = rbx;
// load Method* and target address
start_pc = __ pc();
__ lookup_virtual_method(rax, vtable_index, method);
slop_delta = 8 - (int)(__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
Label L;
__ cmpptr(method, (int32_t)NULL_WORD);
@ -102,50 +123,48 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
__ stop("Vtable entry is NULL");
__ bind(L);
}
#endif // PRODUCT
// rax: receiver klass
// rbx: Method*
// method (rbx): Method*
// rcx: receiver
address ame_addr = __ pc();
__ jmp( Address(rbx, Method::from_compiled_offset()));
__ flush();
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
vtable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// shut the door on sizing bugs
int slop = 3; // 32-bit offset is this much larger than an 8-bit one
assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// Note well: pd_code_size_limit is the absolute minimum we can get
// away with. If you add code here, bump the code stub size
// returned by pd_code_size_limit!
const int amd64_code_length = VtableStub::pd_code_size_limit(false);
VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index);
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
const int index_dependent_slop = (itable_index == 0) ? 4 :    // index == 0 generates even shorter code.
                                 (itable_index < 16) ? 3 : 0; // code size changes with the transition from 8-bit to 32-bit constant (@index == 16).
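The index dependence is a consequence of x86 instruction encoding; the two instructions below are an editorial illustration, not taken from an actual stub. Once the itable offset no longer fits a signed 8-bit displacement, each affected memory operand is encoded with a 32-bit displacement and grows by 3 bytes; an offset of exactly zero can omit the displacement byte altogether, which is why index 0 is shorter still.
// mov 0x18(%r10),%rbx    ; 8-bit displacement, 4-byte encoding
// mov 0x118(%r10),%rbx   ; 32-bit displacement, 7-byte encoding (3 bytes larger)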
ResourceMark rm;
CodeBuffer cb(s->entry_point(), amd64_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
ResourceMark rm;
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler *masm = new MacroAssembler(&cb);
#ifndef PRODUCT
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
#endif
#endif // PRODUCT
// Entry arguments:
// rax: CompiledICHolder
@ -158,17 +177,19 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
const Register temp_reg = r11;
Label L_no_such_interface;
const Register icholder_reg = rax;
__ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
Label L_no_such_interface;
// get receiver klass (also an implicit null-check)
assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
address npe_addr = __ pc();
__ load_klass(recv_klass_reg, j_rarg0);
start_pc = __ pc();
// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
@ -178,6 +199,9 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
L_no_such_interface,
/*return_method=*/false);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg
@ -187,6 +211,17 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
method, temp_reg,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;
// We expect we need index_dependent_slop extra bytes. Reason:
// The emitted code in lookup_interface_method changes when itable_index exceeds 15.
// For Linux, a very narrow estimate would be 112, but Solaris requires some more space (130).
const ptrdiff_t estimate = 136;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
// If we take a trap while this arg is on the stack we will not
// be able to walk the stack properly. This is not an issue except
// when there are mistakes in this assembly code that could generate
@ -207,8 +242,6 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
}
#endif // ASSERT
// rbx: Method*
// j_rarg0: receiver
address ame_addr = __ pc();
__ jmp(Address(method, Method::from_compiled_offset()));
@ -220,68 +253,15 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// dirty work.
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
__ flush();
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
if (PrintMiscellaneous && (WizardMode || Verbose)) {
tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
itable_index, p2i(s->entry_point()),
(int)(s->code_end() - s->entry_point()),
(int)(s->code_end() - __ pc()));
}
guarantee(__ pc() <= s->code_end(), "overflowed buffer");
// shut the door on sizing bugs
int slop = 3; // 32-bit offset is this much larger than an 8-bit one
assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
s->set_exception_points(npe_addr, ame_addr);
return s;
}
int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
if (is_vtable_stub) {
// Vtable stub size
return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) +
(UseCompressedClassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
} else {
// Itable stub size
return (DebugVtables ? 512 : 140) + (CountCompiledCalls ? 13 : 0) +
(UseCompressedClassPointers ? 2 * MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
}
// In order to tune these parameters, run the JVM with VM options
// +PrintMiscellaneous and +WizardMode to see information about
// actual itable stubs. Look for lines like this:
// itable #1 at 0x5551212[71] left over: 3
// Reduce the constants so that the "left over" number is >=3
// for the common cases.
// Do not aim at a left-over number of zero, because a
// large vtable or itable index (>= 32) will require a 32-bit
// immediate displacement instead of an 8-bit one.
//
// The JVM98 app. _202_jess has a megamorphic interface call.
// The itable code looks like this:
// Decoding VtableStub itbl[1]@12
// mov 0x8(%rsi),%r10
// mov 0x198(%r10),%r11d
// lea 0x218(%r10,%r11,8),%r11
// lea 0x8(%r10),%r10
// mov (%r11),%rbx
// cmp %rbx,%rax
// je success
// loop:
// test %rbx,%rbx
// je throw_icce
// add $0x10,%r11
// mov (%r11),%rbx
// cmp %rbx,%rax
// jne loop
// success:
// mov 0x8(%r11),%r11d
// mov (%r10,%r11,1),%rbx
// jmpq *0x60(%rbx)
// throw_icce:
// jmpq throw_ICCE_entry
}
int VtableStub::pd_code_alignment() {
return wordSize;
// x86 cache line size is 64 bytes, but we want to limit alignment loss.
const unsigned int icache_line_size = wordSize;
return icache_line_size;
}


@ -92,6 +92,32 @@ void VtableStub::print_on(outputStream* st) const {
VtableStub* VtableStubs::_table[VtableStubs::N];
int VtableStubs::_number_of_vtable_stubs = 0;
int VtableStubs::_vtab_stub_size = 0;
int VtableStubs::_itab_stub_size = 0;
#if defined(PRODUCT)
// These values are good for the PRODUCT case (no tracing).
static const int first_vtableStub_size = 64;
static const int first_itableStub_size = 256;
#else
// These values are good for the non-PRODUCT case (when tracing can be switched on).
// To find out, run test workload with
// -Xlog:vtablestubs=Trace -XX:+CountCompiledCalls -XX:+DebugVtables
// and use the reported "estimate" value.
// Here is a list of observed worst-case values:
// vtable itable
// aarch64: 460 324
// arm: ? ?
// ppc (linux, BE): 404 288
// ppc (linux, LE): 356 276
// ppc (AIX): 416 296
// s390x: 408 256
// Solaris-sparc: 792 348
// x86 (Linux): 670 309
// x86 (MacOS): 682 321
static const int first_vtableStub_size = 1024;
static const int first_itableStub_size = 512;
#endif
void VtableStubs::initialize() {
@ -107,6 +133,77 @@ void VtableStubs::initialize() {
}
int VtableStubs::code_size_limit(bool is_vtable_stub) {
if (is_vtable_stub) {
return _vtab_stub_size > 0 ? _vtab_stub_size : first_vtableStub_size;
} else { // itable stub
return _itab_stub_size > 0 ? _itab_stub_size : first_itableStub_size;
}
} // code_size_limit
void VtableStubs::check_and_set_size_limit(bool is_vtable_stub,
int code_size,
int padding) {
const char* name = is_vtable_stub ? "vtable" : "itable";
guarantee(code_size <= code_size_limit(is_vtable_stub),
"buffer overflow in %s stub, code_size is %d, limit is %d", name, code_size, code_size_limit(is_vtable_stub));
if (is_vtable_stub) {
if (log_is_enabled(Trace, vtablestubs)) {
if ( (_vtab_stub_size > 0) && ((code_size + padding) > _vtab_stub_size) ) {
log_trace(vtablestubs)("%s size estimate needed adjustment from %d to %d bytes",
name, _vtab_stub_size, code_size + padding);
}
}
if ( (code_size + padding) > _vtab_stub_size ) {
_vtab_stub_size = code_size + padding;
}
} else { // itable stub
if (log_is_enabled(Trace, vtablestubs)) {
if ( (_itab_stub_size > 0) && ((code_size + padding) > _itab_stub_size) ) {
log_trace(vtablestubs)("%s size estimate needed adjustment from %d to %d bytes",
name, _itab_stub_size, code_size + padding);
}
}
if ( (code_size + padding) > _itab_stub_size ) {
_itab_stub_size = code_size + padding;
}
}
return;
} // check_and_set_size_limit
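For orientation, this is how the two routines above cooperate over the life of the VM; the byte counts are invented for illustration, only the calls themselves appear in this patch:
// first vtable stub:
//   code_size_limit(true)                   -> first_vtableStub_size (initial worst-case guess)
//   platform emitter produces, say, 212 bytes of code and accumulates slop_bytes = 24
//   check_and_set_size_limit(true, 212, 24) -> _vtab_stub_size = 236
// every later vtable stub:
//   code_size_limit(true)                   -> 236, a tightly matching buffer
//   check_and_set_size_limit(...)           -> grows the limit only if a stub ever needs more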
void VtableStubs::bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s,
address npe_addr, address ame_addr, bool is_vtable_stub,
int index, int slop_bytes, int index_dependent_slop) {
const char* name = is_vtable_stub ? "vtable" : "itable";
const int stub_length = code_size_limit(is_vtable_stub);
if (log_is_enabled(Trace, vtablestubs)) {
log_trace(vtablestubs)("%s #%d at " PTR_FORMAT ": size: %d, estimate: %d, slop area: %d",
name, index, p2i(s->code_begin()),
(int)(masm->pc() - s->code_begin()),
stub_length,
(int)(s->code_end() - masm->pc()));
}
guarantee(masm->pc() <= s->code_end(), "%s #%d: overflowed buffer, estimated len: %d, actual len: %d, overrun: %d",
name, index, stub_length,
(int)(masm->pc() - s->code_begin()),
(int)(masm->pc() - s->code_end()));
assert((masm->pc() + index_dependent_slop) <= s->code_end(), "%s #%d: spare space for 32-bit offset: required = %d, available = %d",
name, index, index_dependent_slop,
(int)(s->code_end() - masm->pc()));
// After the first vtable/itable stub is generated, we have a much
// better estimate for the stub size. Remember/update this
// estimate after some sanity checks.
check_and_set_size_limit(is_vtable_stub, masm->offset(), slop_bytes);
s->set_exception_points(npe_addr, ame_addr);
}
address VtableStubs::find_stub(bool is_vtable_stub, int vtable_index) {
assert(vtable_index >= 0, "must be positive");
@ -173,10 +270,7 @@ VtableStub* VtableStubs::entry_point(address pc) {
uint hash = VtableStubs::hash(stub->is_vtable_stub(), stub->index());
VtableStub* s;
for (s = _table[hash]; s != NULL && s != stub; s = s->next()) {}
if (s == stub) {
return s;
}
return NULL;
return (s == stub) ? s : NULL;
}
bool VtableStubs::contains(address pc) {


@ -25,12 +25,94 @@
#ifndef SHARE_VM_CODE_VTABLESTUBS_HPP
#define SHARE_VM_CODE_VTABLESTUBS_HPP
#include "asm/macroAssembler.hpp"
#include "code/vmreg.hpp"
#include "memory/allocation.hpp"
// A VtableStub holds an individual code stub for a pair (vtable index, #args) for either itables or vtables
// There's a one-to-one relationship between a VtableStub and such a pair.
// A word on VtableStub sizing:
// Such a vtable/itable stub consists of the instance data
// and an immediately following CodeBuffer.
// Unfortunately, the required space for the code buffer varies, depending on
// the setting of compile time macros (PRODUCT, ASSERT, ...) and of command line
// parameters. Actual data may have an influence on the size as well.
//
// A simple approximation for the VtableStub size would be to just take a value
// "large enough" for all circumstances - a worst case estimate.
// As there can exist many stubs - and they never go away - we certainly don't
// want to waste more code cache space than absolutely necessary.
//
// We need a different approach which, as far as possible, should be independent
// from or adaptive to code size variations. These variations may be caused by
// changed compile time or run time switches as well as by changed emitter code.
//
// Here is the idea:
// For the first stub we generate, we allocate a "large enough" code buffer.
// Once all instructions are emitted, we know the actual size of the stub.
// Remembering that size allows us to allocate a tightly matching code buffer
// for all subsequent stubs. That covers all "static variance", i.e. all variance
// that is due to compile time macros, command line parameters, machine capabilities,
// and other influences which are immutable for the life span of the vm.
//
// Life isn't always that easy. Code size may depend on actual data, "load constant"
// being an example for that. All code segments with such "dynamic variance" require
// additional care. We need to know or estimate the worst case code size for each
// such segment. With that knowledge, we can maintain a "slop counter" in the
// platform-specific stub emitters. It accumulates the difference between worst-case
// and actual code size. When the stub is fully generated, the actual stub size is
// adjusted (increased) by the slop counter value.
//
// As a result, we allocate all but the first code buffers with the same, tightly matching size.
//
// VtableStubs creates the code stubs for compiled calls through vtables.
// There is one stub per (vtable index, args_size) pair, and the stubs are
// never deallocated. They don't need to be GCed because they contain no oops.
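The scheme described above condenses into a small amount of bookkeeping. The sketch below is an editorial illustration with hypothetical names (remembered_limit, first_stub_guess, buffer_size_for_next_stub, after_stub_emitted); the actual logic lives in VtableStubs::code_size_limit(), VtableStubs::check_and_set_size_limit(), and the platform-specific stub emitters.
#include <algorithm>
static int       remembered_limit = 0;     // plays the role of _vtab_stub_size / _itab_stub_size
static const int first_stub_guess = 1024;  // plays the role of first_vtableStub_size
int buffer_size_for_next_stub() {
  // First stub: generous worst-case guess. All later stubs: tightly matching remembered size.
  return remembered_limit > 0 ? remembered_limit : first_stub_guess;
}
void after_stub_emitted(int emitted_bytes, int slop_bytes) {
  // Remember the emitted size plus the accumulated "dynamic variance" slop, so that
  // subsequently allocated buffers are large enough even in the worst data-dependent case.
  remembered_limit = std::max(remembered_limit, emitted_bytes + slop_bytes);
}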
class VtableStub;
class VtableStubs : AllStatic {
public: // N must be public (some compilers need this for _table)
enum {
N = 256, // size of stub table; must be power of two
mask = N - 1
};
private:
friend class VtableStub;
static VtableStub* _table[N]; // table of existing stubs
static int _number_of_vtable_stubs; // number of stubs created so far (for statistics)
static int _vtab_stub_size; // current size estimate for vtable stub (quasi-constant)
static int _itab_stub_size; // current size estimate for itable stub (quasi-constant)
static VtableStub* create_vtable_stub(int vtable_index);
static VtableStub* create_itable_stub(int vtable_index);
static VtableStub* lookup (bool is_vtable_stub, int vtable_index);
static void enter (bool is_vtable_stub, int vtable_index, VtableStub* s);
static inline uint hash (bool is_vtable_stub, int vtable_index);
static address find_stub (bool is_vtable_stub, int vtable_index);
static void bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s,
address npe_addr, address ame_addr, bool is_vtable_stub,
int index, int slop_bytes, int index_dependent_slop);
static int code_size_limit(bool is_vtable_stub);
static void check_and_set_size_limit(bool is_vtable_stub,
int code_size,
int padding);
public:
static address find_vtable_stub(int vtable_index) { return find_stub(true, vtable_index); }
static address find_itable_stub(int itable_index) { return find_stub(false, itable_index); }
static VtableStub* entry_point(address pc); // vtable stub entry point for a pc
static bool contains(address pc); // is pc within any stub?
static VtableStub* stub_containing(address pc); // stub containing pc or NULL
static int number_of_vtable_stubs() { return _number_of_vtable_stubs; }
static void initialize();
static void vtable_stub_do(void f(VtableStub*)); // iterates over all vtable stubs
};
class VtableStub {
private:
friend class VtableStubs;
@ -58,7 +140,7 @@ class VtableStub {
public:
address code_begin() const { return (address)(this + 1); }
address code_end() const { return code_begin() + pd_code_size_limit(_is_vtable_stub); }
address code_end() const { return code_begin() + VtableStubs::code_size_limit(_is_vtable_stub); }
address entry_point() const { return code_begin(); }
static int entry_offset() { return sizeof(class VtableStub); }
@ -78,7 +160,6 @@ class VtableStub {
}
// platform-dependent routines
static int pd_code_size_limit(bool is_vtable_stub);
static int pd_code_alignment();
// CNC: Removed because vtable stubs are now made with an ideal graph
// static bool pd_disregard_arg_size();
@ -100,38 +181,4 @@ class VtableStub {
};
// VtableStubs creates the code stubs for compiled calls through vtables.
// There is one stub per (vtable index, args_size) pair, and the stubs are
// never deallocated. They don't need to be GCed because they contain no oops.
class VtableStubs : AllStatic {
public: // N must be public (some compilers need this for _table)
enum {
N = 256, // size of stub table; must be power of two
mask = N - 1
};
private:
static VtableStub* _table[N]; // table of existing stubs
static int _number_of_vtable_stubs; // number of stubs created so far (for statistics)
static VtableStub* create_vtable_stub(int vtable_index);
static VtableStub* create_itable_stub(int vtable_index);
static VtableStub* lookup (bool is_vtable_stub, int vtable_index);
static void enter (bool is_vtable_stub, int vtable_index, VtableStub* s);
static inline uint hash (bool is_vtable_stub, int vtable_index);
static address find_stub (bool is_vtable_stub, int vtable_index);
public:
static address find_vtable_stub(int vtable_index) { return find_stub(true, vtable_index); }
static address find_itable_stub(int itable_index) { return find_stub(false, itable_index); }
static VtableStub* entry_point(address pc); // vtable stub entry point for a pc
static bool contains(address pc); // is pc within any stub?
static VtableStub* stub_containing(address pc); // stub containing pc or NULL
static int number_of_vtable_stubs() { return _number_of_vtable_stubs; }
static void initialize();
static void vtable_stub_do(void f(VtableStub*)); // iterates over all vtable stubs
};
#endif // SHARE_VM_CODE_VTABLESTUBS_HPP


@ -170,7 +170,8 @@
LOG_TAG(vmoperation) \
LOG_TAG(vmthread) \
LOG_TAG(vtables) \
LOG_TAG(workgang)
LOG_TAG(vtablestubs) \
LOG_TAG(workgang) \
LOG_TAG_LIST_EXT
#define PREFIX_LOG_TAG(T) (LogTag::_##T)