mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-25 15:20:11 +00:00
8339983: [s390x] secondary_super_cache does not scale well: C1 and interpreter
Reviewed-by: lucy, aph
This commit is contained in:
parent
e023addf70
commit
a3b58ee5cd
@ -2539,13 +2539,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
|
||||
} else {
|
||||
bool need_slow_path = !k->is_loaded() ||
|
||||
((int) k->super_check_offset() == in_bytes(Klass::secondary_super_cache_offset()));
|
||||
intptr_t super_check_offset = k->is_loaded() ? k->super_check_offset() : -1L;
|
||||
__ load_klass(klass_RInfo, obj);
|
||||
// Perform the fast part of the checking logic.
|
||||
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1,
|
||||
(need_slow_path ? success_target : nullptr),
|
||||
failure_target, nullptr,
|
||||
RegisterOrConstant(super_check_offset));
|
||||
failure_target, nullptr);
|
||||
if (need_slow_path) {
|
||||
// Call out-of-line instance of __ check_klass_subtype_slow_path(...):
|
||||
address a = Runtime1::entry_for (C1StubId::slow_subtype_check_id);
|
||||
|
||||
@ -557,7 +557,12 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
|
||||
__ z_lg(Rsubklass, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
|
||||
__ z_lg(Rsuperklass, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
|
||||
|
||||
__ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, Rarray_ptr, Rlength, nullptr, &miss);
|
||||
__ check_klass_subtype_slow_path(Rsubklass,
|
||||
Rsuperklass,
|
||||
Rarray_ptr /* temp_reg */,
|
||||
Rlength /* temp2_reg */,
|
||||
nullptr /* L_success */,
|
||||
&miss /* L_failure */);
|
||||
|
||||
// Match falls through here.
|
||||
i = 0;
|
||||
|
||||
@ -2981,21 +2981,15 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
|
||||
Label* L_success,
|
||||
Label* L_failure,
|
||||
Label* L_slow_path,
|
||||
RegisterOrConstant super_check_offset) {
|
||||
Register super_check_offset) {
|
||||
// Input registers must not overlap.
|
||||
assert_different_registers(sub_klass, super_klass, temp1_reg, super_check_offset);
|
||||
|
||||
const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
|
||||
const int sco_offset = in_bytes(Klass::super_check_offset_offset());
|
||||
|
||||
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
|
||||
bool need_slow_path = (must_load_sco ||
|
||||
super_check_offset.constant_or_zero() == sc_offset);
|
||||
bool must_load_sco = ! super_check_offset->is_valid();
|
||||
|
||||
// Input registers must not overlap.
|
||||
assert_different_registers(sub_klass, super_klass, temp1_reg);
|
||||
if (super_check_offset.is_register()) {
|
||||
assert_different_registers(sub_klass, super_klass,
|
||||
super_check_offset.as_register());
|
||||
} else if (must_load_sco) {
|
||||
if (must_load_sco) {
|
||||
assert(temp1_reg != noreg, "supply either a temp or a register offset");
|
||||
}
|
||||
|
||||
@ -3006,9 +3000,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
|
||||
if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
|
||||
if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
|
||||
if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
|
||||
assert(label_nulls <= 1 ||
|
||||
(L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
|
||||
"at most one null in the batch, usually");
|
||||
assert(label_nulls <= 1 || (L_slow_path == &L_fallthrough && label_nulls <= 2), "at most one null in the batch, usually");
|
||||
|
||||
BLOCK_COMMENT("check_klass_subtype_fast_path {");
|
||||
// If the pointers are equal, we are done (e.g., String[] elements).
|
||||
@ -3023,10 +3015,12 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
|
||||
// Check the supertype display, which is uint.
|
||||
if (must_load_sco) {
|
||||
z_llgf(Rsuper_check_offset, sco_offset, super_klass);
|
||||
super_check_offset = RegisterOrConstant(Rsuper_check_offset);
|
||||
super_check_offset = Rsuper_check_offset;
|
||||
}
|
||||
|
||||
Address super_check_addr(sub_klass, super_check_offset, 0);
|
||||
z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
|
||||
branch_optimized(Assembler::bcondEqual, *L_success);
|
||||
|
||||
// This check has worked decisively for primary supers.
|
||||
// Secondary supers are sought in the super_cache ('super_cache_addr').
|
||||
@ -3044,46 +3038,27 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
|
||||
if (&(label) == &L_fallthrough) { /*do nothing*/ } \
|
||||
else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/
|
||||
|
||||
if (super_check_offset.is_register()) {
|
||||
branch_optimized(Assembler::bcondEqual, *L_success);
|
||||
z_cfi(super_check_offset.as_register(), sc_offset);
|
||||
if (L_failure == &L_fallthrough) {
|
||||
branch_optimized(Assembler::bcondEqual, *L_slow_path);
|
||||
} else {
|
||||
branch_optimized(Assembler::bcondNotEqual, *L_failure);
|
||||
final_jmp(*L_slow_path);
|
||||
}
|
||||
} else if (super_check_offset.as_constant() == sc_offset) {
|
||||
// Need a slow path; fast failure is impossible.
|
||||
if (L_slow_path == &L_fallthrough) {
|
||||
branch_optimized(Assembler::bcondEqual, *L_success);
|
||||
} else {
|
||||
branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
|
||||
final_jmp(*L_success);
|
||||
}
|
||||
z_cfi(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
|
||||
if (L_failure == &L_fallthrough) {
|
||||
branch_optimized(Assembler::bcondEqual, *L_slow_path);
|
||||
} else {
|
||||
// No slow path; it's a fast decision.
|
||||
if (L_failure == &L_fallthrough) {
|
||||
branch_optimized(Assembler::bcondEqual, *L_success);
|
||||
} else {
|
||||
branch_optimized(Assembler::bcondNotEqual, *L_failure);
|
||||
final_jmp(*L_success);
|
||||
}
|
||||
branch_optimized(Assembler::bcondNotEqual, *L_failure);
|
||||
final_jmp(*L_slow_path);
|
||||
}
|
||||
|
||||
bind(L_fallthrough);
|
||||
#undef local_brc
|
||||
#undef final_jmp
|
||||
BLOCK_COMMENT("} check_klass_subtype_fast_path");
|
||||
// fallthru (to slow path)
|
||||
}
|
||||
|
||||
void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
|
||||
Register Rsuperklass,
|
||||
Register Rarray_ptr, // tmp
|
||||
Register Rlength, // tmp
|
||||
Label* L_success,
|
||||
Label* L_failure) {
|
||||
void MacroAssembler::check_klass_subtype_slow_path_linear(Register Rsubklass,
|
||||
Register Rsuperklass,
|
||||
Register Rarray_ptr, // tmp
|
||||
Register Rlength, // tmp
|
||||
Label* L_success,
|
||||
Label* L_failure,
|
||||
bool set_cond_codes /* unused */) {
|
||||
// Input registers must not overlap.
|
||||
// Also check for R1 which is explicitly used here.
|
||||
assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
|
||||
@ -3106,7 +3081,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
|
||||
|
||||
NearLabel loop_iterate, loop_count, match;
|
||||
|
||||
BLOCK_COMMENT("check_klass_subtype_slow_path {");
|
||||
BLOCK_COMMENT("check_klass_subtype_slow_path_linear {");
|
||||
z_lg(Rarray_ptr, ss_offset, Rsubklass);
|
||||
|
||||
load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
|
||||
@ -3134,18 +3109,151 @@ void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
|
||||
branch_optimized(Assembler::bcondAlways, *L_failure);
|
||||
|
||||
// Got a hit. Return success (zero result). Set cache.
|
||||
// Cache load doesn't happen here. For speed it is directly emitted by the compiler.
|
||||
// Cache load doesn't happen here. For speed, it is directly emitted by the compiler.
|
||||
|
||||
BIND(match);
|
||||
|
||||
z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
|
||||
|
||||
if (UseSecondarySupersCache) {
|
||||
z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
|
||||
}
|
||||
final_jmp(*L_success);
|
||||
|
||||
// Exit to the surrounding code.
|
||||
BIND(L_fallthrough);
|
||||
#undef local_brc
|
||||
#undef final_jmp
|
||||
BLOCK_COMMENT("} check_klass_subtype_slow_path_linear");
|
||||
}
|
||||
|
||||
// If Register r is invalid, remove a new register from
|
||||
// available_regs, and add new register to regs_to_push.
|
||||
Register MacroAssembler::allocate_if_noreg(Register r,
|
||||
RegSetIterator<Register> &available_regs,
|
||||
RegSet ®s_to_push) {
|
||||
if (!r->is_valid()) {
|
||||
r = *available_regs++;
|
||||
regs_to_push += r;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// check_klass_subtype_slow_path_table() looks for super_klass in the
|
||||
// hash table belonging to super_klass, branching to L_success or
|
||||
// L_failure as appropriate. This is essentially a shim which
|
||||
// allocates registers as necessary and then calls
|
||||
// lookup_secondary_supers_table() to do the work. Any of the temp
|
||||
// regs may be noreg, in which case this logic will choose some
|
||||
// registers push and pop them from the stack.
|
||||
void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass,
|
||||
Register super_klass,
|
||||
Register temp_reg,
|
||||
Register temp2_reg,
|
||||
Register temp3_reg,
|
||||
Register temp4_reg,
|
||||
Register result_reg,
|
||||
Label* L_success,
|
||||
Label* L_failure,
|
||||
bool set_cond_codes) {
|
||||
BLOCK_COMMENT("check_klass_subtype_slow_path_table {");
|
||||
|
||||
RegSet temps = RegSet::of(temp_reg, temp2_reg, temp3_reg, temp4_reg);
|
||||
|
||||
assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp4_reg);
|
||||
|
||||
Label L_fallthrough;
|
||||
int label_nulls = 0;
|
||||
if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
|
||||
if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
|
||||
assert(label_nulls <= 1, "at most one null in the batch");
|
||||
|
||||
RegSetIterator<Register> available_regs
|
||||
// Z_R0 will be used to hold Z_R15(Z_SP) while pushing a new frame, So don't use that here.
|
||||
// Z_R1 will be used to hold r_bitmap in lookup_secondary_supers_table_var, so can't be used
|
||||
// Z_R2, Z_R3, Z_R4 will be used in secondary_supers_verify, for the failure reporting
|
||||
= (RegSet::range(Z_R0, Z_R15) - temps - sub_klass - super_klass - Z_R1_scratch - Z_R0_scratch - Z_R2 - Z_R3 - Z_R4).begin();
|
||||
|
||||
RegSet pushed_regs;
|
||||
|
||||
temp_reg = allocate_if_noreg(temp_reg, available_regs, pushed_regs);
|
||||
temp2_reg = allocate_if_noreg(temp2_reg, available_regs, pushed_regs);
|
||||
temp3_reg = allocate_if_noreg(temp3_reg, available_regs, pushed_regs);;
|
||||
temp4_reg = allocate_if_noreg(temp4_reg, available_regs, pushed_regs);
|
||||
result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs);
|
||||
|
||||
const int frame_size = pushed_regs.size() * BytesPerWord + frame::z_abi_160_size;
|
||||
|
||||
// Push & save registers
|
||||
{
|
||||
int i = 0;
|
||||
save_return_pc();
|
||||
push_frame(frame_size);
|
||||
|
||||
for (auto it = pushed_regs.begin(); *it != noreg; i++) {
|
||||
z_stg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP);
|
||||
}
|
||||
assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity");
|
||||
}
|
||||
|
||||
lookup_secondary_supers_table_var(sub_klass,
|
||||
super_klass,
|
||||
temp_reg, temp2_reg, temp3_reg, temp4_reg, result_reg);
|
||||
|
||||
// NOTE: Condition Code should not be altered before jump instruction below !!!!
|
||||
z_cghi(result_reg, 0);
|
||||
|
||||
{
|
||||
int i = 0;
|
||||
for (auto it = pushed_regs.begin(); *it != noreg; ++i) {
|
||||
z_lg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP);
|
||||
}
|
||||
assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity");
|
||||
pop_frame();
|
||||
restore_return_pc();
|
||||
}
|
||||
|
||||
// NB! Callers may assume that, when set_cond_codes is true, this
|
||||
// code sets temp2_reg to a nonzero value.
|
||||
if (set_cond_codes) {
|
||||
z_lghi(temp2_reg, 1);
|
||||
}
|
||||
|
||||
branch_optimized(bcondNotEqual, *L_failure);
|
||||
|
||||
if(L_success != &L_fallthrough) {
|
||||
z_bru(*L_success);
|
||||
}
|
||||
|
||||
bind(L_fallthrough);
|
||||
BLOCK_COMMENT("} check_klass_subtype_slow_path_table");
|
||||
}
|
||||
|
||||
// Slow-path part of the subtype check. Depending on
// UseSecondarySupersTable (evaluated when this code is generated), this
// emits either the hashed secondary-supers-table lookup or the linear
// scan of the secondary supers. All arguments are passed through
// unchanged to the selected implementation.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  BLOCK_COMMENT("check_klass_subtype_slow_path {");

  if (!UseSecondarySupersTable) {
    check_klass_subtype_slow_path_linear(sub_klass, super_klass,
                                         temp_reg, temp2_reg,
                                         L_success, L_failure,
                                         set_cond_codes);
  } else {
    // The table variant takes three more registers than this entry point
    // receives; noreg is passed for each of them.
    check_klass_subtype_slow_path_table(sub_klass, super_klass,
                                        temp_reg, temp2_reg,
                                        noreg /* temp3_reg */,
                                        noreg /* temp4_reg */,
                                        noreg /* result_reg */,
                                        L_success, L_failure,
                                        set_cond_codes);
  }

  BLOCK_COMMENT("} check_klass_subtype_slow_path");
}
|
||||
|
||||
@ -3206,17 +3314,17 @@ do { \
|
||||
} while(0)
|
||||
|
||||
// Note: this method also kills Z_R1_scratch register on machines older than z15
|
||||
void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register r_temp1,
|
||||
Register r_temp2,
|
||||
Register r_temp3,
|
||||
Register r_temp4,
|
||||
Register r_result,
|
||||
u1 super_klass_slot) {
|
||||
void MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register r_temp1,
|
||||
Register r_temp2,
|
||||
Register r_temp3,
|
||||
Register r_temp4,
|
||||
Register r_result,
|
||||
u1 super_klass_slot) {
|
||||
NearLabel L_done, L_failure;
|
||||
|
||||
BLOCK_COMMENT("lookup_secondary_supers_table {");
|
||||
BLOCK_COMMENT("lookup_secondary_supers_table_const {");
|
||||
|
||||
const Register
|
||||
r_array_base = r_temp1,
|
||||
@ -3291,7 +3399,7 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
|
||||
z_lghi(r_result, 1);
|
||||
|
||||
bind(L_done);
|
||||
BLOCK_COMMENT("} lookup_secondary_supers_table");
|
||||
BLOCK_COMMENT("} lookup_secondary_supers_table_const");
|
||||
|
||||
if (VerifySecondarySupers) {
|
||||
verify_secondary_supers_table(r_sub_klass, r_super_klass, r_result,
|
||||
@ -3299,6 +3407,116 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
|
||||
}
|
||||
}
|
||||
|
||||
// At runtime, return 0 in result if r_super_klass is a superclass of
|
||||
// r_sub_klass, otherwise return nonzero. Use this version of
|
||||
// lookup_secondary_supers_table() if you don't know ahead of time
|
||||
// which superclass will be searched for. Used by interpreter and
|
||||
// runtime stubs. It is larger and has somewhat greater latency than
|
||||
// the version above, which takes a constant super_klass_slot.
|
||||
void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register temp1,
|
||||
Register temp2,
|
||||
Register temp3,
|
||||
Register temp4,
|
||||
Register result) {
|
||||
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result, Z_R1_scratch);
|
||||
|
||||
Label L_done, L_failure;
|
||||
|
||||
BLOCK_COMMENT("lookup_secondary_supers_table_var {");
|
||||
|
||||
const Register
|
||||
r_array_index = temp3,
|
||||
slot = temp4, // NOTE: "slot" can't be Z_R0 otherwise z_sllg and z_rllg instructions below will mess up!!!!
|
||||
r_bitmap = Z_R1_scratch;
|
||||
|
||||
z_llgc(slot, Address(r_super_klass, Klass::hash_slot_offset()));
|
||||
|
||||
// Initialize r_result with 0 (indicating success). If searching fails, r_result will be loaded
|
||||
// with 1 (failure) at the end of this method.
|
||||
clear_reg(result, true /* whole_reg */, false /* set_cc */); // result = 0
|
||||
|
||||
z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset()));
|
||||
|
||||
// First check the bitmap to see if super_klass might be present. If
|
||||
// the bit is zero, we are certain that super_klass is not one of
|
||||
// the secondary supers.
|
||||
z_xilf(slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); // slot ^ 63 === 63 - slot (mod 64)
|
||||
z_sllg(r_array_index, r_bitmap, /*d2 = */ 0, /* b2 = */ slot);
|
||||
|
||||
testbit(r_array_index, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1);
|
||||
branch_optimized(bcondAllZero, L_failure);
|
||||
|
||||
const Register
|
||||
r_array_base = temp1,
|
||||
r_array_length = temp2;
|
||||
|
||||
// Get the first array index that can contain super_klass into r_array_index.
|
||||
// NOTE: Z_R1_scratch is holding bitmap (look above for r_bitmap). So let's try to save it.
|
||||
// On the other hand, r_array_base/temp1 is free at current moment (look at the load operation below).
|
||||
pop_count_long(r_array_index, r_array_index, temp1); // kills r_array_base/temp1 on machines older than z15
|
||||
|
||||
// The value i in r_array_index is >= 1, so even though r_array_base
|
||||
// points to the length, we don't need to adjust it to point to the data.
|
||||
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
|
||||
assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");
|
||||
|
||||
// We will consult the secondary-super array.
|
||||
z_lg(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
|
||||
|
||||
// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
|
||||
z_sllg(r_array_index, r_array_index, LogBytesPerWord); // scale, r_array_index is loaded by popcnt above
|
||||
|
||||
z_cg(r_super_klass, Address(r_array_base, r_array_index));
|
||||
branch_optimized(bcondEqual, L_done); // found a match
|
||||
|
||||
// Note: this is a small hack:
|
||||
//
|
||||
// The operation "(slot ^ 63) === 63 - slot (mod 64)" has already been performed above.
|
||||
// Since we lack a rotate-right instruction, we achieve the same effect by rotating left
|
||||
// by "64 - slot" positions. This produces the result equivalent to a right rotation by "slot" positions.
|
||||
//
|
||||
// => initial slot value
|
||||
// => slot = 63 - slot // done above with that z_xilf instruction
|
||||
// => slot = 64 - slot // need to do for rotating right by "slot" positions
|
||||
// => slot = 64 - (63 - slot)
|
||||
// => slot = slot - 63 + 64
|
||||
// => slot = slot + 1
|
||||
//
|
||||
// So instead of rotating-left by 64-slot times, we can, for now, just rotate left by slot+1 and it would be fine.
|
||||
|
||||
// Linear probe. Rotate the bitmap so that the next bit to test is
|
||||
// in Bit 1.
|
||||
z_aghi(slot, 1); // slot = slot + 1
|
||||
|
||||
z_rllg(r_bitmap, r_bitmap, /*d2=*/ 0, /*b2=*/ slot);
|
||||
testbit(r_bitmap, 1);
|
||||
branch_optimized(bcondAllZero, L_failure);
|
||||
|
||||
// The slot we just inspected is at secondary_supers[r_array_index - 1].
|
||||
// The next slot to be inspected, by the logic we're about to call,
|
||||
// is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
|
||||
// have been checked.
|
||||
lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index,
|
||||
r_bitmap, /*temp=*/ r_array_length, result, /*is_stub*/false);
|
||||
|
||||
// pass whatever we got from slow path
|
||||
z_bru(L_done);
|
||||
|
||||
bind(L_failure);
|
||||
z_lghi(result, 1); // load 1 to represent failure
|
||||
|
||||
bind(L_done);
|
||||
|
||||
BLOCK_COMMENT("} lookup_secondary_supers_table_var");
|
||||
|
||||
if (VerifySecondarySupers) {
|
||||
verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
|
||||
temp1, temp2, temp3);
|
||||
}
|
||||
}
|
||||
|
||||
// Called by code generated by check_klass_subtype_slow_path
|
||||
// above. This is called when there is a collision in the hashed
|
||||
// lookup in the secondary supers array.
|
||||
@ -3306,15 +3524,18 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
|
||||
Register r_array_base,
|
||||
Register r_array_index,
|
||||
Register r_bitmap,
|
||||
Register r_temp,
|
||||
Register r_result,
|
||||
Register r_temp1) {
|
||||
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp1);
|
||||
bool is_stub) {
|
||||
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp);
|
||||
|
||||
const Register
|
||||
r_array_length = r_temp1,
|
||||
r_array_length = r_temp,
|
||||
r_sub_klass = noreg;
|
||||
|
||||
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
|
||||
if(is_stub) {
|
||||
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
|
||||
}
|
||||
|
||||
BLOCK_COMMENT("lookup_secondary_supers_table_slow_path {");
|
||||
NearLabel L_done, L_failure;
|
||||
@ -3343,8 +3564,10 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
|
||||
{ // This is conventional linear probing, but instead of terminating
|
||||
// when a null entry is found in the table, we maintain a bitmap
|
||||
// in which a 0 indicates missing entries.
|
||||
// The check above guarantees there are 0s in the bitmap, so the loop
|
||||
// eventually terminates.
|
||||
// As long as the bitmap is not completely full,
|
||||
// array_length == popcount(bitmap). The array_length check above
|
||||
// guarantees there are 0s in the bitmap, so the loop eventually
|
||||
// terminates.
|
||||
|
||||
#ifdef ASSERT
|
||||
// r_result is set to 0 by lookup_secondary_supers_table.
|
||||
@ -3417,8 +3640,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
|
||||
const Register r_one = Z_R0_scratch;
|
||||
z_lghi(r_one, 1); // for locgr down there, to a load result for failure
|
||||
|
||||
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
|
||||
|
||||
BLOCK_COMMENT("verify_secondary_supers_table {");
|
||||
|
||||
Label L_passed, L_failure;
|
||||
|
||||
@ -694,7 +694,7 @@ class MacroAssembler: public Assembler {
|
||||
Label* L_success,
|
||||
Label* L_failure,
|
||||
Label* L_slow_path,
|
||||
RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
|
||||
Register super_check_offset = noreg);
|
||||
|
||||
// The rest of the type check; must be wired to a corresponding fast path.
|
||||
// It does not repeat the fast path logic, so don't use it standalone.
|
||||
@ -706,25 +706,62 @@ class MacroAssembler: public Assembler {
|
||||
Register Rarray_ptr, // tmp
|
||||
Register Rlength, // tmp
|
||||
Label* L_success,
|
||||
Label* L_failure);
|
||||
Label* L_failure,
|
||||
bool set_cond_codes = false);
|
||||
|
||||
void check_klass_subtype_slow_path_linear(Register sub_klass,
|
||||
Register super_klass,
|
||||
Register temp_reg,
|
||||
Register temp2_reg,
|
||||
Label* L_success,
|
||||
Label* L_failure,
|
||||
bool set_cond_codes = false);
|
||||
|
||||
void check_klass_subtype_slow_path_table(Register sub_klass,
|
||||
Register super_klass,
|
||||
Register temp_reg,
|
||||
Register temp2_reg,
|
||||
Register temp3_reg,
|
||||
Register temp4_reg,
|
||||
Register result_reg,
|
||||
Label* L_success,
|
||||
Label* L_failure,
|
||||
bool set_cond_codes = false);
|
||||
|
||||
// If r is valid, return r.
|
||||
// If r is invalid, remove a register r2 from available_regs, add r2
|
||||
// to regs_to_push, then return r2.
|
||||
Register allocate_if_noreg(const Register r,
|
||||
RegSetIterator<Register> &available_regs,
|
||||
RegSet ®s_to_push);
|
||||
|
||||
void repne_scan(Register r_addr, Register r_value, Register r_count, Register r_scratch);
|
||||
|
||||
void lookup_secondary_supers_table(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register r_temp1,
|
||||
Register r_temp2,
|
||||
Register r_temp3,
|
||||
Register r_temp4,
|
||||
Register r_result,
|
||||
u1 super_klass_slot);
|
||||
// Secondary subtype checking
|
||||
void lookup_secondary_supers_table_var(Register sub_klass,
|
||||
Register r_super_klass,
|
||||
Register temp1,
|
||||
Register temp2,
|
||||
Register temp3,
|
||||
Register temp4,
|
||||
Register result);
|
||||
|
||||
void lookup_secondary_supers_table_const(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register r_temp1,
|
||||
Register r_temp2,
|
||||
Register r_temp3,
|
||||
Register r_temp4,
|
||||
Register r_result,
|
||||
u1 super_klass_slot);
|
||||
|
||||
void lookup_secondary_supers_table_slow_path(Register r_super_klass,
|
||||
Register r_array_base,
|
||||
Register r_array_index,
|
||||
Register r_bitmap,
|
||||
Register r_temp,
|
||||
Register r_result,
|
||||
Register r_temp1);
|
||||
bool is_stub);
|
||||
|
||||
void verify_secondary_supers_table(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
|
||||
@ -9979,8 +9979,9 @@ instruct ShouldNotReachHere() %{
|
||||
instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, flagsReg pcc,
|
||||
rarg4RegP scratch1, rarg5RegP scratch2) %{
|
||||
match(Set index (PartialSubtypeCheck sub super));
|
||||
predicate(!UseSecondarySupersTable);
|
||||
effect(KILL pcc, KILL scratch1, KILL scratch2);
|
||||
ins_cost(10 * DEFAULT_COST);
|
||||
ins_cost(20 * DEFAULT_COST); // slightly larger than the next version
|
||||
// TODO: s390 port size(FIXED_SIZE);
|
||||
format %{ " CALL PartialSubtypeCheck\n" %}
|
||||
ins_encode %{
|
||||
@ -9991,21 +9992,45 @@ instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, fl
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
// There are two versions of partialSubtypeCheck; both are used when a
// super class has to be searched for in the secondary supers array.
// The first one handles the case where the class being searched for is
// not known _a priori_. The second one, which is far more common,
// handles the case where it is known: instanceof, checkcast, and any
// case where C2 can determine it by constant propagation.
instruct partialSubtypeCheckVarSuper(rarg2RegP sub,
                                     rarg3RegP super,
                                     r11TempRegP result,
                                     rarg1RegP temp1,
                                     rarg4RegP temp2,
                                     rarg5RegP temp3,
                                     r10TempRegP temp4,
                                     flagsReg pcc) %{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL pcc, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
  ins_cost(10 * DEFAULT_COST);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}
  ins_encode %{
    // Name the operand registers once, then emit the variable-super lookup.
    const Register sub_klass   = $sub$$Register;
    const Register super_klass = $super$$Register;
    const Register tmp1        = $temp1$$Register;
    const Register tmp2        = $temp2$$Register;
    const Register tmp3        = $temp3$$Register;
    const Register tmp4        = $temp4$$Register;
    const Register res         = $result$$Register;

    __ lookup_secondary_supers_table_var(sub_klass, super_klass,
                                         tmp1, tmp2, tmp3, tmp4,
                                         res);
  %}
  ins_pipe(pipe_class_dummy);
%}
|
||||
|
||||
|
||||
instruct partialSubtypeCheckConstSuper(rarg2RegP sub, rarg1RegP super, immP super_con,
|
||||
r11TempRegP result, rarg5RegP temp1, rarg4RegP temp2,
|
||||
rarg3RegP temp3, r10TempRegP temp4, flagsReg pcc) %{
|
||||
match(Set result (PartialSubtypeCheck sub (Binary super super_con)));
|
||||
predicate(UseSecondarySupersTable);
|
||||
effect(KILL pcc, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
|
||||
ins_cost(7 * DEFAULT_COST); // needs to be less than competing nodes
|
||||
ins_cost(5 * DEFAULT_COST); // smaller than the next version
|
||||
format %{ "partialSubtypeCheck $result, $sub, $super, $super_con" %}
|
||||
|
||||
ins_encode %{
|
||||
u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
|
||||
if (InlineSecondarySupersTest) {
|
||||
__ lookup_secondary_supers_table($sub$$Register, $super$$Register,
|
||||
$temp1$$Register, $temp2$$Register, $temp3$$Register,
|
||||
$temp4$$Register, $result$$Register, super_klass_slot);
|
||||
__ lookup_secondary_supers_table_const($sub$$Register, $super$$Register,
|
||||
$temp1$$Register, $temp2$$Register, $temp3$$Register,
|
||||
$temp4$$Register, $result$$Register, super_klass_slot);
|
||||
} else {
|
||||
AddressLiteral stub_address(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot));
|
||||
__ load_const_optimized(Z_ARG4, stub_address);
|
||||
@ -10017,21 +10042,6 @@ instruct partialSubtypeCheckConstSuper(rarg2RegP sub, rarg1RegP super, immP supe
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
instruct partialSubtypeCheck_vs_zero(flagsReg pcc, rarg2RegP sub, rarg3RegP super, immP0 zero,
|
||||
rarg1RegP index, rarg4RegP scratch1, rarg5RegP scratch2) %{
|
||||
match(Set pcc (CmpI (PartialSubtypeCheck sub super) zero));
|
||||
effect(KILL scratch1, KILL scratch2, KILL index);
|
||||
ins_cost(10 * DEFAULT_COST);
|
||||
// TODO: s390 port size(FIXED_SIZE);
|
||||
format %{ "CALL PartialSubtypeCheck_vs_zero\n" %}
|
||||
ins_encode %{
|
||||
AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
|
||||
__ load_const_optimized(Z_ARG4, stub_address);
|
||||
__ z_basr(Z_R14, Z_ARG4);
|
||||
%}
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
// inlined locking and unlocking
|
||||
|
||||
|
||||
@ -635,9 +635,9 @@ class StubGenerator: public StubCodeGenerator {
|
||||
r_result = Z_R11;
|
||||
address start = __ pc();
|
||||
|
||||
__ lookup_secondary_supers_table(r_sub_klass, r_super_klass,
|
||||
r_array_base, r_array_length, r_array_index,
|
||||
r_bitmap, r_result, super_klass_index);
|
||||
__ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass,
|
||||
r_array_base, r_array_length, r_array_index,
|
||||
r_bitmap, r_result, super_klass_index);
|
||||
|
||||
__ z_br(Z_R14);
|
||||
|
||||
@ -659,7 +659,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
r_result = Z_R11;
|
||||
|
||||
__ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base,
|
||||
r_array_index, r_bitmap, r_result, r_temp1);
|
||||
r_array_index, r_bitmap, r_temp1, r_result, /* is_stub */ true);
|
||||
|
||||
__ z_br(Z_R14);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user