8339983: [s390x] secondary_super_cache does not scale well: C1 and interpreter

Reviewed-by: lucy, aph
This commit is contained in:
Amit Kumar 2024-12-03 04:06:39 +00:00
parent e023addf70
commit a3b58ee5cd
6 changed files with 379 additions and 108 deletions

View File

@ -2539,13 +2539,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
} else {
bool need_slow_path = !k->is_loaded() ||
((int) k->super_check_offset() == in_bytes(Klass::secondary_super_cache_offset()));
intptr_t super_check_offset = k->is_loaded() ? k->super_check_offset() : -1L;
__ load_klass(klass_RInfo, obj);
// Perform the fast part of the checking logic.
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1,
(need_slow_path ? success_target : nullptr),
failure_target, nullptr,
RegisterOrConstant(super_check_offset));
failure_target, nullptr);
if (need_slow_path) {
// Call out-of-line instance of __ check_klass_subtype_slow_path(...):
address a = Runtime1::entry_for (C1StubId::slow_subtype_check_id);

View File

@ -557,7 +557,12 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
__ z_lg(Rsubklass, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
__ z_lg(Rsuperklass, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP);
__ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, Rarray_ptr, Rlength, nullptr, &miss);
__ check_klass_subtype_slow_path(Rsubklass,
Rsuperklass,
Rarray_ptr /* temp_reg */,
Rlength /* temp2_reg */,
nullptr /* L_success */,
&miss /* L_failure */);
// Match falls through here.
i = 0;

View File

@ -2981,21 +2981,15 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterOrConstant super_check_offset) {
Register super_check_offset) {
// Input registers must not overlap.
assert_different_registers(sub_klass, super_klass, temp1_reg, super_check_offset);
const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
const int sco_offset = in_bytes(Klass::super_check_offset_offset());
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
bool need_slow_path = (must_load_sco ||
super_check_offset.constant_or_zero() == sc_offset);
bool must_load_sco = ! super_check_offset->is_valid();
// Input registers must not overlap.
assert_different_registers(sub_klass, super_klass, temp1_reg);
if (super_check_offset.is_register()) {
assert_different_registers(sub_klass, super_klass,
super_check_offset.as_register());
} else if (must_load_sco) {
if (must_load_sco) {
assert(temp1_reg != noreg, "supply either a temp or a register offset");
}
@ -3006,9 +3000,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1 ||
(L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
"at most one null in the batch, usually");
assert(label_nulls <= 1 || (L_slow_path == &L_fallthrough && label_nulls <= 2), "at most one null in the batch, usually");
BLOCK_COMMENT("check_klass_subtype_fast_path {");
// If the pointers are equal, we are done (e.g., String[] elements).
@ -3023,10 +3015,12 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
// Check the supertype display, which is uint.
if (must_load_sco) {
z_llgf(Rsuper_check_offset, sco_offset, super_klass);
super_check_offset = RegisterOrConstant(Rsuper_check_offset);
super_check_offset = Rsuper_check_offset;
}
Address super_check_addr(sub_klass, super_check_offset, 0);
z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
branch_optimized(Assembler::bcondEqual, *L_success);
// This check has worked decisively for primary supers.
// Secondary supers are sought in the super_cache ('super_cache_addr').
@ -3044,46 +3038,27 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
if (&(label) == &L_fallthrough) { /*do nothing*/ } \
else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/
if (super_check_offset.is_register()) {
branch_optimized(Assembler::bcondEqual, *L_success);
z_cfi(super_check_offset.as_register(), sc_offset);
if (L_failure == &L_fallthrough) {
branch_optimized(Assembler::bcondEqual, *L_slow_path);
} else {
branch_optimized(Assembler::bcondNotEqual, *L_failure);
final_jmp(*L_slow_path);
}
} else if (super_check_offset.as_constant() == sc_offset) {
// Need a slow path; fast failure is impossible.
if (L_slow_path == &L_fallthrough) {
branch_optimized(Assembler::bcondEqual, *L_success);
} else {
branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
final_jmp(*L_success);
}
z_cfi(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
if (L_failure == &L_fallthrough) {
branch_optimized(Assembler::bcondEqual, *L_slow_path);
} else {
// No slow path; it's a fast decision.
if (L_failure == &L_fallthrough) {
branch_optimized(Assembler::bcondEqual, *L_success);
} else {
branch_optimized(Assembler::bcondNotEqual, *L_failure);
final_jmp(*L_success);
}
branch_optimized(Assembler::bcondNotEqual, *L_failure);
final_jmp(*L_slow_path);
}
bind(L_fallthrough);
#undef local_brc
#undef final_jmp
BLOCK_COMMENT("} check_klass_subtype_fast_path");
// fallthru (to slow path)
}
void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
Register Rsuperklass,
Register Rarray_ptr, // tmp
Register Rlength, // tmp
Label* L_success,
Label* L_failure) {
void MacroAssembler::check_klass_subtype_slow_path_linear(Register Rsubklass,
Register Rsuperklass,
Register Rarray_ptr, // tmp
Register Rlength, // tmp
Label* L_success,
Label* L_failure,
bool set_cond_codes /* unused */) {
// Input registers must not overlap.
// Also check for R1 which is explicitly used here.
assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
@ -3106,7 +3081,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
NearLabel loop_iterate, loop_count, match;
BLOCK_COMMENT("check_klass_subtype_slow_path {");
BLOCK_COMMENT("check_klass_subtype_slow_path_linear {");
z_lg(Rarray_ptr, ss_offset, Rsubklass);
load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
@ -3134,18 +3109,151 @@ void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
branch_optimized(Assembler::bcondAlways, *L_failure);
// Got a hit. Return success (zero result). Set cache.
// Cache load doesn't happen here. For speed it is directly emitted by the compiler.
// Cache load doesn't happen here. For speed, it is directly emitted by the compiler.
BIND(match);
z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
if (UseSecondarySupersCache) {
z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
}
final_jmp(*L_success);
// Exit to the surrounding code.
BIND(L_fallthrough);
#undef local_brc
#undef final_jmp
BLOCK_COMMENT("} check_klass_subtype_slow_path_linear");
}
// Returns r unchanged when it is already a valid register. Otherwise
// draws the next free register from available_regs, records it in
// regs_to_push (so the caller knows it must be saved and restored
// around the call), and returns that freshly allocated register.
Register MacroAssembler::allocate_if_noreg(Register r,
                                           RegSetIterator<Register> &available_regs,
                                           RegSet &regs_to_push) {
  if (r->is_valid()) {
    return r;
  }
  const Register fresh = *available_regs++;
  regs_to_push += fresh;
  return fresh;
}
// check_klass_subtype_slow_path_table() looks for super_klass in the
// hash table belonging to super_klass, branching to L_success or
// L_failure as appropriate. This is essentially a shim which
// allocates registers as necessary and then calls
// lookup_secondary_supers_table_var() to do the work. Any of the temp
// regs may be noreg, in which case this logic will choose some
// registers, push them before the lookup, and pop them afterwards.
void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass,
                                                         Register super_klass,
                                                         Register temp_reg,
                                                         Register temp2_reg,
                                                         Register temp3_reg,
                                                         Register temp4_reg,
                                                         Register result_reg,
                                                         Label* L_success,
                                                         Label* L_failure,
                                                         bool set_cond_codes) {
  BLOCK_COMMENT("check_klass_subtype_slow_path_table {");

  RegSet temps = RegSet::of(temp_reg, temp2_reg, temp3_reg, temp4_reg);

  // NOTE(review): temp3_reg is deliberately absent from this assert;
  // presumably it may alias or be noreg here — confirm against callers.
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp4_reg);

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one null in the batch");

  RegSetIterator<Register> available_regs
    // Z_R0 will be used to hold Z_R15(Z_SP) while pushing a new frame, so don't use that here.
    // Z_R1 will be used to hold r_bitmap in lookup_secondary_supers_table_var, so it can't be used.
    // Z_R2, Z_R3, Z_R4 will be used in secondary_supers_verify, for the failure reporting.
    = (RegSet::range(Z_R0, Z_R15) - temps - sub_klass - super_klass - Z_R1_scratch - Z_R0_scratch - Z_R2 - Z_R3 - Z_R4).begin();

  RegSet pushed_regs;

  // Any temp the caller left as noreg gets a fresh register here; those
  // registers are recorded in pushed_regs and saved/restored below.
  temp_reg   = allocate_if_noreg(temp_reg,   available_regs, pushed_regs);
  temp2_reg  = allocate_if_noreg(temp2_reg,  available_regs, pushed_regs);
  temp3_reg  = allocate_if_noreg(temp3_reg,  available_regs, pushed_regs); // stray ';;' removed
  temp4_reg  = allocate_if_noreg(temp4_reg,  available_regs, pushed_regs);
  result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs);

  const int frame_size = pushed_regs.size() * BytesPerWord + frame::z_abi_160_size;

  // Push a frame and save the allocated registers into it.
  {
    int i = 0;
    save_return_pc();
    push_frame(frame_size);
    for (auto it = pushed_regs.begin(); *it != noreg; i++) {
      z_stg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP);
    }
    assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity");
  }

  lookup_secondary_supers_table_var(sub_klass,
                                    super_klass,
                                    temp_reg, temp2_reg, temp3_reg, temp4_reg, result_reg);

  // NOTE: The condition code must not be altered between this compare
  // and the conditional branch below (the register reloads use LG,
  // which does not set the CC).
  z_cghi(result_reg, 0); // CC: result == 0 means the super was found

  // Restore the saved registers and tear the frame down again.
  {
    int i = 0;
    for (auto it = pushed_regs.begin(); *it != noreg; ++i) {
      z_lg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP);
    }
    assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity");
    pop_frame();
    restore_return_pc();
  }

  // NB! Callers may assume that, when set_cond_codes is true, this
  // code sets temp2_reg to a nonzero value.
  if (set_cond_codes) {
    z_lghi(temp2_reg, 1); // LGHI does not change the CC set above
  }

  branch_optimized(bcondNotEqual, *L_failure);

  if (L_success != &L_fallthrough) {
    z_bru(*L_success);
  }

  bind(L_fallthrough);

  BLOCK_COMMENT("} check_klass_subtype_slow_path_table");
}
// Slow-path secondary-supers check: dispatches either to the hashed
// table lookup (when UseSecondarySupersTable is on) or to the legacy
// linear scan of the secondary supers array.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  BLOCK_COMMENT("check_klass_subtype_slow_path {");
  if (!UseSecondarySupersTable) {
    check_klass_subtype_slow_path_linear(sub_klass, super_klass,
                                         temp_reg, temp2_reg,
                                         L_success, L_failure,
                                         set_cond_codes);
  } else {
    check_klass_subtype_slow_path_table(sub_klass, super_klass,
                                        temp_reg, temp2_reg,
                                        /*temp3*/ noreg,
                                        /*temp4*/ noreg,
                                        /*result*/ noreg,
                                        L_success, L_failure,
                                        set_cond_codes);
  }
  BLOCK_COMMENT("} check_klass_subtype_slow_path");
}
@ -3206,17 +3314,17 @@ do { \
} while(0)
// Note: this method also kills Z_R1_scratch register on machines older than z15
void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register r_temp1,
Register r_temp2,
Register r_temp3,
Register r_temp4,
Register r_result,
u1 super_klass_slot) {
void MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass,
Register r_super_klass,
Register r_temp1,
Register r_temp2,
Register r_temp3,
Register r_temp4,
Register r_result,
u1 super_klass_slot) {
NearLabel L_done, L_failure;
BLOCK_COMMENT("lookup_secondary_supers_table {");
BLOCK_COMMENT("lookup_secondary_supers_table_const {");
const Register
r_array_base = r_temp1,
@ -3291,7 +3399,7 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
z_lghi(r_result, 1);
bind(L_done);
BLOCK_COMMENT("} lookup_secondary_supers_table");
BLOCK_COMMENT("} lookup_secondary_supers_table_const");
if (VerifySecondarySupers) {
verify_secondary_supers_table(r_sub_klass, r_super_klass, r_result,
@ -3299,6 +3407,116 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
}
}
// At runtime, return 0 in result if r_super_klass is a superclass of
// r_sub_klass, otherwise return nonzero. Use this version of
// lookup_secondary_supers_table() if you don't know ahead of time
// which superclass will be searched for. Used by interpreter and
// runtime stubs. It is larger and has somewhat greater latency than
// the const version, which takes a constant super_klass_slot.
// Clobbers Z_R1_scratch (used to hold the bitmap).
void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass,
                                                       Register r_super_klass,
                                                       Register temp1,
                                                       Register temp2,
                                                       Register temp3,
                                                       Register temp4,
                                                       Register result) {
  assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result, Z_R1_scratch);

  Label L_done, L_failure;

  BLOCK_COMMENT("lookup_secondary_supers_table_var {");

  const Register
    r_array_index = temp3,
    slot          = temp4, // NOTE: "slot" can't be Z_R0, otherwise the z_sllg and z_rllg instructions below will mess up (base register 0 means "no register")!
    r_bitmap      = Z_R1_scratch;

  // Load this super klass's hash slot (a u1) from its Klass.
  z_llgc(slot, Address(r_super_klass, Klass::hash_slot_offset()));

  // Initialize result with 0 (indicating success). If searching fails, result will be loaded
  // with 1 (failure) at the end of this method.
  clear_reg(result, true /* whole_reg */, false /* set_cc */); // result = 0

  z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset()));

  // First check the bitmap to see if super_klass might be present. If
  // the bit is zero, we are certain that super_klass is not one of
  // the secondary supers.
  z_xilf(slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); // slot ^ 63 === 63 - slot (mod 64)
  z_sllg(r_array_index, r_bitmap, /*d2 = */ 0, /* b2 = */ slot); // shift the slot's bit up to the MSB position
  testbit(r_array_index, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1);
  branch_optimized(bcondAllZero, L_failure); // bit not set => definitely not a secondary super

  const Register
    r_array_base   = temp1,
    r_array_length = temp2;

  // Get the first array index that can contain super_klass into r_array_index.
  // NOTE: Z_R1_scratch is holding bitmap (look above for r_bitmap). So let's try to save it.
  // On the other hand, r_array_base/temp1 is free at current moment (look at the load operation below).
  pop_count_long(r_array_index, r_array_index, temp1); // kills r_array_base/temp1 on machines older than z15

  // The value i in r_array_index is >= 1, so even though r_array_base
  // points to the length, we don't need to adjust it to point to the data.
  assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
  assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");

  // We will consult the secondary-super array.
  z_lg(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));

  // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
  z_sllg(r_array_index, r_array_index, LogBytesPerWord); // scale, r_array_index is loaded by popcnt above

  // Probe the home slot: does it already hold our super klass?
  z_cg(r_super_klass, Address(r_array_base, r_array_index));
  branch_optimized(bcondEqual, L_done); // found a match

  // Note: this is a small hack:
  //
  // The operation "(slot ^ 63) === 63 - slot (mod 64)" has already been performed above.
  // Since we lack a rotate-right instruction, we achieve the same effect by rotating left
  // by "64 - slot" positions. This produces the result equivalent to a right rotation by "slot" positions.
  //
  // => initial slot value
  // => slot = 63 - slot // done above with that z_xilf instruction
  // => slot = 64 - slot // need to do for rotating right by "slot" positions
  // => slot = 64 - (63 - slot)
  // => slot = slot - 63 + 64
  // => slot = slot + 1
  //
  // So instead of rotating-left by 64-slot times, we can, for now, just rotate left by slot+1 and it would be fine.

  // Linear probe. Rotate the bitmap so that the next bit to test is
  // in Bit 1.
  z_aghi(slot, 1); // slot = slot + 1
  z_rllg(r_bitmap, r_bitmap, /*d2=*/ 0, /*b2=*/ slot);
  testbit(r_bitmap, 1);
  branch_optimized(bcondAllZero, L_failure); // next bit clear => home slot was the only candidate

  // The slot we just inspected is at secondary_supers[r_array_index - 1].
  // The next slot to be inspected, by the logic we're about to call,
  // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
  // have been checked.
  lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index,
                                          r_bitmap, /*temp=*/ r_array_length, result, /*is_stub*/false);

  // pass whatever we got from slow path
  z_bru(L_done);

  bind(L_failure);
  z_lghi(result, 1); // load 1 to represent failure

  bind(L_done);
  BLOCK_COMMENT("} lookup_secondary_supers_table_var");

  if (VerifySecondarySupers) {
    verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
                                  temp1, temp2, temp3);
  }
}
// Called by code generated by check_klass_subtype_slow_path
// above. This is called when there is a collision in the hashed
// lookup in the secondary supers array.
@ -3306,15 +3524,18 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
Register r_array_base,
Register r_array_index,
Register r_bitmap,
Register r_temp,
Register r_result,
Register r_temp1) {
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp1);
bool is_stub) {
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp);
const Register
r_array_length = r_temp1,
r_array_length = r_temp,
r_sub_klass = noreg;
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
if(is_stub) {
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
}
BLOCK_COMMENT("lookup_secondary_supers_table_slow_path {");
NearLabel L_done, L_failure;
@ -3343,8 +3564,10 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
{ // This is conventional linear probing, but instead of terminating
// when a null entry is found in the table, we maintain a bitmap
// in which a 0 indicates missing entries.
// The check above guarantees there are 0s in the bitmap, so the loop
// eventually terminates.
// As long as the bitmap is not completely full,
// array_length == popcount(bitmap). The array_length check above
// guarantees there are 0s in the bitmap, so the loop eventually
// terminates.
#ifdef ASSERT
// r_result is set to 0 by lookup_secondary_supers_table.
@ -3417,8 +3640,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
const Register r_one = Z_R0_scratch;
z_lghi(r_one, 1); // for locgr down there, to a load result for failure
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
BLOCK_COMMENT("verify_secondary_supers_table {");
Label L_passed, L_failure;

View File

@ -694,7 +694,7 @@ class MacroAssembler: public Assembler {
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
Register super_check_offset = noreg);
// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
@ -706,25 +706,62 @@ class MacroAssembler: public Assembler {
Register Rarray_ptr, // tmp
Register Rlength, // tmp
Label* L_success,
Label* L_failure);
Label* L_failure,
bool set_cond_codes = false);
void check_klass_subtype_slow_path_linear(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
bool set_cond_codes = false);
void check_klass_subtype_slow_path_table(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Register temp3_reg,
Register temp4_reg,
Register result_reg,
Label* L_success,
Label* L_failure,
bool set_cond_codes = false);
// If r is valid, return r.
// If r is invalid, remove a register r2 from available_regs, add r2
// to regs_to_push, then return r2.
Register allocate_if_noreg(const Register r,
RegSetIterator<Register> &available_regs,
RegSet &regs_to_push);
void repne_scan(Register r_addr, Register r_value, Register r_count, Register r_scratch);
void lookup_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register r_temp1,
Register r_temp2,
Register r_temp3,
Register r_temp4,
Register r_result,
u1 super_klass_slot);
// Secondary subtype checking
void lookup_secondary_supers_table_var(Register sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register temp3,
Register temp4,
Register result);
void lookup_secondary_supers_table_const(Register r_sub_klass,
Register r_super_klass,
Register r_temp1,
Register r_temp2,
Register r_temp3,
Register r_temp4,
Register r_result,
u1 super_klass_slot);
void lookup_secondary_supers_table_slow_path(Register r_super_klass,
Register r_array_base,
Register r_array_index,
Register r_bitmap,
Register r_temp,
Register r_result,
Register r_temp1);
bool is_stub);
void verify_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,

View File

@ -9979,8 +9979,9 @@ instruct ShouldNotReachHere() %{
instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, flagsReg pcc,
rarg4RegP scratch1, rarg5RegP scratch2) %{
match(Set index (PartialSubtypeCheck sub super));
predicate(!UseSecondarySupersTable);
effect(KILL pcc, KILL scratch1, KILL scratch2);
ins_cost(10 * DEFAULT_COST);
ins_cost(20 * DEFAULT_COST); // slightly larger than the next version
// TODO: s390 port size(FIXED_SIZE);
format %{ " CALL PartialSubtypeCheck\n" %}
ins_encode %{
@ -9991,21 +9992,45 @@ instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, fl
ins_pipe(pipe_class_dummy);
%}
// Two versions of partialSubtypeCheck, both used when we need to
// search for a super class in the secondary supers array. The first
// is used when we don't know _a priori_ the class being searched
// for. The second, far more common, is used when we do know: this is
// used for instanceof, checkcast, and any case where C2 can determine
// it by constant propagation.
instruct partialSubtypeCheckVarSuper(rarg2RegP sub, rarg3RegP super,
r11TempRegP result,
rarg1RegP temp1, rarg4RegP temp2, rarg5RegP temp3, r10TempRegP temp4,
flagsReg pcc) %{
match(Set result (PartialSubtypeCheck sub super));
predicate(UseSecondarySupersTable);
effect(KILL pcc, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
ins_cost(10 * DEFAULT_COST); // slightly larger than the next version
format %{ "partialSubtypeCheck $result, $sub, $super" %}
ins_encode %{
__ lookup_secondary_supers_table_var($sub$$Register, $super$$Register,
$temp1$$Register, $temp2$$Register, $temp3$$Register, $temp4$$Register,
$result$$Register);
%}
ins_pipe(pipe_class_dummy);
%}
instruct partialSubtypeCheckConstSuper(rarg2RegP sub, rarg1RegP super, immP super_con,
r11TempRegP result, rarg5RegP temp1, rarg4RegP temp2,
rarg3RegP temp3, r10TempRegP temp4, flagsReg pcc) %{
match(Set result (PartialSubtypeCheck sub (Binary super super_con)));
predicate(UseSecondarySupersTable);
effect(KILL pcc, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
ins_cost(7 * DEFAULT_COST); // needs to be less than competing nodes
ins_cost(5 * DEFAULT_COST); // smaller than the next version
format %{ "partialSubtypeCheck $result, $sub, $super, $super_con" %}
ins_encode %{
u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
if (InlineSecondarySupersTest) {
__ lookup_secondary_supers_table($sub$$Register, $super$$Register,
$temp1$$Register, $temp2$$Register, $temp3$$Register,
$temp4$$Register, $result$$Register, super_klass_slot);
__ lookup_secondary_supers_table_const($sub$$Register, $super$$Register,
$temp1$$Register, $temp2$$Register, $temp3$$Register,
$temp4$$Register, $result$$Register, super_klass_slot);
} else {
AddressLiteral stub_address(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot));
__ load_const_optimized(Z_ARG4, stub_address);
@ -10017,21 +10042,6 @@ instruct partialSubtypeCheckConstSuper(rarg2RegP sub, rarg1RegP super, immP supe
ins_pipe(pipe_class_dummy);
%}
instruct partialSubtypeCheck_vs_zero(flagsReg pcc, rarg2RegP sub, rarg3RegP super, immP0 zero,
rarg1RegP index, rarg4RegP scratch1, rarg5RegP scratch2) %{
match(Set pcc (CmpI (PartialSubtypeCheck sub super) zero));
effect(KILL scratch1, KILL scratch2, KILL index);
ins_cost(10 * DEFAULT_COST);
// TODO: s390 port size(FIXED_SIZE);
format %{ "CALL PartialSubtypeCheck_vs_zero\n" %}
ins_encode %{
AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
__ load_const_optimized(Z_ARG4, stub_address);
__ z_basr(Z_R14, Z_ARG4);
%}
ins_pipe(pipe_class_dummy);
%}
// ============================================================================
// inlined locking and unlocking

View File

@ -635,9 +635,9 @@ class StubGenerator: public StubCodeGenerator {
r_result = Z_R11;
address start = __ pc();
__ lookup_secondary_supers_table(r_sub_klass, r_super_klass,
r_array_base, r_array_length, r_array_index,
r_bitmap, r_result, super_klass_index);
__ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass,
r_array_base, r_array_length, r_array_index,
r_bitmap, r_result, super_klass_index);
__ z_br(Z_R14);
@ -659,7 +659,7 @@ class StubGenerator: public StubCodeGenerator {
r_result = Z_R11;
__ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base,
r_array_index, r_bitmap, r_result, r_temp1);
r_array_index, r_bitmap, r_temp1, r_result, /* is_stub */ true);
__ z_br(Z_R14);