diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index 213aa5efe1e..dee64d3db26 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -2539,13 +2539,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L } else { bool need_slow_path = !k->is_loaded() || ((int) k->super_check_offset() == in_bytes(Klass::secondary_super_cache_offset())); - intptr_t super_check_offset = k->is_loaded() ? k->super_check_offset() : -1L; __ load_klass(klass_RInfo, obj); // Perform the fast part of the checking logic. __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, (need_slow_path ? success_target : nullptr), - failure_target, nullptr, - RegisterOrConstant(super_check_offset)); + failure_target, nullptr); if (need_slow_path) { // Call out-of-line instance of __ check_klass_subtype_slow_path(...): address a = Runtime1::entry_for (C1StubId::slow_subtype_check_id); diff --git a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp index 8b30adb4785..0ada76ccef7 100644 --- a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp +++ b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp @@ -557,7 +557,12 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) { __ z_lg(Rsubklass, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP); __ z_lg(Rsuperklass, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP); - __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, Rarray_ptr, Rlength, nullptr, &miss); + __ check_klass_subtype_slow_path(Rsubklass, + Rsuperklass, + Rarray_ptr /* temp_reg */, + Rlength /* temp2_reg */, + nullptr /* L_success */, + &miss /* L_failure */); // Match falls through here. 
i = 0; diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 9e1c5cbced3..aacfb894c72 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -2981,21 +2981,15 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterOrConstant super_check_offset) { + Register super_check_offset) { + // Input registers must not overlap. + assert_different_registers(sub_klass, super_klass, temp1_reg, super_check_offset); - const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); const int sco_offset = in_bytes(Klass::super_check_offset_offset()); - - bool must_load_sco = (super_check_offset.constant_or_zero() == -1); - bool need_slow_path = (must_load_sco || - super_check_offset.constant_or_zero() == sc_offset); + bool must_load_sco = ! super_check_offset->is_valid(); // Input registers must not overlap. 
- assert_different_registers(sub_klass, super_klass, temp1_reg); - if (super_check_offset.is_register()) { - assert_different_registers(sub_klass, super_klass, - super_check_offset.as_register()); - } else if (must_load_sco) { + if (must_load_sco) { assert(temp1_reg != noreg, "supply either a temp or a register offset"); } @@ -3006,9 +3000,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } - assert(label_nulls <= 1 || - (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), - "at most one null in the batch, usually"); + assert(label_nulls <= 1 || (L_slow_path == &L_fallthrough && label_nulls <= 2), "at most one null in the batch, usually"); BLOCK_COMMENT("check_klass_subtype_fast_path {"); // If the pointers are equal, we are done (e.g., String[] elements). @@ -3023,10 +3015,12 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, // Check the supertype display, which is uint. if (must_load_sco) { z_llgf(Rsuper_check_offset, sco_offset, super_klass); - super_check_offset = RegisterOrConstant(Rsuper_check_offset); + super_check_offset = Rsuper_check_offset; } + Address super_check_addr(sub_klass, super_check_offset, 0); z_cg(super_klass, super_check_addr); // compare w/ displayed supertype + branch_optimized(Assembler::bcondEqual, *L_success); // This check has worked decisively for primary supers. // Secondary supers are sought in the super_cache ('super_cache_addr'). 
@@ -3044,46 +3038,27 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, if (&(label) == &L_fallthrough) { /*do nothing*/ } \ else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ - if (super_check_offset.is_register()) { - branch_optimized(Assembler::bcondEqual, *L_success); - z_cfi(super_check_offset.as_register(), sc_offset); - if (L_failure == &L_fallthrough) { - branch_optimized(Assembler::bcondEqual, *L_slow_path); - } else { - branch_optimized(Assembler::bcondNotEqual, *L_failure); - final_jmp(*L_slow_path); - } - } else if (super_check_offset.as_constant() == sc_offset) { - // Need a slow path; fast failure is impossible. - if (L_slow_path == &L_fallthrough) { - branch_optimized(Assembler::bcondEqual, *L_success); - } else { - branch_optimized(Assembler::bcondNotEqual, *L_slow_path); - final_jmp(*L_success); - } + z_cfi(super_check_offset, in_bytes(Klass::secondary_super_cache_offset())); + if (L_failure == &L_fallthrough) { + branch_optimized(Assembler::bcondEqual, *L_slow_path); } else { - // No slow path; it's a fast decision. - if (L_failure == &L_fallthrough) { - branch_optimized(Assembler::bcondEqual, *L_success); - } else { - branch_optimized(Assembler::bcondNotEqual, *L_failure); - final_jmp(*L_success); - } + branch_optimized(Assembler::bcondNotEqual, *L_failure); + final_jmp(*L_slow_path); } bind(L_fallthrough); -#undef local_brc #undef final_jmp BLOCK_COMMENT("} check_klass_subtype_fast_path"); // fallthru (to slow path) } -void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, - Register Rsuperklass, - Register Rarray_ptr, // tmp - Register Rlength, // tmp - Label* L_success, - Label* L_failure) { +void MacroAssembler::check_klass_subtype_slow_path_linear(Register Rsubklass, + Register Rsuperklass, + Register Rarray_ptr, // tmp + Register Rlength, // tmp + Label* L_success, + Label* L_failure, + bool set_cond_codes /* unused */) { // Input registers must not overlap. 
// Also check for R1 which is explicitly used here. assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); @@ -3106,7 +3081,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, NearLabel loop_iterate, loop_count, match; - BLOCK_COMMENT("check_klass_subtype_slow_path {"); + BLOCK_COMMENT("check_klass_subtype_slow_path_linear {"); z_lg(Rarray_ptr, ss_offset, Rsubklass); load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); @@ -3134,18 +3109,151 @@ void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, branch_optimized(Assembler::bcondAlways, *L_failure); // Got a hit. Return success (zero result). Set cache. - // Cache load doesn't happen here. For speed it is directly emitted by the compiler. + // Cache load doesn't happen here. For speed, it is directly emitted by the compiler. BIND(match); - z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. - + if (UseSecondarySupersCache) { + z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. + } final_jmp(*L_success); // Exit to the surrounding code. BIND(L_fallthrough); -#undef local_brc #undef final_jmp + BLOCK_COMMENT("} check_klass_subtype_slow_path_linear"); +} + +// If Register r is invalid, remove a new register from +// available_regs, and add new register to regs_to_push. +Register MacroAssembler::allocate_if_noreg(Register r, + RegSetIterator &available_regs, + RegSet ®s_to_push) { + if (!r->is_valid()) { + r = *available_regs++; + regs_to_push += r; + } + return r; +} + +// check_klass_subtype_slow_path_table() looks for super_klass in the +// hash table belonging to super_klass, branching to L_success or +// L_failure as appropriate. This is essentially a shim which +// allocates registers as necessary and then calls +// lookup_secondary_supers_table() to do the work. Any of the temp +// regs may be noreg, in which case this logic will choose some +// registers push and pop them from the stack. 
+void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register temp4_reg, + Register result_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + BLOCK_COMMENT("check_klass_subtype_slow_path_table {"); + + RegSet temps = RegSet::of(temp_reg, temp2_reg, temp3_reg, temp4_reg); + + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp4_reg); + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + + RegSetIterator available_regs + // Z_R0 will be used to hold Z_R15(Z_SP) while pushing a new frame, so don't use that here. + // Z_R1 will be used to hold r_bitmap in lookup_secondary_supers_table_var, so can't be used + // Z_R2, Z_R3, Z_R4 will be used in secondary_supers_verify, for the failure reporting + = (RegSet::range(Z_R0, Z_R15) - temps - sub_klass - super_klass - Z_R1_scratch - Z_R0_scratch - Z_R2 - Z_R3 - Z_R4).begin(); + + RegSet pushed_regs; + + temp_reg = allocate_if_noreg(temp_reg, available_regs, pushed_regs); + temp2_reg = allocate_if_noreg(temp2_reg, available_regs, pushed_regs); + temp3_reg = allocate_if_noreg(temp3_reg, available_regs, pushed_regs); + temp4_reg = allocate_if_noreg(temp4_reg, available_regs, pushed_regs); + result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs); + + const int frame_size = pushed_regs.size() * BytesPerWord + frame::z_abi_160_size; + + // Push & save registers + { + int i = 0; + save_return_pc(); + push_frame(frame_size); + + for (auto it = pushed_regs.begin(); *it != noreg; i++) { + z_stg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP); + } + assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity"); + } + + 
lookup_secondary_supers_table_var(sub_klass, + super_klass, + temp_reg, temp2_reg, temp3_reg, temp4_reg, result_reg); + + // NOTE: Condition Code should not be altered before jump instruction below !!!! + z_cghi(result_reg, 0); + + { + int i = 0; + for (auto it = pushed_regs.begin(); *it != noreg; ++i) { + z_lg(*it++, i * BytesPerWord + frame::z_abi_160_size, Z_SP); + } + assert(i * BytesPerWord + frame::z_abi_160_size == frame_size, "sanity"); + pop_frame(); + restore_return_pc(); + } + + // NB! Callers may assume that, when set_cond_codes is true, this + // code sets temp2_reg to a nonzero value. + if (set_cond_codes) { + z_lghi(temp2_reg, 1); + } + + branch_optimized(bcondNotEqual, *L_failure); + + if(L_success != &L_fallthrough) { + z_bru(*L_success); + } + + bind(L_fallthrough); + BLOCK_COMMENT("} check_klass_subtype_slow_path_table"); +} + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + BLOCK_COMMENT("check_klass_subtype_slow_path {"); + if (UseSecondarySupersTable) { + check_klass_subtype_slow_path_table(sub_klass, + super_klass, + temp_reg, + temp2_reg, + /*temp3*/noreg, + /*temp4*/noreg, + /*result*/noreg, + L_success, + L_failure, + set_cond_codes); + } else { + check_klass_subtype_slow_path_linear(sub_klass, + super_klass, + temp_reg, + temp2_reg, + L_success, + L_failure, + set_cond_codes); + } BLOCK_COMMENT("} check_klass_subtype_slow_path"); } @@ -3206,17 +3314,17 @@ do { \ } while(0) // Note: this method also kills Z_R1_scratch register on machines older than z15 -void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, - Register r_super_klass, - Register r_temp1, - Register r_temp2, - Register r_temp3, - Register r_temp4, - Register r_result, - u1 super_klass_slot) { +void MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass, + Register r_super_klass, + 
Register r_temp1, + Register r_temp2, + Register r_temp3, + Register r_temp4, + Register r_result, + u1 super_klass_slot) { NearLabel L_done, L_failure; - BLOCK_COMMENT("lookup_secondary_supers_table {"); + BLOCK_COMMENT("lookup_secondary_supers_table_const {"); const Register r_array_base = r_temp1, @@ -3291,7 +3399,7 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, z_lghi(r_result, 1); bind(L_done); - BLOCK_COMMENT("} lookup_secondary_supers_table"); + BLOCK_COMMENT("} lookup_secondary_supers_table_const"); if (VerifySecondarySupers) { verify_secondary_supers_table(r_sub_klass, r_super_klass, r_result, @@ -3299,6 +3407,116 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, } } +// At runtime, return 0 in result if r_super_klass is a superclass of +// r_sub_klass, otherwise return nonzero. Use this version of +// lookup_secondary_supers_table() if you don't know ahead of time +// which superclass will be searched for. Used by interpreter and +// runtime stubs. It is larger and has somewhat greater latency than +// the version above, which takes a constant super_klass_slot. +void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result) { + assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result, Z_R1_scratch); + + Label L_done, L_failure; + + BLOCK_COMMENT("lookup_secondary_supers_table_var {"); + + const Register + r_array_index = temp3, + slot = temp4, // NOTE: "slot" can't be Z_R0 otherwise z_sllg and z_rllg instructions below will mess up!!!! + r_bitmap = Z_R1_scratch; + + z_llgc(slot, Address(r_super_klass, Klass::hash_slot_offset())); + + // Initialize r_result with 0 (indicating success). If searching fails, r_result will be loaded + // with 1 (failure) at the end of this method. 
+ clear_reg(result, true /* whole_reg */, false /* set_cc */); // result = 0 + + z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); + + // First check the bitmap to see if super_klass might be present. If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. + z_xilf(slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); // slot ^ 63 === 63 - slot (mod 64) + z_sllg(r_array_index, r_bitmap, /*d2 = */ 0, /* b2 = */ slot); + + testbit(r_array_index, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1); + branch_optimized(bcondAllZero, L_failure); + + const Register + r_array_base = temp1, + r_array_length = temp2; + + // Get the first array index that can contain super_klass into r_array_index. + // NOTE: Z_R1_scratch is holding bitmap (look above for r_bitmap). So let's try to save it. + // On the other hand, r_array_base/temp1 is free at current moment (look at the load operation below). + pop_count_long(r_array_index, r_array_index, temp1); // kills r_array_base/temp1 on machines older than z15 + + // The value i in r_array_index is >= 1, so even though r_array_base + // points to the length, we don't need to adjust it to point to the data. + assert(Array::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array::length_offset_in_bytes() == 0, "Adjust this code"); + + // We will consult the secondary-super array. + z_lg(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. + z_sllg(r_array_index, r_array_index, LogBytesPerWord); // scale, r_array_index is loaded by popcnt above + + z_cg(r_super_klass, Address(r_array_base, r_array_index)); + branch_optimized(bcondEqual, L_done); // found a match + + // Note: this is a small hack: + // + // The operation "(slot ^ 63) === 63 - slot (mod 64)" has already been performed above. 
+ // Since we lack a rotate-right instruction, we achieve the same effect by rotating left + // by "64 - slot" positions. This produces the result equivalent to a right rotation by "slot" positions. + // + // => initial slot value + // => slot = 63 - slot // done above with that z_xilf instruction + // => slot = 64 - slot // need to do for rotating right by "slot" positions + // => slot = 64 - (63 - slot) + // => slot = slot - 63 + 64 + // => slot = slot + 1 + // + // So instead of rotating-left by 64-slot times, we can, for now, just rotate left by slot+1 and it would be fine. + + // Linear probe. Rotate the bitmap so that the next bit to test is + // in Bit 1. + z_aghi(slot, 1); // slot = slot + 1 + + z_rllg(r_bitmap, r_bitmap, /*d2=*/ 0, /*b2=*/ slot); + testbit(r_bitmap, 1); + branch_optimized(bcondAllZero, L_failure); + + // The slot we just inspected is at secondary_supers[r_array_index - 1]. + // The next slot to be inspected, by the logic we're about to call, + // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap + // have been checked. + lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, + r_bitmap, /*temp=*/ r_array_length, result, /*is_stub*/false); + + // pass whatever we got from slow path + z_bru(L_done); + + bind(L_failure); + z_lghi(result, 1); // load 1 to represent failure + + bind(L_done); + + BLOCK_COMMENT("} lookup_secondary_supers_table_var"); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, result, + temp1, temp2, temp3); + } +} + // Called by code generated by check_klass_subtype_slow_path // above. This is called when there is a collision in the hashed // lookup in the secondary supers array. 
@@ -3306,15 +3524,18 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl Register r_array_base, Register r_array_index, Register r_bitmap, + Register r_temp, Register r_result, - Register r_temp1) { - assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp1); + bool is_stub) { + assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, r_result, r_temp); const Register - r_array_length = r_temp1, + r_array_length = r_temp, r_sub_klass = noreg; - LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + if(is_stub) { + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + } BLOCK_COMMENT("lookup_secondary_supers_table_slow_path {"); NearLabel L_done, L_failure; @@ -3343,8 +3564,10 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl { // This is conventional linear probing, but instead of terminating // when a null entry is found in the table, we maintain a bitmap // in which a 0 indicates missing entries. - // The check above guarantees there are 0s in the bitmap, so the loop - // eventually terminates. + // As long as the bitmap is not completely full, + // array_length == popcount(bitmap). The array_length check above + // guarantees there are 0s in the bitmap, so the loop eventually + // terminates. #ifdef ASSERT // r_result is set to 0 by lookup_secondary_supers_table. 
@@ -3417,8 +3640,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, const Register r_one = Z_R0_scratch; z_lghi(r_one, 1); // for locgr down there, to a load result for failure - LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; - BLOCK_COMMENT("verify_secondary_supers_table {"); Label L_passed, L_failure; diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp index 7806fef3ce8..91703fac994 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp @@ -694,7 +694,7 @@ class MacroAssembler: public Assembler { Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + Register super_check_offset = noreg); // The rest of the type check; must be wired to a corresponding fast path. // It does not repeat the fast path logic, so don't use it standalone. @@ -706,25 +706,62 @@ class MacroAssembler: public Assembler { Register Rarray_ptr, // tmp Register Rlength, // tmp Label* L_success, - Label* L_failure); + Label* L_failure, + bool set_cond_codes = false); + + void check_klass_subtype_slow_path_linear(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + void check_klass_subtype_slow_path_table(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register temp4_reg, + Register result_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // If r is valid, return r. + // If r is invalid, remove a register r2 from available_regs, add r2 + // to regs_to_push, then return r2. 
+ Register allocate_if_noreg(const Register r, + RegSetIterator &available_regs, + RegSet ®s_to_push); void repne_scan(Register r_addr, Register r_value, Register r_count, Register r_scratch); - void lookup_secondary_supers_table(Register r_sub_klass, - Register r_super_klass, - Register r_temp1, - Register r_temp2, - Register r_temp3, - Register r_temp4, - Register r_result, - u1 super_klass_slot); + // Secondary subtype checking + void lookup_secondary_supers_table_var(Register sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result); + + void lookup_secondary_supers_table_const(Register r_sub_klass, + Register r_super_klass, + Register r_temp1, + Register r_temp2, + Register r_temp3, + Register r_temp4, + Register r_result, + u1 super_klass_slot); void lookup_secondary_supers_table_slow_path(Register r_super_klass, Register r_array_base, Register r_array_index, Register r_bitmap, + Register r_temp, Register r_result, - Register r_temp1); + bool is_stub); void verify_secondary_supers_table(Register r_sub_klass, Register r_super_klass, diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index e1a98139992..0f1d98d54b9 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -9979,8 +9979,9 @@ instruct ShouldNotReachHere() %{ instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, flagsReg pcc, rarg4RegP scratch1, rarg5RegP scratch2) %{ match(Set index (PartialSubtypeCheck sub super)); + predicate(!UseSecondarySupersTable); effect(KILL pcc, KILL scratch1, KILL scratch2); - ins_cost(10 * DEFAULT_COST); + ins_cost(20 * DEFAULT_COST); // slightly larger than the next version // TODO: s390 port size(FIXED_SIZE); format %{ " CALL PartialSubtypeCheck\n" %} ins_encode %{ @@ -9991,21 +9992,45 @@ instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, fl ins_pipe(pipe_class_dummy); %} +// Two versions of 
partialSubtypeCheck, both used when we need to +// search for a super class in the secondary supers array. The first +// is used when we don't know _a priori_ the class being searched +// for. The second, far more common, is used when we do know: this is +// used for instanceof, checkcast, and any case where C2 can determine +// it by constant propagation. +instruct partialSubtypeCheckVarSuper(rarg2RegP sub, rarg3RegP super, + r11TempRegP result, + rarg1RegP temp1, rarg4RegP temp2, rarg5RegP temp3, r10TempRegP temp4, + flagsReg pcc) %{ + match(Set result (PartialSubtypeCheck sub super)); + predicate(UseSecondarySupersTable); + effect(KILL pcc, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4); + ins_cost(10 * DEFAULT_COST); // slightly larger than the next version + format %{ "partialSubtypeCheck $result, $sub, $super" %} + ins_encode %{ + __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, + $temp1$$Register, $temp2$$Register, $temp3$$Register, $temp4$$Register, + $result$$Register); + %} + ins_pipe(pipe_class_dummy); +%} + + instruct partialSubtypeCheckConstSuper(rarg2RegP sub, rarg1RegP super, immP super_con, r11TempRegP result, rarg5RegP temp1, rarg4RegP temp2, rarg3RegP temp3, r10TempRegP temp4, flagsReg pcc) %{ match(Set result (PartialSubtypeCheck sub (Binary super super_con))); predicate(UseSecondarySupersTable); effect(KILL pcc, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4); - ins_cost(7 * DEFAULT_COST); // needs to be less than competing nodes + ins_cost(5 * DEFAULT_COST); // smaller than the next version format %{ "partialSubtypeCheck $result, $sub, $super, $super_con" %} ins_encode %{ u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); if (InlineSecondarySupersTest) { - __ lookup_secondary_supers_table($sub$$Register, $super$$Register, - $temp1$$Register, $temp2$$Register, $temp3$$Register, - $temp4$$Register, $result$$Register, super_klass_slot); + __ lookup_secondary_supers_table_const($sub$$Register, 
$super$$Register, + $temp1$$Register, $temp2$$Register, $temp3$$Register, + $temp4$$Register, $result$$Register, super_klass_slot); } else { AddressLiteral stub_address(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)); __ load_const_optimized(Z_ARG4, stub_address); @@ -10017,21 +10042,6 @@ instruct partialSubtypeCheckConstSuper(rarg2RegP sub, rarg1RegP super, immP supe ins_pipe(pipe_class_dummy); %} -instruct partialSubtypeCheck_vs_zero(flagsReg pcc, rarg2RegP sub, rarg3RegP super, immP0 zero, - rarg1RegP index, rarg4RegP scratch1, rarg5RegP scratch2) %{ - match(Set pcc (CmpI (PartialSubtypeCheck sub super) zero)); - effect(KILL scratch1, KILL scratch2, KILL index); - ins_cost(10 * DEFAULT_COST); - // TODO: s390 port size(FIXED_SIZE); - format %{ "CALL PartialSubtypeCheck_vs_zero\n" %} - ins_encode %{ - AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check()); - __ load_const_optimized(Z_ARG4, stub_address); - __ z_basr(Z_R14, Z_ARG4); - %} - ins_pipe(pipe_class_dummy); -%} - // ============================================================================ // inlined locking and unlocking diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index dd9ed4c9546..9e33cd3abe8 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -635,9 +635,9 @@ class StubGenerator: public StubCodeGenerator { r_result = Z_R11; address start = __ pc(); - __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, - r_array_base, r_array_length, r_array_index, - r_bitmap, r_result, super_klass_index); + __ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass, + r_array_base, r_array_length, r_array_index, + r_bitmap, r_result, super_klass_index); __ z_br(Z_R14); @@ -659,7 +659,7 @@ class StubGenerator: public StubCodeGenerator { r_result = Z_R11; __ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, - r_array_index, 
r_bitmap, r_result, r_temp1); + r_array_index, r_bitmap, r_temp1, r_result, /* is_stub */ true); __ z_br(Z_R14);