From 8cdd95e8a2a7814ab7983fb3f41e6fa5793d410f Mon Sep 17 00:00:00 2001 From: Boris Ulasevich Date: Wed, 7 Jun 2023 11:27:52 +0000 Subject: [PATCH] 8305959: x86: Improve itable_stub Reviewed-by: phh, shade, aph --- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 119 ++++++++++++++++++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 10 ++ src/hotspot/cpu/x86/vtableStubs_x86_32.cpp | 43 +++---- src/hotspot/cpu/x86/vtableStubs_x86_64.cpp | 35 +++--- .../bench/vm/compiler/InterfaceCalls.java | 109 ++++++++++++++++ 5 files changed, 271 insertions(+), 45 deletions(-) diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 6dc40a48dc2..66f3ebe7b28 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -4300,6 +4300,125 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, } } +// Look up the method for a megamorphic invokeinterface call in a single pass over itable: +// - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICHolder +// - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index +// The target method is determined by <holder_klass, itable_index>. +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method_stub(Register recv_klass, + Register holder_klass, + Register resolved_klass, + Register method_result, + Register scan_temp, + Register temp_reg2, + Register receiver, + int itable_index, + Label& L_no_such_interface) { + assert_different_registers(recv_klass, method_result, holder_klass, resolved_klass, scan_temp, temp_reg2, receiver); + Register temp_itbl_klass = method_result; + Register temp_reg = (temp_reg2 == noreg ? 
recv_klass : temp_reg2); // reuse recv_klass register on 32-bit x86 impl + + int vtable_base = in_bytes(Klass::vtable_start_offset()); + int itentry_off = in_bytes(itableMethodEntry::method_offset()); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size_in_bytes(); + int ioffset = in_bytes(itableOffsetEntry::interface_offset()); + int ooffset = in_bytes(itableOffsetEntry::offset_offset()); + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "adjust times_vte_scale"); + + Label L_loop_scan_resolved_entry, L_resolved_found, L_holder_found; + + // temp_itbl_klass = recv_klass.itable[0] + // scan_temp = &recv_klass.itable[0] + step + movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); + movptr(temp_itbl_klass, Address(recv_klass, scan_temp, times_vte_scale, vtable_base + ioffset)); + lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base + ioffset + scan_step)); + xorptr(temp_reg, temp_reg); + + // Initial checks: + // - if (holder_klass != resolved_klass), go to "scan for resolved" + // - if (itable[0] == 0), no such interface + // - if (itable[0] == holder_klass), shortcut to "holder found" + cmpptr(holder_klass, resolved_klass); + jccb(Assembler::notEqual, L_loop_scan_resolved_entry); + testptr(temp_itbl_klass, temp_itbl_klass); + jccb(Assembler::zero, L_no_such_interface); + cmpptr(holder_klass, temp_itbl_klass); + jccb(Assembler::equal, L_holder_found); + + // Loop: Look for holder_klass record in itable + // do { + // tmp = itable[index]; + // index += step; + // if (tmp == holder_klass) { + // goto L_holder_found; // Found! + // } + // } while (tmp != 0); + // goto L_no_such_interface // Not found. 
+ Label L_scan_holder; + bind(L_scan_holder); + movptr(temp_itbl_klass, Address(scan_temp, 0)); + addptr(scan_temp, scan_step); + cmpptr(holder_klass, temp_itbl_klass); + jccb(Assembler::equal, L_holder_found); + testptr(temp_itbl_klass, temp_itbl_klass); + jccb(Assembler::notZero, L_scan_holder); + + jmpb(L_no_such_interface); + + // Loop: Look for resolved_klass record in itable + // do { + // tmp = itable[index]; + // index += step; + // if (tmp == holder_klass) { + // // Also check if we have met a holder klass + // holder_tmp = itable[index-step-ioffset+ooffset]; + // } + // if (tmp == resolved_klass) { + // goto L_resolved_found; // Found! + // } + // } while (tmp != 0); + // goto L_no_such_interface // Not found. + // + Label L_loop_scan_resolved; + bind(L_loop_scan_resolved); + movptr(temp_itbl_klass, Address(scan_temp, 0)); + addptr(scan_temp, scan_step); + bind(L_loop_scan_resolved_entry); + cmpptr(holder_klass, temp_itbl_klass); + cmovl(Assembler::equal, temp_reg, Address(scan_temp, ooffset - ioffset - scan_step)); + cmpptr(resolved_klass, temp_itbl_klass); + jccb(Assembler::equal, L_resolved_found); + testptr(temp_itbl_klass, temp_itbl_klass); + jccb(Assembler::notZero, L_loop_scan_resolved); + + jmpb(L_no_such_interface); + + Label L_ready; + + // See if we already have a holder klass. If not, go and scan for it. 
+ bind(L_resolved_found); + testptr(temp_reg, temp_reg); + jccb(Assembler::zero, L_scan_holder); + jmpb(L_ready); + + bind(L_holder_found); + movl(temp_reg, Address(scan_temp, ooffset - ioffset - scan_step)); + + // Finally, temp_reg contains holder_klass vtable offset + bind(L_ready); + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + if (temp_reg2 == noreg) { // recv_klass register is clobbered for 32-bit x86 impl + load_klass(scan_temp, receiver, noreg); + movptr(method_result, Address(scan_temp, temp_reg, Address::times_1, itable_index * wordSize + itentry_off)); + } else { + movptr(method_result, Address(recv_klass, temp_reg, Address::times_1, itable_index * wordSize + itentry_off)); + } +} + // virtual method calling void MacroAssembler::lookup_virtual_method(Register recv_klass, diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 3df81129a55..e4c4b0f10b6 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -603,6 +603,16 @@ public: Label& no_such_interface, bool return_method = true); + void lookup_interface_method_stub(Register recv_klass, + Register holder_klass, + Register resolved_klass, + Register method_result, + Register scan_temp, + Register temp_reg2, + Register receiver, + int itable_index, + Label& L_no_such_interface); + // virtual method calling void lookup_virtual_method(Register recv_klass, RegisterOrConstant vtable_index, diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp index 67944ae716f..0e78e0274d7 100644 --- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp +++ b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp @@ -179,14 +179,16 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // rax: CompiledICHolder // rcx: Receiver - // Most registers are in use; we'll use rax, rbx, rsi, rdi + // Most registers are in use; we'll use 
rax, rbx, rcx, rdx, rsi, rdi // (If we need to make rsi, rdi callee-save, do a push/pop here.) const Register recv_klass_reg = rsi; const Register holder_klass_reg = rax; // declaring interface klass (DECC) - const Register resolved_klass_reg = rbx; // resolved interface klass (REFC) - const Register temp_reg = rdi; + const Register resolved_klass_reg = rdi; // resolved interface klass (REFC) + const Register temp_reg = rdx; + const Register method = rbx; + const Register icholder_reg = rax; + const Register receiver = rcx; - const Register icholder_reg = rax; __ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); __ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); @@ -198,35 +200,26 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { __ load_klass(recv_klass_reg, rcx, noreg); start_pc = __ pc(); + __ push(rdx); // temp_reg // Receiver subtype check against REFC. - // Destroys recv_klass_reg value. - __ lookup_interface_method(// inputs: rec. class, interface - recv_klass_reg, resolved_klass_reg, noreg, - // outputs: scan temp. reg1, scan temp. reg2 - recv_klass_reg, temp_reg, - L_no_such_interface, - /*return_method=*/false); - - const ptrdiff_t typecheckSize = __ pc() - start_pc; - start_pc = __ pc(); - // Get selected method from declaring class and itable index - const Register method = rbx; - __ load_klass(recv_klass_reg, rcx, noreg); // restore recv_klass_reg - __ lookup_interface_method(// inputs: rec. class, interface, itable index - recv_klass_reg, holder_klass_reg, itable_index, - // outputs: method, scan temp. 
reg - method, temp_reg, - L_no_such_interface); - + __ lookup_interface_method_stub(recv_klass_reg, // input + holder_klass_reg, // input + resolved_klass_reg, // input + method, // output + temp_reg, + noreg, + receiver, // input (x86_32 only: to restore recv_klass value) + itable_index, + L_no_such_interface); const ptrdiff_t lookupSize = __ pc() - start_pc; // We expect we need index_dependent_slop extra bytes. Reason: // The emitted code in lookup_interface_method changes when itable_index exceeds 31. // For windows, a narrow estimate was found to be 104. Other OSes not tested. const ptrdiff_t estimate = 104; - const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop; + const ptrdiff_t codesize = lookupSize + index_dependent_slop; slop_delta = (int)(estimate - codesize); slop_bytes += slop_delta; assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); @@ -246,6 +239,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { } #endif // ASSERT + __ pop(rdx); address ame_addr = __ pc(); __ jmp(Address(method, Method::from_compiled_offset())); @@ -255,6 +249,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // We force resolving of the call site by jumping to the "handle // wrong method" stub, and so let the interpreter runtime do all the // dirty work. 
+ __ pop(rdx); __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); masm->flush(); diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp index dc4bb9f204e..f162a651183 100644 --- a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp +++ b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp @@ -175,10 +175,12 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them) const Register recv_klass_reg = r10; const Register holder_klass_reg = rax; // declaring interface klass (DECC) - const Register resolved_klass_reg = rbx; // resolved interface klass (REFC) + const Register resolved_klass_reg = r14; // resolved interface klass (REFC) const Register temp_reg = r11; + const Register temp_reg2 = r13; + const Register method = rbx; + const Register icholder_reg = rax; - const Register icholder_reg = rax; __ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); __ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); @@ -192,25 +194,16 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { start_pc = __ pc(); // Receiver subtype check against REFC. - // Destroys recv_klass_reg value. - __ lookup_interface_method(// inputs: rec. class, interface - recv_klass_reg, resolved_klass_reg, noreg, - // outputs: scan temp. reg1, scan temp. reg2 - recv_klass_reg, temp_reg, - L_no_such_interface, - /*return_method=*/false); - - const ptrdiff_t typecheckSize = __ pc() - start_pc; - start_pc = __ pc(); - // Get selected method from declaring class and itable index - const Register method = rbx; - __ load_klass(recv_klass_reg, j_rarg0, temp_reg); // restore recv_klass_reg - __ lookup_interface_method(// inputs: rec. class, interface, itable index - recv_klass_reg, holder_klass_reg, itable_index, - // outputs: method, scan temp. 
reg - method, temp_reg, - L_no_such_interface); + __ lookup_interface_method_stub(recv_klass_reg, // input + holder_klass_reg, // input + resolved_klass_reg, // input + method, // output + temp_reg, + temp_reg2, + noreg, + itable_index, + L_no_such_interface); const ptrdiff_t lookupSize = __ pc() - start_pc; @@ -218,7 +211,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // The emitted code in lookup_interface_method changes when itable_index exceeds 15. // For linux, a very narrow estimate would be 112, but Solaris requires some more space (130). const ptrdiff_t estimate = 136; - const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop; + const ptrdiff_t codesize = lookupSize + index_dependent_slop; slop_delta = (int)(estimate - codesize); slop_bytes += slop_delta; assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); diff --git a/test/micro/org/openjdk/bench/vm/compiler/InterfaceCalls.java b/test/micro/org/openjdk/bench/vm/compiler/InterfaceCalls.java index e313b948c9f..30ee9dc53b7 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/InterfaceCalls.java +++ b/test/micro/org/openjdk/bench/vm/compiler/InterfaceCalls.java @@ -24,6 +24,7 @@ package org.openjdk.bench.vm.compiler; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.CompilerControl; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; @@ -52,6 +53,14 @@ public class InterfaceCalls { public int getIntSecond(); } + interface FirstInterfaceExt extends FirstInterface { + default int getIntFirst() { return 44; } + } + + interface FirstInterfaceExtExt extends FirstInterfaceExt { + default int getIntFirst() { return 45; } + } + class FirstClass implements FirstInterface, SecondInterface { public int 
getIntFirst() { return 1; @@ -102,8 +111,80 @@ public class InterfaceCalls { } } + class FirstClassDontInline implements FirstInterface { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -1; + } + } + + class SecondClassDontInline implements FirstInterface { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -2; + } + } + + class ThirdClassDontInline implements FirstInterface { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -3; + } + } + + class FourthClassDontInline implements FirstInterface { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -4; + } + } + + class FifthClassDontInline implements FirstInterface { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -5; + } + } + + class FirstClassDontInlineExtExt implements FirstInterfaceExtExt { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -1; + } + } + + class SecondClassDontInlineExtExt implements FirstInterfaceExtExt { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -2; + } + } + + class ThirdClassDontInlineExtExt implements FirstInterfaceExtExt { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -3; + } + } + + class FourthClassDontInlineExtExt implements FirstInterfaceExtExt { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -4; + } + } + + class FifthClassDontInlineExtExt implements FirstInterfaceExtExt { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + public int getIntFirst() { + return -5; + } + } + final int asLength = 5; public FirstInterface[] as = new FirstInterface[asLength]; + public FirstInterface[] noninlined = new FirstInterface[asLength]; + public FirstInterfaceExtExt[] noninlinedextext = new 
FirstInterfaceExtExt[asLength]; @Setup @@ -113,6 +194,18 @@ public class InterfaceCalls { as[2] = new ThirdClass(); as[3] = new FourthClass(); as[4] = new FifthClass(); + + noninlined[0] = new FirstClassDontInline(); + noninlined[1] = new SecondClassDontInline(); + noninlined[2] = new ThirdClassDontInline(); + noninlined[3] = new FourthClassDontInline(); + noninlined[4] = new FifthClassDontInline(); + + noninlinedextext[0] = new FirstClassDontInlineExtExt(); + noninlinedextext[1] = new SecondClassDontInlineExtExt(); + noninlinedextext[2] = new ThirdClassDontInlineExtExt(); + noninlinedextext[3] = new FourthClassDontInlineExtExt(); + noninlinedextext[4] = new FifthClassDontInlineExtExt(); } /** @@ -126,6 +219,22 @@ public class InterfaceCalls { int l = 0; + /** Tests single base interface method call */ + @Benchmark + public int testIfaceCall(Blackhole bh) { + FirstInterface ai = noninlined[l]; + l = ++ l % asLength; + return ai.getIntFirst(); + } + + /** Tests extended interface method call */ + @Benchmark + public int testIfaceExtCall(Blackhole bh) { + FirstInterfaceExtExt ai = noninlinedextext[l]; + l = ++ l % asLength; + return ai.getIntFirst(); + } + /** * Interface call address computation within loop but the receiver preexists * the loop and the ac can be moved outside of the loop