8195112: x86 (32 bit): implementation for Thread-local handshakes

Reviewed-by: goetz, rehn
This commit is contained in:
Martin Doerr 2018-02-20 16:10:21 +01:00
parent b085ebe7b7
commit 276e1da447
14 changed files with 178 additions and 88 deletions

View File

@ -529,12 +529,16 @@ void LIR_Assembler::return_op(LIR_Opr result) {
if (SafepointMechanism::uses_thread_local_poll()) {
#ifdef _LP64
__ movptr(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
__ relocate(relocInfo::poll_return_type);
__ testl(rax, Address(rscratch1, 0));
const Register poll_addr = rscratch1;
__ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset()));
#else
ShouldNotReachHere();
const Register poll_addr = rbx;
assert(FrameMap::is_caller_save_register(poll_addr), "will overwrite");
__ get_thread(poll_addr);
__ movptr(poll_addr, Address(poll_addr, Thread::polling_page_offset()));
#endif
__ relocate(relocInfo::poll_return_type);
__ testl(rax, Address(poll_addr, 0));
} else {
AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
@ -555,16 +559,20 @@ int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
int offset = __ offset();
if (SafepointMechanism::uses_thread_local_poll()) {
#ifdef _LP64
__ movptr(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
const Register poll_addr = rscratch1;
__ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset()));
#else
assert(tmp->is_cpu_register(), "needed");
const Register poll_addr = tmp->as_register();
__ get_thread(poll_addr);
__ movptr(poll_addr, Address(poll_addr, in_bytes(Thread::polling_page_offset())));
#endif
add_debug_info_for_branch(info);
__ relocate(relocInfo::poll_type);
address pre_pc = __ pc();
__ testl(rax, Address(rscratch1, 0));
__ testl(rax, Address(poll_addr, 0));
address post_pc = __ pc();
guarantee(pointer_delta(post_pc, pre_pc, 1) == 3, "must be exact length");
#else
ShouldNotReachHere();
#endif
guarantee(pointer_delta(post_pc, pre_pc, 1) == 2 LP64_ONLY(+1), "must be exact length");
} else {
AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
if (Assembler::is_polling_page_far()) {

View File

@ -143,6 +143,7 @@ bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
LIR_Opr LIRGenerator::safepoint_poll_register() {
NOT_LP64( if (SafepointMechanism::uses_thread_local_poll()) { return new_register(T_ADDRESS); } )
return LIR_OprFact::illegalOpr;
}
@ -1515,7 +1516,7 @@ void LIRGenerator::do_If(If* x) {
if (x->is_safepoint()) {
// increment backedge counter if needed
increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci());
__ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
__ safepoint(safepoint_poll_register(), state_for(x, x->state_before()));
}
set_no_result(x);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -65,9 +65,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORT_RESERVED_STACK_AREA
#endif
#ifdef _LP64
// X64 have implemented the local polling
#define THREAD_LOCAL_POLL
#endif
#endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -97,9 +97,10 @@ define_pd_global(bool, PreserveFramePointer, false);
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
#ifdef _LP64
#if defined(_LP64) || defined(_WINDOWS)
define_pd_global(bool, ThreadLocalHandshakes, true);
#else
// get_thread() is slow on linux 32 bit, therefore off by default
define_pd_global(bool, ThreadLocalHandshakes, false);
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -830,13 +830,12 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
if (verifyoop) {
verify_oop(rax, state);
}
#ifdef _LP64
Label no_safepoint, dispatch;
address* const safepoint_table = Interpreter::safept_table(state);
#ifdef _LP64
Label no_safepoint, dispatch;
if (SafepointMechanism::uses_thread_local_poll() && table != safepoint_table && generate_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
@ -851,9 +850,23 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
#else
Address index(noreg, rbx, Address::times_ptr);
ExternalAddress tbl((address)table);
ArrayAddress dispatch(tbl, index);
jump(dispatch);
if (SafepointMechanism::uses_thread_local_poll() && table != safepoint_table && generate_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
Label no_safepoint;
const Register thread = rcx;
get_thread(thread);
testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index);
jump(dispatch_addr);
bind(no_safepoint);
}
{
ArrayAddress dispatch_addr(ExternalAddress((address)table), index);
jump(dispatch_addr);
}
#endif // _LP64
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -3767,10 +3767,17 @@ void MacroAssembler::serialize_memory(Register thread, Register tmp) {
movl(as_Address(ArrayAddress(page, index)), tmp);
}
#ifdef _LP64
void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) {
if (SafepointMechanism::uses_thread_local_poll()) {
testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
#ifdef _LP64
assert(thread_reg == r15_thread, "should be");
#else
if (thread_reg == noreg) {
thread_reg = temp_reg;
get_thread(thread_reg);
}
#endif
testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
} else {
cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
@ -3778,13 +3785,6 @@ void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Regis
jcc(Assembler::notEqual, slow_path);
}
}
#else
void MacroAssembler::safepoint_poll(Label& slow_path) {
cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
SafepointSynchronize::_not_synchronized);
jcc(Assembler::notEqual, slow_path);
}
#endif
// Calls to C land
//

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -656,11 +656,9 @@ class MacroAssembler: public Assembler {
// Support for serializing memory accesses between threads
void serialize_memory(Register thread, Register tmp);
#ifdef _LP64
// If thread_reg is != noreg the code assumes the register passed contains
// the thread (required on 64 bit).
void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg);
#else
void safepoint_poll(Label& slow_path);
#endif
void verify_tlab();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -704,14 +704,18 @@ inline bool NativeInstruction::is_far_jump() { return is_mov_literal64(); }
inline bool NativeInstruction::is_cond_jump() { return (int_at(0) & 0xF0FF) == 0x800F /* long jump */ ||
(ubyte_at(0) & 0xF0) == 0x70; /* short jump */ }
inline bool NativeInstruction::is_safepoint_poll() {
#ifdef AMD64
if (SafepointMechanism::uses_thread_local_poll()) {
#ifdef AMD64
const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix;
const int test_offset = has_rex_prefix ? 1 : 0;
#else
const int test_offset = 0;
#endif
const bool is_test_opcode = ubyte_at(test_offset) == NativeTstRegMem::instruction_code_memXregl;
const bool is_rax_target = (ubyte_at(test_offset + 1) & NativeTstRegMem::modrm_mask) == NativeTstRegMem::modrm_reg;
return is_test_opcode && is_rax_target;
}
#ifdef AMD64
// Try decoding a near safepoint first:
if (ubyte_at(0) == NativeTstRegMem::instruction_code_memXregl &&
ubyte_at(1) == 0x05) { // 00 rax 101

View File

@ -2111,16 +2111,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
Label after_transition;
// check for safepoint operation in progress and/or pending suspend requests
{ Label Continue;
{ Label Continue, slow_path;
__ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
SafepointSynchronize::_not_synchronized);
__ safepoint_poll(slow_path, thread, noreg);
Label L;
__ jcc(Assembler::notEqual, L);
__ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
__ jcc(Assembler::equal, Continue);
__ bind(L);
__ bind(slow_path);
// Don't use call_VM as it will see a possible pending exception and forward it
// and never return here preventing us from clearing _last_native_pc down below.
@ -2996,8 +2993,11 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
// if this was not a poll_return then we need to correct the return address now.
if (!cause_return) {
__ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
__ movptr(Address(rbp, wordSize), rax);
// Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
// Additionally, rbx is a callee saved register and we can look at it later to determine
// if someone changed the return address for us!
__ movptr(rbx, Address(java_thread, JavaThread::saved_exception_pc_offset()));
__ movptr(Address(rbp, wordSize), rbx);
}
// do the call
@ -3029,11 +3029,63 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ bind(noException);
Label no_adjust, bail, not_special;
if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
// If our stashed return pc was modified by the runtime we avoid touching it
__ cmpptr(rbx, Address(rbp, wordSize));
__ jccb(Assembler::notEqual, no_adjust);
// Skip over the poll instruction.
// See NativeInstruction::is_safepoint_poll()
// Possible encodings:
// 85 00 test %eax,(%rax)
// 85 01 test %eax,(%rcx)
// 85 02 test %eax,(%rdx)
// 85 03 test %eax,(%rbx)
// 85 06 test %eax,(%rsi)
// 85 07 test %eax,(%rdi)
//
// 85 04 24 test %eax,(%rsp)
// 85 45 00 test %eax,0x0(%rbp)
#ifdef ASSERT
__ movptr(rax, rbx); // remember where 0x85 should be, for verification below
#endif
// rsp/rbp base encoding takes 3 bytes with the following register values:
// rsp 0x04
// rbp 0x05
__ movzbl(rcx, Address(rbx, 1));
__ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
__ subptr(rcx, 4); // looking for 0x00 .. 0x01
__ cmpptr(rcx, 1);
__ jcc(Assembler::above, not_special);
__ addptr(rbx, 1);
__ bind(not_special);
#ifdef ASSERT
// Verify the correct encoding of the poll we're about to skip.
__ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
__ jcc(Assembler::notEqual, bail);
// Mask out the modrm bits
__ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
// rax encodes to 0, so if the bits are nonzero it's incorrect
__ jcc(Assembler::notZero, bail);
#endif
// Adjust return pc forward to step over the safepoint poll instruction
__ addptr(rbx, 2);
__ movptr(Address(rbp, wordSize), rbx);
}
__ bind(no_adjust);
// Normal exit, register restoring and exit
RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(0);
#ifdef ASSERT
__ bind(bail);
__ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
#endif
// make sure all code is generated
masm->flush();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1148,7 +1148,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
Label slow_path;
#ifndef _LP64
__ safepoint_poll(slow_path);
__ safepoint_poll(slow_path, thread, noreg);
#else
__ safepoint_poll(slow_path, r15_thread, rscratch1);
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -61,10 +61,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
ExternalAddress state(SafepointSynchronize::address_of_state());
__ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
SafepointSynchronize::_not_synchronized);
__ jcc(Assembler::notEqual, slow_path);
__ safepoint_poll(slow_path, noreg, rdi);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@ -113,10 +110,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
ExternalAddress state(SafepointSynchronize::address_of_state());
__ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
SafepointSynchronize::_not_synchronized);
__ jcc(Assembler::notEqual, slow_path);
__ safepoint_poll(slow_path, noreg, rdi);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2692,11 +2692,16 @@ void TemplateTable::_return(TosState state) {
__ bind(skip_register_finalizer);
}
#ifdef _LP64
if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll"));
#ifdef _LP64
__ testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
#else
const Register thread = rdi;
__ get_thread(thread);
__ testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
#endif
__ jcc(Assembler::zero, no_safepoint);
__ push(state);
__ call_VM(noreg, CAST_FROM_FN_PTR(address,
@ -2704,7 +2709,6 @@ void TemplateTable::_return(TosState state) {
__ pop(state);
__ bind(no_safepoint);
}
#endif
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry

View File

@ -317,7 +317,7 @@ int MachCallRuntimeNode::ret_addr_offset() {
// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
return false;
return SafepointMechanism::uses_thread_local_poll();
}
//
@ -706,34 +706,25 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
}
if (do_polling() && C->is_method_compilation()) {
cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
emit_opcode(cbuf,0x85);
emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
emit_d32(cbuf, (intptr_t)os::get_polling_page());
if (SafepointMechanism::uses_thread_local_poll()) {
Register pollReg = as_Register(EBX_enc);
MacroAssembler masm(&cbuf);
masm.get_thread(pollReg);
masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
masm.relocate(relocInfo::poll_return_type);
masm.testl(rax, Address(pollReg, 0));
} else {
cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
emit_opcode(cbuf,0x85);
emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
emit_d32(cbuf, (intptr_t)os::get_polling_page());
}
}
}
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
Compile *C = ra_->C;
// If method set FPU control word, restore to standard control word
int size = C->in_24_bit_fp_mode() ? 6 : 0;
if (C->max_vector_size() > 16) size += 3; // vzeroupper
if (do_polling() && C->is_method_compilation()) size += 6;
int framesize = C->frame_size_in_bytes();
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove two words for return addr and rbp,
framesize -= 2*wordSize;
size++; // popl rbp,
if (framesize >= 128) {
size += 6;
} else {
size += framesize ? 3 : 0;
}
size += 64; // added to support ReservedStackAccess
return size;
return MachNode::size(ra_); // too many variables; just compute it
// the hard way
}
int MachEpilogNode::reloc() const {
@ -13336,6 +13327,7 @@ instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
predicate(SafepointMechanism::uses_global_page_poll());
match(SafePoint);
effect(KILL cr);
@ -13354,6 +13346,25 @@ instruct safePoint_poll(eFlagsReg cr) %{
ins_pipe( ialu_reg_mem );
%}
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
predicate(SafepointMechanism::uses_thread_local_poll());
match(SafePoint poll);
effect(KILL cr, USE poll);
format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
ins_cost(125);
// EBP would need size(3)
size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
ins_encode %{
__ relocate(relocInfo::poll_type);
address pre_pc = __ pc();
__ testl(rax, Address($poll$$Register, 0));
address post_pc = __ pc();
guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
%}
ins_pipe(ialu_reg_mem);
%}
// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.

View File

@ -1826,6 +1826,13 @@ jint Arguments::set_ergonomics_flags() {
}
#endif
#if defined(IA32)
// Only server compiler can optimize safepoints well enough.
if (!is_server_compilation_mode_vm()) {
FLAG_SET_ERGO_IF_DEFAULT(bool, ThreadLocalHandshakes, false);
}
#endif
set_conservative_max_heap_alignment();
#ifndef ZERO