8146709: AArch64: Incorrect use of ADRP for byte_map_base

Reviewed-by: roland
This commit is contained in:
Andrew Haley 2016-01-19 17:52:52 +00:00
parent b6658d2b5f
commit 6b826df140
5 changed files with 87 additions and 39 deletions

View File

@ -4442,11 +4442,7 @@ encode %{
enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
MacroAssembler _masm(&cbuf);
address page = (address)$src$$constant;
Register dst_reg = as_Register($dst$$reg);
unsigned long off;
__ adrp(dst_reg, ExternalAddress(page), off);
assert(off == 0, "assumed offset == 0");
__ load_byte_map_base($dst$$Register);
%}
enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{

View File

@ -1150,9 +1150,6 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
#if INCLUDE_ALL_GCS
// Registers to be saved around calls to g1_wb_pre or g1_wb_post
#define G1_SAVE_REGS (RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2))
case g1_pre_barrier_slow_id:
{
StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
@ -1194,10 +1191,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ b(done);
__ bind(runtime);
__ push(G1_SAVE_REGS, sp);
__ push_call_clobbered_registers();
f.load_argument(0, pre_val);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
__ pop(G1_SAVE_REGS, sp);
__ pop_call_clobbered_registers();
__ bind(done);
}
break;
@ -1225,45 +1222,49 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
DirtyCardQueue::byte_offset_of_buf()));
const Register card_addr = rscratch2;
ExternalAddress cardtable((address) ct->byte_map_base);
const Register card_offset = rscratch2;
// LR is free here, so we can use it to hold the byte_map_base.
const Register byte_map_base = lr;
f.load_argument(0, card_addr);
__ lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
unsigned long offset;
__ adrp(rscratch1, cardtable, offset);
__ add(card_addr, card_addr, rscratch1);
__ ldrb(rscratch1, Address(card_addr, offset));
assert_different_registers(card_offset, byte_map_base, rscratch1);
f.load_argument(0, card_offset);
__ lsr(card_offset, card_offset, CardTableModRefBS::card_shift);
__ load_byte_map_base(byte_map_base);
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
__ br(Assembler::EQ, done);
assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
__ membar(Assembler::StoreLoad);
__ ldrb(rscratch1, Address(card_addr, offset));
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cbzw(rscratch1, done);
// storing region crossing non-NULL, card is clean.
// dirty card and log.
__ strb(zr, Address(card_addr, offset));
__ strb(zr, Address(byte_map_base, card_offset));
// Convert card offset into an address in card_addr
Register card_addr = card_offset;
__ add(card_addr, byte_map_base, card_addr);
__ ldr(rscratch1, queue_index);
__ cbz(rscratch1, runtime);
__ sub(rscratch1, rscratch1, wordSize);
__ str(rscratch1, queue_index);
const Register buffer_addr = r0;
// Reuse LR to hold buffer_addr
const Register buffer_addr = lr;
__ push(RegSet::of(r0, r1), sp);
__ ldr(buffer_addr, buffer);
__ str(card_addr, Address(buffer_addr, rscratch1));
__ pop(RegSet::of(r0, r1), sp);
__ b(done);
__ bind(runtime);
__ push(G1_SAVE_REGS, sp);
__ push_call_clobbered_registers();
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
__ pop(G1_SAVE_REGS, sp);
__ pop_call_clobbered_registers();
__ bind(done);
}

View File

@ -2301,6 +2301,30 @@ void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_t
}
#endif
void MacroAssembler::push_call_clobbered_registers() {
push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
// Push v0-v7, v16-v31.
for (int i = 30; i >= 0; i -= 2) {
if (i <= v7->encoding() || i >= v16->encoding()) {
stpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -2 * wordSize)));
}
}
}
void MacroAssembler::pop_call_clobbered_registers() {
for (int i = 0; i < 32; i += 2) {
if (i <= v7->encoding() || i >= v16->encoding()) {
ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 2 * wordSize)));
}
}
pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
}
void MacroAssembler::push_CPU_state(bool save_vectors) {
push(0x3fffffff, sp); // integer registers except lr & sp
@ -3099,12 +3123,7 @@ void MacroAssembler::store_check(Register obj) {
assert(CardTableModRefBS::dirty_card_val() == 0, "must be");
{
ExternalAddress cardtable((address) ct->byte_map_base);
unsigned long offset;
adrp(rscratch1, cardtable, offset);
assert(offset == 0, "byte_map_base is misaligned");
}
load_byte_map_base(rscratch1);
if (UseCondCardMark) {
Label L_already_dirty;
@ -3596,12 +3615,10 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr,
lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
unsigned long offset;
adrp(tmp2, cardtable, offset);
// get the address of the card
load_byte_map_base(tmp2);
add(card_addr, card_addr, tmp2);
ldrb(tmp2, Address(card_addr, offset));
ldrb(tmp2, Address(card_addr));
cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
br(Assembler::EQ, done);
@ -3609,13 +3626,13 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr,
membar(Assembler::StoreLoad);
ldrb(tmp2, Address(card_addr, offset));
ldrb(tmp2, Address(card_addr));
cbzw(tmp2, done);
// storing a region crossing, non-NULL oop, card is clean.
// dirty card and log.
strb(zr, Address(card_addr, offset));
strb(zr, Address(card_addr));
ldr(rscratch1, queue_index);
cbz(rscratch1, runtime);
@ -3971,6 +3988,9 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byt
long offset_low = dest_page - low_page;
long offset_high = dest_page - high_page;
assert(is_valid_AArch64_address(dest.target()), "bad address");
assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
InstructionMark im(this);
code_section()->relocate(inst_mark(), dest.rspec());
// 8143067: Ensure that the adrp can reach the dest from anywhere within
@ -3982,11 +4002,26 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byt
long offset = dest_page - pc_page;
offset = (offset & ((1<<20)-1)) << 12;
_adrp(reg1, pc()+offset);
movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
movk(reg1, (unsigned long)dest.target() >> 32, 32);
}
byte_offset = (unsigned long)dest.target() & 0xfff;
}
// Load the card table's byte_map_base into reg.
//
// Strictly speaking the byte_map_base isn't an address at all, and it
// might even be negative, so ADRP can only be used when the value
// happens to lie inside the valid AArch64 address range. Otherwise it
// is materialized as a plain 64-bit immediate.
void MacroAssembler::load_byte_map_base(Register reg) {
  CardTableModRefBS* ct = (CardTableModRefBS*)(Universe::heap()->barrier_set());
  address base = (address)ct->byte_map_base;

  if (!is_valid_AArch64_address(base)) {
    // Not representable as an address: load it as an integer constant.
    mov(reg, (uint64_t)base);
    return;
  }

  // ADRP yields the containing 4K page; the base is expected to be
  // page-aligned, so the in-page remainder must be zero.
  unsigned long page_remainder;
  adrp(reg, ExternalAddress(base), page_remainder);
  assert(page_remainder == 0, "misaligned card table base");
}
void MacroAssembler::build_frame(int framesize) {
assert(framesize > 0, "framesize must be > 0");
if (framesize < ((1 << 9) + 2 * wordSize)) {

View File

@ -437,6 +437,13 @@ public:
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
// Push and pop everything that might be clobbered by a native
// runtime call except rscratch1 and rscratch2. (They are always
// scratch, so we don't have to protect them.) Only save the lower
// 64 bits of each vector register.
void push_call_clobbered_registers();
void pop_call_clobbered_registers();
// now mov instructions for loading absolute addresses and 32 or
// 64 bit integers
@ -1116,6 +1123,15 @@ public:
// of your data.
Address form_address(Register Rd, Register base, long byte_offset, int shift);
// Tell whether a lies inside the 48-bit AArch64 address space, i.e.
// whether every bit above bit 47 is zero.
bool is_valid_AArch64_address(address a) {
  const uint64_t bits_above_47 = (uint64_t)a >> 48;
  return bits_above_47 == 0;
}
// Load the base of the cardtable byte map into reg.
void load_byte_map_base(Register reg);
// Prolog generator routines to support switch between x86 code and
// generated ARM code

View File

@ -744,7 +744,7 @@ class StubGenerator: public StubCodeGenerator {
__ sub(end, end, start); // number of bytes to copy
const Register count = end; // 'end' register contains bytes count now
__ mov(scratch, (address)ct->byte_map_base);
__ load_byte_map_base(scratch);
__ add(start, start, scratch);
if (UseConcMarkSweepGC) {
__ membar(__ StoreStore);