8077838: Recent developments for ppc

Power 8 recognition and instructions, math.*Exact intrinsics and rtm, C2 optimizations

Reviewed-by: kvn, simonis
This commit is contained in:
Goetz Lindenmaier 2015-04-15 12:44:56 +02:00
parent dc67bb0a0e
commit 5827e6ce0f
23 changed files with 1390 additions and 453 deletions

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -85,8 +85,7 @@ int Assembler::branch_destination(int inst, int pos) {
}
// Low-level andi-one-instruction-macro.
void Assembler::andi(Register a, Register s, const int ui16) {
assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
void Assembler::andi(Register a, Register s, const long ui16) {
if (is_power_of_2_long(((jlong) ui16)+1)) {
// pow2minus1
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
@ -97,6 +96,7 @@ void Assembler::andi(Register a, Register s, const int ui16) {
// negpow2
clrrdi(a, s, log2_long((jlong)-ui16));
} else {
assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
andi_(a, s, ui16);
}
}
@ -356,7 +356,6 @@ void Assembler::load_const(Register d, long x, Register tmp) {
// 16 bit immediate offset.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
// Avoid accidentally trying to use R0 for indexed addressing.
assert(d != R0, "R0 not allowed");
assert_different_registers(d, tmp);
short xa, xb, xc, xd; // Four 16-bit chunks of const.
@ -370,6 +369,58 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
return 0;
}
int retval = 0;
if (return_simm16_rest) {
retval = xd;
x = rem << 16;
xd = 0;
}
if (d == R0) { // Can't use addi.
if (is_simm(x, 32)) { // opt 2: simm32
lis(d, x >> 16);
if (xd) ori(d, d, (unsigned short)xd);
} else {
// 64-bit value: x = xa xb xc xd
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff;
xc = (x >> 16) & 0xffff;
bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
if (tmp == noreg || (xc == 0 && xd == 0)) {
if (xa_loaded) {
lis(d, xa);
if (xb) { ori(d, d, (unsigned short)xb); }
} else {
li(d, xb);
}
sldi(d, d, 32);
if (xc) { oris(d, d, (unsigned short)xc); }
if (xd) { ori( d, d, (unsigned short)xd); }
} else {
// Exploit instruction level parallelism if we have a tmp register.
bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
if (xa_loaded) {
lis(tmp, xa);
}
if (xc_loaded) {
lis(d, xc);
}
if (xa_loaded) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); }
} else {
li(tmp, xb);
}
if (xc_loaded) {
if (xd) { ori(d, d, (unsigned short)xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
}
}
return retval;
}
xc = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
@ -377,28 +428,27 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
lis(d, xc);
} else { // High 32 bits needed.
if (tmp != noreg) { // opt 3: We have a temp reg.
if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
// No carry propagation between xc and higher chunks here (use logical instructions).
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
bool load_xa = (xa != 0) || (xb < 0);
bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
bool return_xd = false;
if (load_xa) { lis(tmp, xa); }
if (xa_loaded) { lis(tmp, xa); }
if (xc) { lis(d, xc); }
if (load_xa) {
if (xa_loaded) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
} else {
li(tmp, xb); // non-negative
li(tmp, xb);
}
if (xc) {
if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
else if (xd) { addi(d, d, xd); }
if (xd) { addi(d, d, xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
return return_xd ? xd : 0; // non-negative
return retval;
}
xb = rem & 0xFFFF; // Next 16-bit chunk.
@ -417,11 +467,51 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
if (xc) { addis(d, d, xc); }
}
// opt 5: Return offset to be inserted into following instruction.
if (return_simm16_rest) return xd;
if (xd) { addi(d, d, xd); }
return 0;
return retval;
}
// We emit only one addition to s to optimize latency.
int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
assert(s != R0 && s != tmp, "unsupported");
long rem = x;
// Case 1: Can use mr or addi.
short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xd >> 15);
if (rem == 0) {
if (xd == 0) {
if (d != s) { mr(d, s); }
return 0;
}
if (return_simm16_rest) {
return xd;
}
addi(d, s, xd);
return 0;
}
// Case 2: Can use addis.
if (xd == 0) {
short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xd >> 15);
if (rem == 0) {
addis(d, s, xc);
return 0;
}
}
// Other cases: load & add.
Register tmp1 = tmp,
tmp2 = noreg;
if ((d != tmp) && (d != s)) {
// Can use d.
tmp1 = d;
tmp2 = tmp;
}
int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
add(d, tmp1, s);
return simm16_rest;
}
#ifndef PRODUCT

View File

@ -224,10 +224,13 @@ class Assembler : public AbstractAssembler {
ADDIS_OPCODE = (15u << OPCODE_SHIFT),
ADDIC__OPCODE = (13u << OPCODE_SHIFT),
ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1),
ADDME_OPCODE = (31u << OPCODE_SHIFT | 234u << 1),
ADDZE_OPCODE = (31u << OPCODE_SHIFT | 202u << 1),
SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1),
SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1),
SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1),
SUBFIC_OPCODE = (8u << OPCODE_SHIFT),
SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1),
SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1),
MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1),
@ -657,6 +660,9 @@ class Assembler : public AbstractAssembler {
SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1),
EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1),
// Wait instructions for polling.
WAIT_OPCODE = (31u << OPCODE_SHIFT | 62u << 1),
// Trap instructions
TDI_OPCODE = (2u << OPCODE_SHIFT),
TWI_OPCODE = (3u << OPCODE_SHIFT),
@ -666,8 +672,10 @@ class Assembler : public AbstractAssembler {
// Atomics.
LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1),
LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1),
LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1),
STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1),
STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1)
STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1),
STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1)
};
@ -1171,6 +1179,14 @@ class Assembler : public AbstractAssembler {
inline void adde_( Register d, Register a, Register b);
inline void subfe( Register d, Register a, Register b);
inline void subfe_( Register d, Register a, Register b);
inline void addme( Register d, Register a);
inline void addme_( Register d, Register a);
inline void subfme( Register d, Register a);
inline void subfme_(Register d, Register a);
inline void addze( Register d, Register a);
inline void addze_( Register d, Register a);
inline void subfze( Register d, Register a);
inline void subfze_(Register d, Register a);
inline void neg( Register d, Register a);
inline void neg_( Register d, Register a);
inline void mulli( Register d, Register a, int si16);
@ -1189,6 +1205,38 @@ class Assembler : public AbstractAssembler {
inline void divw( Register d, Register a, Register b);
inline void divw_( Register d, Register a, Register b);
// Fixed-Point Arithmetic Instructions with Overflow detection
inline void addo( Register d, Register a, Register b);
inline void addo_( Register d, Register a, Register b);
inline void subfo( Register d, Register a, Register b);
inline void subfo_( Register d, Register a, Register b);
inline void addco( Register d, Register a, Register b);
inline void addco_( Register d, Register a, Register b);
inline void subfco( Register d, Register a, Register b);
inline void subfco_( Register d, Register a, Register b);
inline void addeo( Register d, Register a, Register b);
inline void addeo_( Register d, Register a, Register b);
inline void subfeo( Register d, Register a, Register b);
inline void subfeo_( Register d, Register a, Register b);
inline void addmeo( Register d, Register a);
inline void addmeo_( Register d, Register a);
inline void subfmeo( Register d, Register a);
inline void subfmeo_(Register d, Register a);
inline void addzeo( Register d, Register a);
inline void addzeo_( Register d, Register a);
inline void subfzeo( Register d, Register a);
inline void subfzeo_(Register d, Register a);
inline void nego( Register d, Register a);
inline void nego_( Register d, Register a);
inline void mulldo( Register d, Register a, Register b);
inline void mulldo_( Register d, Register a, Register b);
inline void mullwo( Register d, Register a, Register b);
inline void mullwo_( Register d, Register a, Register b);
inline void divdo( Register d, Register a, Register b);
inline void divdo_( Register d, Register a, Register b);
inline void divwo( Register d, Register a, Register b);
inline void divwo_( Register d, Register a, Register b);
// extended mnemonics
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
@ -1303,7 +1351,7 @@ class Assembler : public AbstractAssembler {
inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
void andi( Register a, Register s, int ui16); // optimized version
void andi( Register a, Register s, long ui16); // optimized version
inline void andi_( Register a, Register s, int ui16);
inline void andis_( Register a, Register s, int ui16);
inline void ori( Register a, Register s, int ui16);
@ -1688,14 +1736,21 @@ class Assembler : public AbstractAssembler {
inline void isync();
inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
// Wait instructions for polling. Attention: May result in SIGILL.
inline void wait();
inline void waitrsv(); // >=Power7
// atomics
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline bool lxarx_hint_exclusive_access();
inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void stwcx_( Register s, Register a, Register b);
inline void stdcx_( Register s, Register a, Register b);
inline void stqcx_( Register s, Register a, Register b);
// Instructions for adjusting thread priority for simultaneous
// multithreading (SMT) on Power5.
@ -2054,10 +2109,13 @@ class Assembler : public AbstractAssembler {
// Atomics: use ra0mem to disallow R0 as base.
inline void lwarx_unchecked(Register d, Register b, int eh1);
inline void ldarx_unchecked(Register d, Register b, int eh1);
inline void lqarx_unchecked(Register d, Register b, int eh1);
inline void lwarx( Register d, Register b, bool hint_exclusive_access);
inline void ldarx( Register d, Register b, bool hint_exclusive_access);
inline void lqarx( Register d, Register b, bool hint_exclusive_access);
inline void stwcx_(Register s, Register b);
inline void stdcx_(Register s, Register b);
inline void stqcx_(Register s, Register b);
inline void lfs( FloatRegister d, int si16);
inline void lfsx( FloatRegister d, Register b);
inline void lfd( FloatRegister d, int si16);
@ -2120,6 +2178,20 @@ class Assembler : public AbstractAssembler {
return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
}
// If return_simm16_rest, the return value needs to get added afterwards.
int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false);
inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
}
// If return_simm16_rest, the return value needs to get added afterwards.
inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) {
return add_const_optimized(d, s, -x, tmp, return_simm16_rest);
}
inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
}
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef CHECK_DELAY

View File

@ -100,6 +100,14 @@ inline void Assembler::adde( Register d, Register a, Register b) { emit_int32(
inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
inline void Assembler::addme( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::addme_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::subfme( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::subfme_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::addze( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::addze_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::subfze( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::subfze_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
@ -118,6 +126,38 @@ inline void Assembler::divd_( Register d, Register a, Register b) { emit_int32(
inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
// Fixed-Point Arithmetic Instructions with Overflow detection
inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::subfo( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::subfo_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::addco( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addco_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::subfco( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::addeo( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addeo_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::subfeo( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::addmeo( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::addmeo_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::subfmeo( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::subfmeo_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::addzeo( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::addzeo_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::subfzeo( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::subfzeo_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::nego( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::nego_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::mulldo( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::mullwo( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::divdo( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::divdo_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::divwo( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
// extended mnemonics
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
@ -540,15 +580,22 @@ inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); }
inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
// Wait instructions for polling.
inline void Assembler::wait() { emit_int32( WAIT_OPCODE); }
inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7
// atomics
// Use ra0mem to disallow R0 as base.
inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); }
inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
// Instructions for adjusting thread priority
// for simultaneous multithreading (SMT) on POWER5.
@ -873,10 +920,13 @@ inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCOD
// ra0 version
inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
// ra0 version
inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,7 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FLOATPRESSURE, 28);
define_pd_global(intx, FreqInlineSize, 175);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, INTPRESSURE, 25);
define_pd_global(intx, INTPRESSURE, 26);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -58,7 +58,7 @@ define_pd_global(bool, UseMembar, false);
// GC Ergo Flags
define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.
define_pd_global(uintx, TypeProfileLevel, 0);
define_pd_global(uintx, TypeProfileLevel, 111);
// Platform dependent flag handling: flags only defined on this platform.
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
@ -71,14 +71,26 @@ define_pd_global(uintx, TypeProfileLevel, 0);
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power7. Default is 0. CPUs newer than Power7 will be " \
"recognized as Power7.") \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
\
/* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
/* indirect call by a direct call. */ \
product(bool, ReoptimizeCallSequences, true, \
"Reoptimize code-sequences of calls at runtime.") \
\
/* Power 8: Configure Data Stream Control Register. */ \
product(uint64_t,DSCR_PPC64, (uintx)-1, \
"Power8 or later: Specify encoded value for Data Stream Control " \
"Register") \
product(uint64_t,DSCR_DPFD_PPC64, 8, \
"Power8 or later: DPFD (default prefetch depth) value of the " \
"Data Stream Control Register." \
" 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
product(uint64_t,DSCR_URG_PPC64, 8, \
"Power8 or later: URG (depth attainment urgency) value of the " \
"Data Stream Control Register." \
" 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
\
product(bool, UseLoadInstructionsForStackBangingPPC64, false, \
"Use load instructions for stack banging.") \
\
@ -121,6 +133,41 @@ define_pd_global(uintx, TypeProfileLevel, 0);
\
product(bool, ZapMemory, false, "Write 0x0101... to empty memory." \
" Use this to ease debugging.") \
\
/* Use Restricted Transactional Memory for lock eliding */ \
product(bool, UseRTMLocking, false, \
"Enable RTM lock eliding for inflated locks in compiled code") \
\
experimental(bool, UseRTMForStackLocks, false, \
"Enable RTM lock eliding for stack locks in compiled code") \
\
product(bool, UseRTMDeopt, false, \
"Perform deopt and recompilation based on RTM abort ratio") \
\
product(uintx, RTMRetryCount, 5, \
"Number of RTM retries on lock abort or busy") \
\
experimental(intx, RTMSpinLoopCount, 100, \
"Spin count for lock to become free before RTM retry") \
\
experimental(intx, RTMAbortThreshold, 1000, \
"Calculate abort ratio after this number of aborts") \
\
experimental(intx, RTMLockingThreshold, 10000, \
"Lock count at which to do RTM lock eliding without " \
"abort ratio calculation") \
\
experimental(intx, RTMAbortRatio, 50, \
"Lock abort ratio at which to stop use RTM lock eliding") \
\
experimental(intx, RTMTotalCountIncrRate, 64, \
"Increment total RTM attempted lock count once every n times") \
\
experimental(intx, RTMLockingCalculationDelay, 0, \
"Number of milliseconds to wait before start calculating aborts " \
"for RTM locking") \
\
experimental(bool, UseRTMXendForLockBusy, true, \
"Use RTM Xend instead of Xabort when lock busy") \
#endif // CPU_PPC_VM_GLOBALS_PPC_HPP

View File

@ -446,7 +446,7 @@ void InterpreterMacroAssembler::get_u4(Register Rdst, Register Rsrc, int offset,
}
// Load object from cpool->resolved_references(index).
void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) {
assert_different_registers(result, index);
get_constant_pool(result);
@ -469,7 +469,7 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result
#endif
// Add in the index.
add(result, tmp, result);
load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
}
// Generate a subtype check: branch to ok_is_subtype if sub_klass is
@ -876,7 +876,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// If condition is true we are done and hence we can store 0 in the displaced
// header indicating it is a recursive lock.
bne(CCR0, slow_case);
release();
std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
b(done);
@ -1861,7 +1860,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register
const Register mdp = tmp1;
add(mdp, tmp1, R28_mdx);
// Pffset of the current profile entry to update.
// Offset of the current profile entry to update.
const Register entry_offset = tmp2;
// entry_offset = array len in number of cells
ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -85,7 +85,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
// Load object from cpool->resolved_references(index).
void load_resolved_reference_at_index(Register result, Register index);
void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL);
void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
void load_receiver(Register Rparam_count, Register Rrecv_dst);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -47,4 +47,4 @@
}
#endif
#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1455,7 +1455,7 @@ void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_valu
// Several special cases exist to avoid that unnecessary information is generated.
//
void MacroAssembler::cmpxchgd(ConditionRegister flag,
Register dest_current_value, Register compare_value, Register exchange_value,
Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
Register addr_base, int semantics, bool cmpxchgx_hint,
Register int_flag_success, Label* failed_ext, bool contention_hint) {
Label retry;
@ -1465,7 +1465,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
// Save one branch if result is returned via register and result register is different from the other ones.
bool use_result_reg = (int_flag_success!=noreg);
bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
int_flag_success!=exchange_value && int_flag_success!=addr_base);
assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
@ -1481,7 +1481,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
if (contention_hint) { // Don't try to reserve if cmp fails.
ld(dest_current_value, 0, addr_base);
cmpd(flag, dest_current_value, compare_value);
cmpd(flag, compare_value, dest_current_value);
bne(flag, failed);
}
@ -1489,7 +1489,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
bind(retry);
ldarx(dest_current_value, addr_base, cmpxchgx_hint);
cmpd(flag, dest_current_value, compare_value);
cmpd(flag, compare_value, dest_current_value);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(flag, failed);
} else {
@ -1873,7 +1873,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@ -1909,7 +1908,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@ -1946,7 +1944,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@ -1987,9 +1984,371 @@ void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mar
beq(cr_reg, done);
}
// TM on PPC64.
void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
Label retry;
bind(retry);
ldarx(result, addr, /*hint*/ false);
addi(result, result, simm16);
stdcx_(result, addr);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
} else {
bne( CCR0, retry); // stXcx_ sets CCR0
}
}
void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
Label retry;
bind(retry);
lwarx(result, addr, /*hint*/ false);
ori(result, result, uimm16);
stwcx_(result, addr);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
} else {
bne( CCR0, retry); // stXcx_ sets CCR0
}
}
#if INCLUDE_RTM_OPT
// Update rtm_counters based on abort status
// input: abort_status
// rtm_counters (RTMLockingCounters*)
void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
// Mapping to keep PreciseRTMLockingStatistics similar to x86.
// x86 ppc (! means inverted, ? means not the same)
// 0 31 Set if abort caused by XABORT instruction.
// 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
// 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
// 3 10 Set if an internal buffer overflowed.
// 4 ?12 Set if a debug breakpoint was hit.
// 5 ?32 Set if an abort occurred during execution of a nested transaction.
const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
Assembler::tm_failure_persistent, // inverted: transient
Assembler::tm_trans_cf,
Assembler::tm_footprint_of,
Assembler::tm_non_trans_cf,
Assembler::tm_suspended};
const bool tm_failure_inv[] = {false, true, false, false, false, false};
assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
const Register addr_Reg = R0;
// Keep track of offset to where rtm_counters_Reg had pointed to.
int counters_offs = RTMLockingCounters::abort_count_offset();
addi(addr_Reg, rtm_counters_Reg, counters_offs);
const Register temp_Reg = rtm_counters_Reg;
//atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
ldx(temp_Reg, addr_Reg);
addi(temp_Reg, temp_Reg, 1);
stdx(temp_Reg, addr_Reg);
if (PrintPreciseRTMLockingStatistics) {
int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
//mftexasr(abort_status); done by caller
for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
counters_offs += counters_offs_delta;
li(temp_Reg, counters_offs_delta); // can't use addi with R0
add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
counters_offs_delta = sizeof(uintx);
Label check_abort;
rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
if (tm_failure_inv[i]) {
bne(CCR0, check_abort);
} else {
beq(CCR0, check_abort);
}
//atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
ldx(temp_Reg, addr_Reg);
addi(temp_Reg, temp_Reg, 1);
stdx(temp_Reg, addr_Reg);
bind(check_abort);
}
}
li(temp_Reg, -counters_offs); // can't use addi with R0
add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
}
// Branch if (random & (count-1) != 0), count is 2^n
// tmp and CR0 are killed
void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
mftb(tmp);
andi_(tmp, tmp, count-1);
bne(CCR0, brLabel);
}
// Perform abort ratio calculation, set no_rtm bit if high ratio.
// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED
void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data) {
Label L_done, L_check_always_rtm1, L_check_always_rtm2;
if (RTMLockingCalculationDelay > 0) {
// Delay calculation.
ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
cmpdi(CCR0, rtm_counters_Reg, 0);
beq(CCR0, L_done);
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
}
// Abort ratio calculation only if abort_count > RTMAbortThreshold.
// Aborted transactions = abort_count * 100
// All transactions = total_count * RTMTotalCountIncrRate
// Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
cmpdi(CCR0, R0, RTMAbortThreshold);
blt(CCR0, L_check_always_rtm2);
mulli(R0, R0, 100);
const Register tmpReg = rtm_counters_Reg;
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
mulli(tmpReg, tmpReg, RTMAbortRatio);
cmpd(CCR0, R0, tmpReg);
blt(CCR0, L_check_always_rtm1); // jump to reload
if (method_data != NULL) {
// Set rtm_state to "no rtm" in MDO.
// Not using a metadata relocation. Method and Class Loader are kept alive anyway.
// (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
atomic_ori_int(R0, tmpReg, NoRTM);
}
b(L_done);
bind(L_check_always_rtm1);
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
bind(L_check_always_rtm2);
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
blt(CCR0, L_done);
if (method_data != NULL) {
// Set rtm_state to "always rtm" in MDO.
// Not using a metadata relocation. See above.
load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
atomic_ori_int(R0, tmpReg, UseRTM);
}
bind(L_done);
}
// Update counters and perform abort ratio calculation.
// input: abort_status_Reg
void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data,
bool profile_rtm) {
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
// Update rtm counters based on state at abort.
// Reads abort_status_Reg, updates flags.
assert_different_registers(abort_status_Reg, temp_Reg);
load_const_optimized(temp_Reg, (address)rtm_counters, R0);
rtm_counters_update(abort_status_Reg, temp_Reg);
if (profile_rtm) {
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
}
}
// Retry on abort if abort's status indicates non-persistent failure.
// inputs: retry_count_Reg
// : abort_status_Reg
// output: retry_count_Reg decremented by 1
void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
Label& retryLabel, Label* checkRetry) {
Label doneRetry;
rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
bne(CCR0, doneRetry);
if (checkRetry) { bind(*checkRetry); }
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
smt_yield(); // Can't use wait(). No permission (SIGILL).
b(retryLabel);
bind(doneRetry);
}
// Spin and retry if lock is busy.
// inputs: box_Reg (monitor address)
// : retry_count_Reg
// output: retry_count_Reg decremented by 1
// CTR is killed
void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
Label SpinLoop, doneRetry;
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
li(R0, RTMSpinLoopCount);
mtctr(R0);
bind(SpinLoop);
smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
bdz(retryLabel);
ld(R0, 0, owner_addr_Reg);
cmpdi(CCR0, R0, 0);
bne(CCR0, SpinLoop);
b(retryLabel);
bind(doneRetry);
}
// Use RTM for normal stack locks.
// Input: objReg (object to lock)
void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
Register obj, Register mark_word, Register tmp,
Register retry_on_abort_count_Reg,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated) {
assert(UseRTMForStackLocks, "why call this otherwise?");
assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
Label L_rtm_retry, L_decrement_retry, L_on_abort;
if (RTMRetryCount > 0) {
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
bind(L_rtm_retry);
}
andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
bne(CCR0, IsInflated);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement;
if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
}
assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
//atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
ldx(mark_word, tmp);
addi(mark_word, mark_word, 1);
stdx(mark_word, tmp);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
beq(flag, DONE_LABEL); // all done if unlocked
if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort);
const Register abort_status_Reg = tmp;
mftexasr(abort_status_Reg);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
}
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
if (RTMRetryCount > 0) {
// Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
} else {
bind(L_decrement_retry);
}
}
// Use RTM for inflating locks
// inputs: obj (object to lock)
// mark_word (current header - KILLED)
// boxReg (on-stack box address (displaced header location) - KILLED)
void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
Register obj, Register mark_word, Register boxReg,
Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL) {
assert(UseRTMLocking, "why call this otherwise?");
Label L_rtm_retry, L_decrement_retry, L_on_abort;
// Clean monitor_value bit to get valid pointer.
int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
// Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
const Register tmpReg = boxReg;
const Register owner_addr_Reg = mark_word;
addi(owner_addr_Reg, mark_word, owner_offset);
if (RTMRetryCount > 0) {
load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
bind(L_rtm_retry);
}
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement;
if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
}
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
//atomic_inc_ptr(R0, tmpReg); We don't increment atomically
ldx(tmpReg, R0);
addi(tmpReg, tmpReg, 1);
stdx(tmpReg, R0);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
// We don't reload mark word. Will only be reset at safepoint.
ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
cmpdi(flag, R0, 0);
beq(flag, DONE_LABEL);
if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort);
const Register abort_status_Reg = tmpReg;
mftexasr(abort_status_Reg);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
// Restore owner_addr_Reg
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
#ifdef ASSERT
andi_(R0, mark_word, markOopDesc::monitor_value);
asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
#endif
addi(owner_addr_Reg, mark_word, owner_offset);
}
if (RTMRetryCount > 0) {
// Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
}
// Appears unlocked - try to swing _owner from null to non-null.
cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
if (RTMRetryCount > 0) {
// success done else retry
b(DONE_LABEL);
bind(L_decrement_retry);
// Spin and retry if lock is busy.
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
} else {
bind(L_decrement_retry);
}
}
#endif // INCLUDE_RTM_OPT
// "The box" is the space on the stack where we copy the object mark.
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
Register temp, Register displaced_header, Register current_header) {
Register temp, Register displaced_header, Register current_header,
bool try_bias,
RTMLockingCounters* rtm_counters,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data,
bool use_rtm, bool profile_rtm) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(flag != CCR0, "bad condition register");
Label cont;
@ -2006,10 +2365,18 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
return;
}
if (UseBiasedLocking) {
if (try_bias) {
biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
}
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
stack_rtm_counters, method_data, profile_rtm,
cont, object_has_monitor);
}
#endif // INCLUDE_RTM_OPT
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
@ -2066,14 +2433,22 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
//
#if INCLUDE_RTM_OPT
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
rtm_counters, method_data, profile_rtm, cont);
} else {
#endif // INCLUDE_RTM_OPT
// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
li(displaced_header, 0);
// CmpxchgX sets flag to cmpX(current, displaced).
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
/*compare_value=*/displaced_header,
/*compare_value=*/(intptr_t)0,
/*exchange_value=*/R16_thread,
/*where=*/temp,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
@ -2095,6 +2470,10 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
//asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
// "monitor->OwnerIsThread shouldn't be 0", -1);
# endif
#if INCLUDE_RTM_OPT
} // use_rtm()
#endif
}
bind(cont);
@ -2103,7 +2482,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
}
void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register temp, Register displaced_header, Register current_header) {
Register temp, Register displaced_header, Register current_header,
bool try_bias, bool use_rtm) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(flag != CCR0, "bad condition register");
Label cont;
@ -2115,10 +2495,24 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
return;
}
if (UseBiasedLocking) {
if (try_bias) {
biased_locking_exit(flag, oop, current_header, cont);
}
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
Label L_regular_unlock;
ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
bne(flag, L_regular_unlock); // else RegularLock
tend_(); // otherwise end...
b(cont); // ... and we're done
bind(L_regular_unlock);
}
#endif
// Find the lock address and load the displaced header from the stack.
ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
@ -2129,13 +2523,12 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
andi(temp, current_header, markOopDesc::monitor_value);
cmpdi(flag, temp, 0);
bne(flag, object_has_monitor);
andi_(R0, current_header, markOopDesc::monitor_value);
bne(CCR0, object_has_monitor);
}
// Check if it is still a light weight lock, this is is true if we see
// the stack address of the basicLock in the markOop of the object.
// Cmpxchg sets flag to cmpd(current_header, box).
@ -2158,6 +2551,20 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
bind(object_has_monitor);
addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
// Clean monitor_value bit to get valid pointer
cmpdi(flag, temp, 0);
bne(flag, L_regular_inflated_unlock);
tend_();
b(cont);
bind(L_regular_inflated_unlock);
}
#endif
ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
@ -2441,6 +2848,8 @@ void MacroAssembler::get_vm_result(Register oop_result) {
// oop_result
// R16_thread->in_bytes(JavaThread::vm_result_offset())
verify_thread();
ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
li(R0, 0);
std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
@ -2462,26 +2871,24 @@ void MacroAssembler::get_vm_result_2(Register metadata_result) {
std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
}
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
if (Universe::narrow_klass_base() != 0) {
// Use dst as temp if it is free.
load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
sub(dst, current, R0);
sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);
current = dst;
}
if (Universe::narrow_klass_shift() != 0) {
srdi(dst, current, Universe::narrow_klass_shift());
current = dst;
}
mr_if_needed(dst, current); // Move may be required.
return current;
}
void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
if (UseCompressedClassPointers) {
encode_klass_not_null(ck, klass);
stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
Register compressedKlass = encode_klass_not_null(ck, klass);
stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
} else {
std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
}
@ -2514,8 +2921,7 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
sldi(shifted_src, src, Universe::narrow_klass_shift());
}
if (Universe::narrow_klass_base() != 0) {
load_const(R0, Universe::narrow_klass_base());
add(dst, shifted_src, R0);
add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
}
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@
#define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
#include "asm/assembler.hpp"
#include "runtime/rtmLocking.hpp"
#include "utilities/macros.hpp"
// MacroAssembler extends Assembler by a few frequently used macros.
@ -432,8 +433,8 @@ class MacroAssembler: public Assembler {
int semantics, bool cmpxchgx_hint = false,
Register int_flag_success = noreg, bool contention_hint = false);
void cmpxchgd(ConditionRegister flag,
Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
int semantics, bool cmpxchgx_hint = false,
Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
Register addr_base, int semantics, bool cmpxchgx_hint = false,
Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
// interface method calling
@ -506,8 +507,42 @@ class MacroAssembler: public Assembler {
// biased locking exit case failed.
void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
void atomic_ori_int(Register addr, Register result, int uimm16);
#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
Label& retryLabel, Label* checkRetry = NULL);
void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
Register retry_on_abort_count,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
Register retry_on_busy_count, Register retry_on_abort_count,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL);
#endif
void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3,
bool try_bias = UseBiasedLocking,
RTMLockingCounters* rtm_counters = NULL,
RTMLockingCounters* stack_rtm_counters = NULL,
Metadata* method_data = NULL,
bool use_rtm = false, bool profile_rtm = false);
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3,
bool try_bias = UseBiasedLocking, bool use_rtm = false);
// Support for serializing memory accesses between threads
void serialize_memory(Register thread, Register tmp1, Register tmp2);
@ -576,7 +611,7 @@ class MacroAssembler: public Assembler {
Register tmp = noreg);
// Null allowed.
inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);
// Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
// src == d allowed.
@ -593,7 +628,7 @@ class MacroAssembler: public Assembler {
void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
static int instr_size_for_decode_klass_not_null();
void decode_klass_not_null(Register dst, Register src = noreg);
void encode_klass_not_null(Register dst, Register src = noreg);
Register encode_klass_not_null(Register dst, Register src = noreg);
// Load common heap base into register.
void reinit_heapbase(Register d, Register tmp = noreg);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -333,19 +333,29 @@ inline void MacroAssembler::store_heap_oop_not_null(Register d, RegisterOrConsta
}
}
inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) {
if (UseCompressedOops) {
lwz(d, offs, s1);
decode_heap_oop(d);
if (is_null != NULL) {
cmpwi(CCR0, d, 0);
beq(CCR0, *is_null);
decode_heap_oop_not_null(d);
} else {
decode_heap_oop(d);
}
} else {
ld(d, offs, s1);
if (is_null != NULL) {
cmpdi(CCR0, d, 0);
beq(CCR0, *is_null);
}
}
}
inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided.
if (Universe::narrow_oop_base_overlaps()) {
sub(d, current, R30);
sub_const_optimized(d, current, Universe::narrow_oop_base(), R0);
current = d;
}
if (Universe::narrow_oop_shift() != 0) {
@ -358,7 +368,7 @@ inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register sr
inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
Universe::narrow_oop_shift() != 0) {
mr(d, R30);
load_const_optimized(d, Universe::narrow_oop_base(), R0);
rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift());
return d;
}
@ -369,7 +379,7 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register sr
current = d;
}
if (Universe::narrow_oop_base() != NULL) {
add(d, current, R30);
add_const_optimized(d, current, Universe::narrow_oop_base(), R0);
current = d;
}
return current; // Decoded oop is in this register.
@ -377,11 +387,19 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register sr
inline void MacroAssembler::decode_heap_oop(Register d) {
Label isNull;
bool use_isel = false;
if (Universe::narrow_oop_base() != NULL) {
cmpwi(CCR0, d, 0);
beq(CCR0, isNull);
if (VM_Version::has_isel()) {
use_isel = true;
} else {
beq(CCR0, isNull);
}
}
decode_heap_oop_not_null(d);
if (use_isel) {
isel_0(d, CCR0, Assembler::equal);
}
bind(isNull);
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,9 +27,6 @@
// These definitions are inlined into class MethodHandles.
// Adapters
//static unsigned int adapter_code_size() {
// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
//}
enum /* platform_dependent_constants */ {
adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
};
@ -45,7 +42,9 @@ public:
static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
Register temp_reg, Register temp2_reg) {
Unimplemented();
verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
temp_reg, temp2_reg,
"reference is a MH");
}
static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,19 +23,10 @@
*
*/
// make sure the defines don't screw up the declarations later on in this file
// Make sure the defines don't screw up the declarations later on in this file.
#define DONT_USE_REGISTER_DEFINES
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/register.hpp"
#include "register_ppc.hpp"
#ifdef TARGET_ARCH_MODEL_ppc_32
# include "interp_masm_ppc_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_ppc_64
# include "interp_masm_ppc_64.hpp"
#endif
REGISTER_DEFINITION(Register, noreg);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,14 +25,12 @@
#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "assembler_ppc.inline.hpp"
#include "code/relocInfo.hpp"
#include "nativeInst_ppc.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/safepoint.hpp"
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
bool copy_back_to_oop_pool = true; // TODO: PPC port
// The following comment is from the declaration of DataRelocation:
//
// "The "o" (displacement) argument is relevant only to split relocations

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,6 +28,7 @@
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "frame_ppc.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "oops/compiledICHolder.hpp"
@ -194,8 +195,8 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
RegisterSaver_LiveIntReg( R27 ),
RegisterSaver_LiveIntReg( R28 ),
RegisterSaver_LiveIntReg( R29 ),
RegisterSaver_LiveIntReg( R31 ),
RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register
RegisterSaver_LiveIntReg( R30 ),
RegisterSaver_LiveIntReg( R31 ), // must be the last register (see save/restore functions below)
};
OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
@ -229,29 +230,30 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
// Save r30 in the last slot of the not yet pushed frame so that we
// Save r31 in the last slot of the not yet pushed frame so that we
// can use it as scratch reg.
__ std(R30, -reg_size, R1_SP);
__ std(R31, -reg_size, R1_SP);
assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
"consistency check");
// save the flags
// Do the save_LR_CR by hand and adjust the return pc if requested.
__ mfcr(R30);
__ std(R30, _abi(cr), R1_SP);
__ mfcr(R31);
__ std(R31, _abi(cr), R1_SP);
switch (return_pc_location) {
case return_pc_is_lr: __ mflr(R30); break;
case return_pc_is_r4: __ mr(R30, R4); break;
case return_pc_is_lr: __ mflr(R31); break;
case return_pc_is_r4: __ mr(R31, R4); break;
case return_pc_is_thread_saved_exception_pc:
__ ld(R30, thread_(saved_exception_pc)); break;
__ ld(R31, thread_(saved_exception_pc)); break;
default: ShouldNotReachHere();
}
if (return_pc_adjustment != 0)
__ addi(R30, R30, return_pc_adjustment);
__ std(R30, _abi(lr), R1_SP);
if (return_pc_adjustment != 0) {
__ addi(R31, R31, return_pc_adjustment);
}
__ std(R31, _abi(lr), R1_SP);
// push a new frame
__ push_frame(frame_size_in_bytes, R30);
__ push_frame(frame_size_in_bytes, R31);
// save all registers (ints and floats)
offset = register_save_offset;
@ -261,7 +263,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
switch (reg_type) {
case RegisterSaver::int_reg: {
if (reg_num != 30) { // We spilled R30 right at the beginning.
if (reg_num != 31) { // We spilled R31 right at the beginning.
__ std(as_Register(reg_num), offset, R1_SP);
}
break;
@ -272,8 +274,8 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
}
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
__ mfctr(R30);
__ std(R30, offset, R1_SP);
__ mfctr(R31);
__ std(R31, offset, R1_SP);
} else {
Unimplemented();
}
@ -321,7 +323,7 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
switch (reg_type) {
case RegisterSaver::int_reg: {
if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
__ ld(as_Register(reg_num), offset, R1_SP);
break;
}
@ -332,8 +334,8 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
__ ld(R30, offset, R1_SP);
__ mtctr(R30);
__ ld(R31, offset, R1_SP);
__ mtctr(R31);
}
} else {
Unimplemented();
@ -350,10 +352,10 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
__ pop_frame();
// restore the flags
__ restore_LR_CR(R30);
__ restore_LR_CR(R31);
// restore scratch register's value
__ ld(R30, -reg_size, R1_SP);
__ ld(R31, -reg_size, R1_SP);
BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}
@ -2021,6 +2023,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
frame_done_pc = (intptr_t)__ pc();
__ verify_thread();
// Native nmethod wrappers never take possesion of the oop arguments.
// So the caller will gc the arguments.
// The only thing we need an oopMap for is if the call is static.
@ -2594,7 +2598,7 @@ int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals)
}
uint SharedRuntime::out_preserve_stack_slots() {
#ifdef COMPILER2
#if defined(COMPILER1) || defined(COMPILER2)
return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
#else
return 0;
@ -2868,11 +2872,6 @@ void SharedRuntime::generate_deopt_blob() {
__ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
__ BIND(skip_restore_excp);
// reload narrro_oop_base
if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
__ load_const_optimized(R30, Universe::narrow_oop_base());
}
__ pop_frame();
// stack: (deoptee, optional i2c, caller of deoptee, ...).

View File

@ -261,9 +261,6 @@ class StubGenerator: public StubCodeGenerator {
// global toc register
__ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
// Load narrow oop base.
__ reinit_heapbase(R30, R11_scratch1);
// Remember the senderSP so we interpreter can pop c2i arguments off of the stack
// when called via a c2i.
@ -418,6 +415,23 @@ class StubGenerator: public StubCodeGenerator {
// or native call stub. The pending exception in Thread is
// converted into a Java-level exception.
//
// Read:
//
// LR: The pc the runtime library callee wants to return to.
// Since the exception occurred in the callee, the return pc
// from the point of view of Java is the exception pc.
// thread: Needed for method handles.
//
// Invalidate:
//
// volatile registers (except below).
//
// Update:
//
// R4_ARG2: exception
//
// (LR is unchanged and is live out).
//
address generate_forward_exception() {
StubCodeMark mark(this, "StubRoutines", "forward_exception");
address start = __ pc();
@ -1256,9 +1270,9 @@ class StubGenerator: public StubCodeGenerator {
Register tmp3 = R8_ARG6;
#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
StubRoutines::jbyte_disjoint_arraycopy();
address nooverlap_target = aligned ?
StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
StubRoutines::jbyte_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2014 SAP AG. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -264,11 +264,11 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
__ cmpdi(CCR0, Rmdo, 0);
__ beq(CCR0, no_mdo);
// Increment invocation counter in the MDO.
const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_ic_offs, Rmdo);
// Increment backedge counter in the MDO.
const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_bc_offs, Rmdo);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mdo_ic_offs, Rmdo);
__ stw(Rscratch2, mdo_bc_offs, Rmdo);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ bne(CCR0, done);
@ -276,12 +276,12 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
}
// Increment counter in MethodCounters*.
const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ bind(no_mdo);
__ get_method_counters(R19_method, R3_counters, done);
__ lwz(Rscratch2, mo_ic_offs, R3_counters);
__ lwz(Rscratch2, mo_bc_offs, R3_counters);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mo_ic_offs, R3_counters);
__ stw(Rscratch2, mo_bc_offs, R3_counters);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ beq(CCR0, *overflow);
@ -611,12 +611,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
// For others we can use a normal (native) entry.
inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
// Provide math entry with debugging on demand.
// Note: Debugging changes which code will get executed:
// Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call.
// Not debugging and enabled InlineIntrinics: processor instruction will get used.
// Result might differ slightly due to rounding etc.
if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
if (!InlineIntrinsics) return false;
return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
(kind==Interpreter::java_lang_math_abs));
@ -628,15 +623,8 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
return Interpreter::entry_for_kind(Interpreter::zerolocals);
}
Label Lslow_path;
const Register Rjvmti_mode = R11_scratch1;
address entry = __ pc();
// Provide math entry with debugging on demand.
__ lwz(Rjvmti_mode, thread_(interp_only_mode));
__ cmpwi(CCR0, Rjvmti_mode, 0);
__ bne(CCR0, Lslow_path); // jvmti_mode!=0
__ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
// Pop c2i arguments (if any) off when we return.
@ -659,9 +647,6 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
// And we're done.
__ blr();
// Provide slow path for JVMTI case.
__ bind(Lslow_path);
__ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
__ flush();
return entry;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2014 SAP AG. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
// Run with +PrintInterpreter to get the VM to print out the size.
// Max size with JVMTI
const static int InterpreterCodeSize = 210*K;
const static int InterpreterCodeSize = 230*K;
#endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -375,23 +375,22 @@ void TemplateTable::fast_aldc(bool wide) {
int index_size = wide ? sizeof(u2) : sizeof(u1);
const Register Rscratch = R11_scratch1;
Label resolved;
Label is_null;
// We are resolved if the resolved reference cache entry contains a
// non-null object (CallSite, etc.)
__ get_cache_index_at_bcp(Rscratch, 1, index_size); // Load index.
__ load_resolved_reference_at_index(R17_tos, Rscratch);
__ cmpdi(CCR0, R17_tos, 0);
__ bne(CCR0, resolved);
__ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null);
__ verify_oop(R17_tos);
__ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
__ bind(is_null);
__ load_const_optimized(R3_ARG1, (int)bytecode());
address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
// First time invocation - must resolve first.
__ call_VM(R17_tos, entry, R3_ARG1);
__ align(32, 12);
__ bind(resolved);
__ verify_oop(R17_tos);
}
@ -3795,9 +3794,9 @@ void TemplateTable::instanceof() {
transition(atos, itos);
Label Ldone, Lis_null, Lquicked, Lresolved;
Register Roffset = R5_ARG3,
Register Roffset = R6_ARG4,
RobjKlass = R4_ARG2,
RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
RspecifiedKlass = R5_ARG3,
Rcpool = R11_scratch1,
Rtags = R12_scratch2;

View File

@ -32,12 +32,13 @@
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vm_version_ppc.hpp"
# include <sys/sysinfo.h>
int VM_Version::_features = VM_Version::unknown_m;
int VM_Version::_measured_cache_line_size = 128; // default value
int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
const char* VM_Version::_features_str = "";
bool VM_Version::_is_determine_features_test_running = false;
@ -55,7 +56,9 @@ void VM_Version::initialize() {
// If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
if (VM_Version::has_popcntw()) {
if (VM_Version::has_lqarx()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
} else if (VM_Version::has_popcntw()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
} else if (VM_Version::has_cmpb()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
@ -66,8 +69,14 @@ void VM_Version::initialize() {
}
}
guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
"PowerArchitecturePPC64 should be 0, 5, 6 or 7");
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
PowerArchitecturePPC64 == 8,
"PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
// Power 8: Configure Data Stream Control Register.
if (PowerArchitecturePPC64 >= 8) {
config_dscr();
}
if (!UseSIGTRAP) {
MSG(TrapBasedICMissChecks);
@ -97,7 +106,7 @@ void VM_Version::initialize() {
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@ -106,11 +115,17 @@ void VM_Version::initialize() {
(has_popcntb() ? " popcntb" : ""),
(has_popcntw() ? " popcntw" : ""),
(has_fcfids() ? " fcfids" : ""),
(has_vand() ? " vand" : "")
(has_vand() ? " vand" : ""),
(has_lqarx() ? " lqarx" : ""),
(has_vcipher() ? " vcipher" : ""),
(has_vpmsumb() ? " vpmsumb" : ""),
(has_tcheck() ? " tcheck" : "")
// Make sure number of %s matches num_features!
);
_features_str = os::strdup(buf);
NOT_PRODUCT(if (Verbose) print_features(););
if (Verbose) {
print_features();
}
// PPC64 supports 8-byte compare-exchange operations (see
// Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
@ -171,6 +186,58 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
// Adjust RTM (Restricted Transactional Memory) flags.
if (!has_tcheck() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
// setting during arguments processing. See use_biased_locking().
// VM_Version_init() is executed after UseBiasedLocking is used
// in Thread::allocate().
vm_exit_during_initialization("RTM instructions are not available on this CPU");
}
if (UseRTMLocking) {
#if INCLUDE_RTM_OPT
if (!UnlockExperimentalVMOptions) {
vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
"It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
} else {
warning("UseRTMLocking is only available as experimental option on this platform.");
}
if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
// RTM locking should be used only for applications with
// high lock contention. For now we do not use it by default.
vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
}
if (!is_power_of_2(RTMTotalCountIncrRate)) {
warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
}
if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
FLAG_SET_DEFAULT(RTMAbortRatio, 50);
}
FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM.
guarantee(RTMSpinLoopCount > 0, "unsupported");
#else
// Only C2 does RTM locking optimization.
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
// setting during arguments processing. See use_biased_locking().
vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
#endif
} else { // !UseRTMLocking
if (UseRTMForStackLocks) {
if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
}
FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
}
if (UseRTMDeopt) {
FLAG_SET_DEFAULT(UseRTMDeopt, false);
}
if (PrintPreciseRTMLockingStatistics) {
FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
}
}
// This machine does not allow unaligned memory accesses
if (UseUnalignedAccesses) {
@ -180,6 +247,27 @@ void VM_Version::initialize() {
}
}
bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
// RTM locking is most useful when there is high lock contention and
// low data contention. With high lock contention the lock is usually
// inflated and biased locking is not suitable for that case.
// RTM locking code requires that biased locking is off.
// Note: we can't switch off UseBiasedLocking in get_processor_features()
// because it is used by Thread::allocate() which is called before
// VM_Version::initialize().
if (UseRTMLocking && UseBiasedLocking) {
if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
FLAG_SET_DEFAULT(UseBiasedLocking, false);
} else {
warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
UseBiasedLocking = false;
}
}
#endif
return UseBiasedLocking;
}
void VM_Version::print_features() {
tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
}
@ -443,16 +531,19 @@ void VM_Version::determine_features() {
// Don't use R0 in ldarx.
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->fsqrts(F3, F4); // code[1] -> fsqrts_m
a->isel(R7, R5, R6, 0); // code[2] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
a->cmpb(R7, R5, R6); // code[4] -> bcmp
//a->mftgpr(R7, F3); // code[5] -> mftgpr
a->popcntb(R7, R5); // code[6] -> popcntb
a->popcntw(R7, R5); // code[7] -> popcntw
a->fcfids(F3, F4); // code[8] -> fcfids
a->vand(VR0, VR0, VR0); // code[9] -> vand
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->fsqrts(F3, F4); // code[1] -> fsqrts_m
a->isel(R7, R5, R6, 0); // code[2] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
a->cmpb(R7, R5, R6); // code[4] -> cmpb
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m
a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher
a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb
a->tcheck(0); // code[12] -> tcheck
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@ -491,11 +582,14 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= isel_m;
if (code[feature_cntr++]) features |= lxarxeh_m;
if (code[feature_cntr++]) features |= cmpb_m;
//if(code[feature_cntr++])features |= mftgpr_m;
if (code[feature_cntr++]) features |= popcntb_m;
if (code[feature_cntr++]) features |= popcntw_m;
if (code[feature_cntr++]) features |= fcfids_m;
if (code[feature_cntr++]) features |= vand_m;
if (code[feature_cntr++]) features |= lqarx_m;
if (code[feature_cntr++]) features |= vcipher_m;
if (code[feature_cntr++]) features |= vpmsumb_m;
if (code[feature_cntr++]) features |= tcheck_m;
// Print the detection code.
if (PrintAssembly) {
@ -507,6 +601,69 @@ void VM_Version::determine_features() {
_features = features;
}
// Power 8: Configure Data Stream Control Register.
void VM_Version::config_dscr() {
assert(has_tcheck(), "Only execute on Power 8 or later!");
// 7 InstWords for each call (function descriptor + blr instruction).
const int code_size = (2+2*7)*BytesPerInstWord;
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("config_dscr", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);
// Emit code.
uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
uint32_t *code = (uint32_t *)a->pc();
a->mfdscr(R3);
a->blr();
void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
a->mtdscr(R3);
a->blr();
uint32_t *code_end = (uint32_t *)a->pc();
a->flush();
// Print the detection code.
if (PrintAssembly) {
ttyLocker ttyl;
tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
}
// Apply the configuration if needed.
uint64_t dscr_val = (*get_dscr)();
if (Verbose) {
tty->print_cr("dscr value was 0x%lx" , dscr_val);
}
bool change_requested = false;
if (DSCR_PPC64 != (uintx)-1) {
dscr_val = DSCR_PPC64;
change_requested = true;
}
if (DSCR_DPFD_PPC64 <= 7) {
uint64_t mask = 0x7;
if ((dscr_val & mask) != DSCR_DPFD_PPC64) {
dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64);
change_requested = true;
}
}
if (DSCR_URG_PPC64 <= 7) {
uint64_t mask = 0x7 << 6;
if ((dscr_val & mask) != DSCR_DPFD_PPC64 << 6) {
dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
change_requested = true;
}
}
if (change_requested) {
(*set_dscr)(dscr_val);
if (Verbose) {
tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
}
}
}
static int saved_features = 0;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -41,7 +41,10 @@ protected:
popcntw,
fcfids,
vand,
dcba,
lqarx,
vcipher,
vpmsumb,
tcheck,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@ -55,7 +58,10 @@ protected:
popcntw_m = (1 << popcntw),
fcfids_m = (1 << fcfids ),
vand_m = (1 << vand ),
dcba_m = (1 << dcba ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vpmsumb_m = (1 << vpmsumb),
tcheck_m = (1 << tcheck ),
all_features_m = -1
};
static int _features;
@ -65,12 +71,16 @@ protected:
static void print_features();
static void determine_features(); // also measures cache line size
static void config_dscr(); // Power 8: Configure Data Stream Control Register.
static void determine_section_size();
static void power6_micro_bench();
public:
// Initialization
static void initialize();
// Override Abstract_VM_Version implementation
static bool use_biased_locking();
static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
// CPU instruction support
static bool has_fsqrt() { return (_features & fsqrt_m) != 0; }
@ -82,7 +92,10 @@ public:
static bool has_popcntw() { return (_features & popcntw_m) != 0; }
static bool has_fcfids() { return (_features & fcfids_m) != 0; }
static bool has_vand() { return (_features & vand_m) != 0; }
static bool has_dcba() { return (_features & dcba_m) != 0; }
static bool has_lqarx() { return (_features & lqarx_m) != 0; }
static bool has_vcipher() { return (_features & vcipher_m) != 0; }
static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
static bool has_tcheck() { return (_features & tcheck_m) != 0; }
static const char* cpu_features() { return _features_str; }

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,7 +24,6 @@
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_ppc_64.hpp"