8077838: Recent developments for ppc
Power 8 recognition and instructions, math.*Exact intrinsics and rtm, C2 optimizations
Reviewed-by: kvn, simonis
This commit is contained in:
parent dc67bb0a0e
commit 5827e6ce0f
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,8 +85,7 @@ int Assembler::branch_destination(int inst, int pos) {
}

// Low-level andi-one-instruction-macro.
void Assembler::andi(Register a, Register s, const int ui16) {
assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
void Assembler::andi(Register a, Register s, const long ui16) {
if (is_power_of_2_long(((jlong) ui16)+1)) {
// pow2minus1
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
@@ -97,6 +96,7 @@ void Assembler::andi(Register a, Register s, const int ui16) {
// negpow2
clrrdi(a, s, log2_long((jlong)-ui16));
} else {
assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
andi_(a, s, ui16);
}
}
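The rewritten andi above dispatches on the shape of the mask: a value of the form 2^n-1 compiles to a single clrldi (clear left), the adjacent hunk's negative-power-of-two case compiles to clrrdi (clear right), and only the general case needs the record-form andi_ with a true 16-bit immediate. A minimal C++ sketch of that case analysis, assuming the negpow2 branch is guarded by an is_power_of_2_long(-x) test (that guard sits between the two hunks and is not shown here; the emit_* helpers are placeholders):

    // Illustrative dispatch only, not HotSpot code.
    void and_immediate(long x) {
      if (is_power_of_2_long(((jlong)x) + 1)) {
        emit_clrldi(64 - log2_long(((jlong)x) + 1));  // mask = 0...01...1
      } else if (is_power_of_2_long(-(jlong)x)) {
        emit_clrrdi(log2_long((jlong)-x));            // mask = 1...10...0
      } else {
        assert(is_uimm(x, 16), "must be 16-bit unsigned immediate");
        emit_andi_(x);                                // plain andi.
      }
    }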
@@ -356,7 +356,6 @@ void Assembler::load_const(Register d, long x, Register tmp) {
// 16 bit immediate offset.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
// Avoid accidentally trying to use R0 for indexed addressing.
assert(d != R0, "R0 not allowed");
assert_different_registers(d, tmp);

short xa, xb, xc, xd; // Four 16-bit chunks of const.
@@ -370,6 +369,58 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
return 0;
}

int retval = 0;
if (return_simm16_rest) {
retval = xd;
x = rem << 16;
xd = 0;
}

if (d == R0) { // Can't use addi.
if (is_simm(x, 32)) { // opt 2: simm32
lis(d, x >> 16);
if (xd) ori(d, d, (unsigned short)xd);
} else {
// 64-bit value: x = xa xb xc xd
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff;
xc = (x >> 16) & 0xffff;
bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
if (tmp == noreg || (xc == 0 && xd == 0)) {
if (xa_loaded) {
lis(d, xa);
if (xb) { ori(d, d, (unsigned short)xb); }
} else {
li(d, xb);
}
sldi(d, d, 32);
if (xc) { oris(d, d, (unsigned short)xc); }
if (xd) { ori( d, d, (unsigned short)xd); }
} else {
// Exploit instruction level parallelism if we have a tmp register.
bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
if (xa_loaded) {
lis(tmp, xa);
}
if (xc_loaded) {
lis(d, xc);
}
if (xa_loaded) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); }
} else {
li(tmp, xb);
}
if (xc_loaded) {
if (xd) { ori(d, d, (unsigned short)xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
}
}
return retval;
}

xc = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
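load_const_optimized decomposes the 64-bit constant into 16-bit chunks from the bottom up; because addi and addis sign-extend their immediates, each step adds (chunk >> 15) back into the remainder, so a chunk with its sign bit set is compensated by carrying one into the next-higher chunk. A worked example with an illustrative value:

    // x = 0x18000
    // xd  = 0x8000                      -> used by addi as -0x8000
    // rem = (0x18000 >> 16) + 1 = 0x2   -> carry compensates the sign extension
    // emitted: lis(d, 2); addi(d, d, (short)0x8000);
    // result:  0x20000 + (-0x8000) = 0x18000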
@@ -377,28 +428,27 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
lis(d, xc);
} else { // High 32 bits needed.

if (tmp != noreg) { // opt 3: We have a temp reg.
if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
// No carry propagation between xc and higher chunks here (use logical instructions).
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
bool load_xa = (xa != 0) || (xb < 0);
bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
bool return_xd = false;

if (load_xa) { lis(tmp, xa); }
if (xa_loaded) { lis(tmp, xa); }
if (xc) { lis(d, xc); }
if (load_xa) {
if (xa_loaded) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
} else {
li(tmp, xb); // non-negative
li(tmp, xb);
}
if (xc) {
if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
else if (xd) { addi(d, d, xd); }
if (xd) { addi(d, d, xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
return return_xd ? xd : 0; // non-negative
return retval;
}

xb = rem & 0xFFFF; // Next 16-bit chunk.
@@ -417,11 +467,51 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
if (xc) { addis(d, d, xc); }
}

// opt 5: Return offset to be inserted into following instruction.
if (return_simm16_rest) return xd;

if (xd) { addi(d, d, xd); }
return 0;
return retval;
}
// We emit only one addition to s to optimize latency.
int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
assert(s != R0 && s != tmp, "unsupported");
long rem = x;

// Case 1: Can use mr or addi.
short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xd >> 15);
if (rem == 0) {
if (xd == 0) {
if (d != s) { mr(d, s); }
return 0;
}
if (return_simm16_rest) {
return xd;
}
addi(d, s, xd);
return 0;
}

// Case 2: Can use addis.
if (xd == 0) {
short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xd >> 15);
if (rem == 0) {
addis(d, s, xc);
return 0;
}
}

// Other cases: load & add.
Register tmp1 = tmp,
tmp2 = noreg;
if ((d != tmp) && (d != s)) {
// Can use d.
tmp1 = d;
tmp2 = tmp;
}
int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
add(d, tmp1, s);
return simm16_rest;
}
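Throughout these routines, return_simm16_rest lets the caller leave the low 16 bits unmaterialized and fold them into the displacement field of a following D-form instruction, saving the final addi. A hypothetical caller pattern (register names and the offset are placeholders, not from this commit):

    // Illustrative: absorb the remainder into the load's displacement.
    int rest = add_const_optimized(Rtmp, Rbase, byte_offset, R0,
                                   /*return_simm16_rest=*/true);
    ld(Rdst, rest, Rtmp);   // ld's simm16 field adds the rest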
#ifndef PRODUCT

@@ -224,10 +224,13 @@ class Assembler : public AbstractAssembler {
ADDIS_OPCODE = (15u << OPCODE_SHIFT),
ADDIC__OPCODE = (13u << OPCODE_SHIFT),
ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1),
ADDME_OPCODE = (31u << OPCODE_SHIFT | 234u << 1),
ADDZE_OPCODE = (31u << OPCODE_SHIFT | 202u << 1),
SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1),
SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1),
SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1),
SUBFIC_OPCODE = (8u << OPCODE_SHIFT),
SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1),
SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1),
MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1),
@@ -657,6 +660,9 @@ class Assembler : public AbstractAssembler {
SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1),
EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1),

// Wait instructions for polling.
WAIT_OPCODE = (31u << OPCODE_SHIFT | 62u << 1),

// Trap instructions
TDI_OPCODE = (2u << OPCODE_SHIFT),
TWI_OPCODE = (3u << OPCODE_SHIFT),
@@ -666,8 +672,10 @@ class Assembler : public AbstractAssembler {
// Atomics.
LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1),
LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1),
LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1),
STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1),
STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1)
STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1),
STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1)

};
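These enum values follow the usual PowerPC encoding: the 6-bit primary opcode occupies the instruction's top bits (hence the shift by OPCODE_SHIFT), and the 10-bit extended opcode of X-form instructions occupies bits 21-30 (hence the shift by 1). A quick check of the new LQARX entry, assuming OPCODE_SHIFT is 26 as is conventional for this layout:

    // Illustrative encoding check: X-form, primary opcode 31, xo 276.
    // 31u << 26 == 0x7C000000, 276u << 1 == 0x228
    const unsigned LQARX = (31u << 26) | (276u << 1);  // == 0x7C000228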
@@ -1171,6 +1179,14 @@ class Assembler : public AbstractAssembler {
inline void adde_( Register d, Register a, Register b);
inline void subfe( Register d, Register a, Register b);
inline void subfe_( Register d, Register a, Register b);
inline void addme( Register d, Register a);
inline void addme_( Register d, Register a);
inline void subfme( Register d, Register a);
inline void subfme_(Register d, Register a);
inline void addze( Register d, Register a);
inline void addze_( Register d, Register a);
inline void subfze( Register d, Register a);
inline void subfze_(Register d, Register a);
inline void neg( Register d, Register a);
inline void neg_( Register d, Register a);
inline void mulli( Register d, Register a, int si16);
@@ -1189,6 +1205,38 @@ class Assembler : public AbstractAssembler {
inline void divw( Register d, Register a, Register b);
inline void divw_( Register d, Register a, Register b);

// Fixed-Point Arithmetic Instructions with Overflow detection
inline void addo( Register d, Register a, Register b);
inline void addo_( Register d, Register a, Register b);
inline void subfo( Register d, Register a, Register b);
inline void subfo_( Register d, Register a, Register b);
inline void addco( Register d, Register a, Register b);
inline void addco_( Register d, Register a, Register b);
inline void subfco( Register d, Register a, Register b);
inline void subfco_( Register d, Register a, Register b);
inline void addeo( Register d, Register a, Register b);
inline void addeo_( Register d, Register a, Register b);
inline void subfeo( Register d, Register a, Register b);
inline void subfeo_( Register d, Register a, Register b);
inline void addmeo( Register d, Register a);
inline void addmeo_( Register d, Register a);
inline void subfmeo( Register d, Register a);
inline void subfmeo_(Register d, Register a);
inline void addzeo( Register d, Register a);
inline void addzeo_( Register d, Register a);
inline void subfzeo( Register d, Register a);
inline void subfzeo_(Register d, Register a);
inline void nego( Register d, Register a);
inline void nego_( Register d, Register a);
inline void mulldo( Register d, Register a, Register b);
inline void mulldo_( Register d, Register a, Register b);
inline void mullwo( Register d, Register a, Register b);
inline void mullwo_( Register d, Register a, Register b);
inline void divdo( Register d, Register a, Register b);
inline void divdo_( Register d, Register a, Register b);
inline void divwo( Register d, Register a, Register b);
inline void divwo_( Register d, Register a, Register b);

// extended mnemonics
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
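The new XO-form variants set OE=1, so the hardware records overflow in XER[OV]/XER[SO] instead of silently wrapping; this is what lets C2 intrinsify the math.*Exact family named in the commit message with one arithmetic op plus a branch on the overflow bit. Functionally they replace a software check like the following C++ sketch (the exception helper is hypothetical):

    // What the intrinsic replaces, illustratively.
    long add_exact(long a, long b) {
      long r;
      if (__builtin_saddl_overflow(a, b, &r))  // hardware: addo. sets XER[OV]
        throw_arithmetic_exception();          // hypothetical slow path
      return r;
    }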
@@ -1303,7 +1351,7 @@ class Assembler : public AbstractAssembler {
inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);

// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
void andi( Register a, Register s, int ui16); // optimized version
void andi( Register a, Register s, long ui16); // optimized version
inline void andi_( Register a, Register s, int ui16);
inline void andis_( Register a, Register s, int ui16);
inline void ori( Register a, Register s, int ui16);
@@ -1688,14 +1736,21 @@ class Assembler : public AbstractAssembler {
inline void isync();
inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)

// Wait instructions for polling. Attention: May result in SIGILL.
inline void wait();
inline void waitrsv(); // >=Power7

// atomics
inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
inline bool lxarx_hint_exclusive_access();
inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
inline void stwcx_( Register s, Register a, Register b);
inline void stdcx_( Register s, Register a, Register b);
inline void stqcx_( Register s, Register a, Register b);

// Instructions for adjusting thread priority for simultaneous
// multithreading (SMT) on Power5.
@@ -2054,10 +2109,13 @@ class Assembler : public AbstractAssembler {
// Atomics: use ra0mem to disallow R0 as base.
inline void lwarx_unchecked(Register d, Register b, int eh1);
inline void ldarx_unchecked(Register d, Register b, int eh1);
inline void lqarx_unchecked(Register d, Register b, int eh1);
inline void lwarx( Register d, Register b, bool hint_exclusive_access);
inline void ldarx( Register d, Register b, bool hint_exclusive_access);
inline void lqarx( Register d, Register b, bool hint_exclusive_access);
inline void stwcx_(Register s, Register b);
inline void stdcx_(Register s, Register b);
inline void stqcx_(Register s, Register b);
inline void lfs( FloatRegister d, int si16);
inline void lfsx( FloatRegister d, Register b);
inline void lfd( FloatRegister d, int si16);
@@ -2120,6 +2178,20 @@ class Assembler : public AbstractAssembler {
return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
}

// If return_simm16_rest, the return value needs to get added afterwards.
int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false);
inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
}

// If return_simm16_rest, the return value needs to get added afterwards.
inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) {
return add_const_optimized(d, s, -x, tmp, return_simm16_rest);
}
inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
}

// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef CHECK_DELAY
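Note that subtracting a constant is simply adding its negation, so sub_const_optimized forwards to add_const_optimized and both directions share one optimized emitter. Illustratively (placeholder registers, not from this commit):

    // Both calls emit the same instruction sequence.
    add_const_optimized(R3, R4, -4096);
    sub_const_optimized(R3, R4, 4096);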
@@ -100,6 +100,14 @@ inline void Assembler::adde( Register d, Register a, Register b) { emit_int32(
inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
inline void Assembler::addme( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::addme_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::subfme( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::subfme_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::addze( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::addze_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::subfze( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::subfze_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
@@ -118,6 +126,38 @@ inline void Assembler::divd_( Register d, Register a, Register b) { emit_int32(
inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }

// Fixed-Point Arithmetic Instructions with Overflow detection
inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::subfo( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::subfo_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::addco( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addco_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::subfco( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::addeo( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::addeo_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::subfeo( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::addmeo( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::addmeo_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::subfmeo( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::subfmeo_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::addzeo( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::addzeo_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::subfzeo( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::subfzeo_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::nego( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); }
inline void Assembler::nego_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); }
inline void Assembler::mulldo( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::mullwo( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::divdo( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::divdo_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
inline void Assembler::divwo( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }

// extended mnemonics
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
@@ -540,15 +580,22 @@ inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); }
inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }

// Wait instructions for polling.
inline void Assembler::wait() { emit_int32( WAIT_OPCODE); }
inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7

// atomics
// Use ra0mem to disallow R0 as base.
inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); }
inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }

// Instructions for adjusting thread priority
// for simultaneous multithreading (SMT) on POWER5.
@@ -873,10 +920,13 @@ inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCOD
// ra0 version
inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }

// ra0 version
inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FLOATPRESSURE, 28);
define_pd_global(intx, FreqInlineSize, 175);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, INTPRESSURE, 25);
define_pd_global(intx, INTPRESSURE, 26);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -58,7 +58,7 @@ define_pd_global(bool, UseMembar, false);
// GC Ergo Flags
define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.

define_pd_global(uintx, TypeProfileLevel, 0);
define_pd_global(uintx, TypeProfileLevel, 111);

// Platform dependent flag handling: flags only defined on this platform.
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
@@ -71,14 +71,26 @@ define_pd_global(uintx, TypeProfileLevel, 0);
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power7. Default is 0. CPUs newer than Power7 will be " \
"recognized as Power7.") \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
\
/* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
/* indirect call by a direct call. */ \
product(bool, ReoptimizeCallSequences, true, \
"Reoptimize code-sequences of calls at runtime.") \
\
/* Power 8: Configure Data Stream Control Register. */ \
product(uint64_t,DSCR_PPC64, (uintx)-1, \
"Power8 or later: Specify encoded value for Data Stream Control " \
"Register") \
product(uint64_t,DSCR_DPFD_PPC64, 8, \
"Power8 or later: DPFD (default prefetch depth) value of the " \
"Data Stream Control Register." \
" 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
product(uint64_t,DSCR_URG_PPC64, 8, \
"Power8 or later: URG (depth attainment urgency) value of the " \
"Data Stream Control Register." \
" 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \
\
product(bool, UseLoadInstructionsForStackBangingPPC64, false, \
"Use load instructions for stack banging.") \
\
@@ -121,6 +133,41 @@ define_pd_global(uintx, TypeProfileLevel, 0);
\
product(bool, ZapMemory, false, "Write 0x0101... to empty memory." \
" Use this to ease debugging.") \

\
/* Use Restricted Transactional Memory for lock eliding */ \
product(bool, UseRTMLocking, false, \
"Enable RTM lock eliding for inflated locks in compiled code") \
\
experimental(bool, UseRTMForStackLocks, false, \
"Enable RTM lock eliding for stack locks in compiled code") \
\
product(bool, UseRTMDeopt, false, \
"Perform deopt and recompilation based on RTM abort ratio") \
\
product(uintx, RTMRetryCount, 5, \
"Number of RTM retries on lock abort or busy") \
\
experimental(intx, RTMSpinLoopCount, 100, \
"Spin count for lock to become free before RTM retry") \
\
experimental(intx, RTMAbortThreshold, 1000, \
"Calculate abort ratio after this number of aborts") \
\
experimental(intx, RTMLockingThreshold, 10000, \
"Lock count at which to do RTM lock eliding without " \
"abort ratio calculation") \
\
experimental(intx, RTMAbortRatio, 50, \
"Lock abort ratio at which to stop use RTM lock eliding") \
\
experimental(intx, RTMTotalCountIncrRate, 64, \
"Increment total RTM attempted lock count once every n times") \
\
experimental(intx, RTMLockingCalculationDelay, 0, \
"Number of milliseconds to wait before start calculating aborts " \
"for RTM locking") \
\
experimental(bool, UseRTMXendForLockBusy, true, \
"Use RTM Xend instead of Xabort when lock busy") \

#endif // CPU_PPC_VM_GLOBALS_PPC_HPP
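Taken together these flags gate the PPC64 port of HotSpot's RTM lock eliding; the experimental ones additionally require -XX:+UnlockExperimentalVMOptions. A plausible invocation on an RTM-capable Power8 machine, assuming a VM built with INCLUDE_RTM_OPT (illustrative command line, not from this commit):

    java -XX:+UseRTMLocking -XX:+UseRTMDeopt \
         -XX:+UnlockExperimentalVMOptions -XX:+UseRTMForStackLocks ...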
@@ -446,7 +446,7 @@ void InterpreterMacroAssembler::get_u4(Register Rdst, Register Rsrc, int offset,
}

// Load object from cpool->resolved_references(index).
void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) {
assert_different_registers(result, index);
get_constant_pool(result);

@@ -469,7 +469,7 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result
#endif
// Add in the index.
add(result, tmp, result);
load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
}

// Generate a subtype check: branch to ok_is_subtype if sub_klass is
@@ -876,7 +876,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// If condition is true we are done and hence we can store 0 in the displaced
// header indicating it is a recursive lock.
bne(CCR0, slow_case);
release();
std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
b(done);
@@ -1861,7 +1860,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register
const Register mdp = tmp1;
add(mdp, tmp1, R28_mdx);

// Pffset of the current profile entry to update.
// Offset of the current profile entry to update.
const Register entry_offset = tmp2;
// entry_offset = array len in number of cells
ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,7 +85,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);

// Load object from cpool->resolved_references(index).
void load_resolved_reference_at_index(Register result, Register index);
void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL);

void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
void load_receiver(Register Rparam_count, Register Rrecv_dst);

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,4 +47,4 @@
}
#endif

#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP

@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1455,7 +1455,7 @@ void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_valu
// Several special cases exist to avoid that unnecessary information is generated.
//
void MacroAssembler::cmpxchgd(ConditionRegister flag,
Register dest_current_value, Register compare_value, Register exchange_value,
Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
Register addr_base, int semantics, bool cmpxchgx_hint,
Register int_flag_success, Label* failed_ext, bool contention_hint) {
Label retry;
@@ -1465,7 +1465,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,

// Save one branch if result is returned via register and result register is different from the other ones.
bool use_result_reg = (int_flag_success!=noreg);
bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
int_flag_success!=exchange_value && int_flag_success!=addr_base);
assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");

@@ -1481,7 +1481,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
if (contention_hint) { // Don't try to reserve if cmp fails.
ld(dest_current_value, 0, addr_base);
cmpd(flag, dest_current_value, compare_value);
cmpd(flag, compare_value, dest_current_value);
bne(flag, failed);
}

@@ -1489,7 +1489,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
bind(retry);

ldarx(dest_current_value, addr_base, cmpxchgx_hint);
cmpd(flag, dest_current_value, compare_value);
cmpd(flag, compare_value, dest_current_value);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(flag, failed);
} else {
@@ -1873,7 +1873,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@@ -1909,7 +1908,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@@ -1946,7 +1944,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
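Widening compare_value to RegisterOrConstant lets callers compare against an immediate (typically null/0) without dedicating a register to it; the cmpd operand order flips, presumably so an overload taking the RegisterOrConstant first can dispatch to cmpdi for constants. The inflated-lock path later in this commit uses exactly that:

    // From the monitor CAS below: the compare value is now a bare constant.
    cmpxchgd(flag, /*current_value=*/current_header, /*compare_value=*/(intptr_t)0,
             /*exchange_value=*/R16_thread, /*where=*/temp, ...);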
@@ -1987,9 +1984,371 @@ void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mar
beq(cr_reg, done);
}

// TM on PPC64.
void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
Label retry;
bind(retry);
ldarx(result, addr, /*hint*/ false);
addi(result, result, simm16);
stdcx_(result, addr);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
} else {
bne( CCR0, retry); // stXcx_ sets CCR0
}
}

void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
Label retry;
bind(retry);
lwarx(result, addr, /*hint*/ false);
ori(result, result, uimm16);
stwcx_(result, addr);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
} else {
bne( CCR0, retry); // stXcx_ sets CCR0
}
}

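atomic_inc_ptr and atomic_ori_int are classic load-reserve/store-conditional loops: ldarx/lwarx sets a reservation, stdcx_/stwcx_ stores only if the reservation survived and records success in CCR0, and the loop retries on failure. A rough C++ analogue (illustrative, not HotSpot code):

    #include <atomic>
    void atomic_add(std::atomic<long>* addr, long delta) {
      long observed = addr->load(std::memory_order_relaxed);      // ldarx
      while (!addr->compare_exchange_weak(observed, observed + delta,
                                          std::memory_order_relaxed)) {
        // stdcx_ failed (reservation lost): observed was refreshed, retry.
      }
    }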
#if INCLUDE_RTM_OPT

// Update rtm_counters based on abort status
// input: abort_status
// rtm_counters (RTMLockingCounters*)
void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
// Mapping to keep PreciseRTMLockingStatistics similar to x86.
// x86 ppc (! means inverted, ? means not the same)
// 0 31 Set if abort caused by XABORT instruction.
// 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
// 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
// 3 10 Set if an internal buffer overflowed.
// 4 ?12 Set if a debug breakpoint was hit.
// 5 ?32 Set if an abort occurred during execution of a nested transaction.
const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
Assembler::tm_failure_persistent, // inverted: transient
Assembler::tm_trans_cf,
Assembler::tm_footprint_of,
Assembler::tm_non_trans_cf,
Assembler::tm_suspended};
const bool tm_failure_inv[] = {false, true, false, false, false, false};
assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");

const Register addr_Reg = R0;
// Keep track of offset to where rtm_counters_Reg had pointed to.
int counters_offs = RTMLockingCounters::abort_count_offset();
addi(addr_Reg, rtm_counters_Reg, counters_offs);
const Register temp_Reg = rtm_counters_Reg;

//atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
ldx(temp_Reg, addr_Reg);
addi(temp_Reg, temp_Reg, 1);
stdx(temp_Reg, addr_Reg);

if (PrintPreciseRTMLockingStatistics) {
int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;

//mftexasr(abort_status); done by caller
for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
counters_offs += counters_offs_delta;
li(temp_Reg, counters_offs_delta); // can't use addi with R0
add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
counters_offs_delta = sizeof(uintx);

Label check_abort;
rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
if (tm_failure_inv[i]) {
bne(CCR0, check_abort);
} else {
beq(CCR0, check_abort);
}
//atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
ldx(temp_Reg, addr_Reg);
addi(temp_Reg, temp_Reg, 1);
stdx(temp_Reg, addr_Reg);
bind(check_abort);
}
}
li(temp_Reg, -counters_offs); // can't use addi with R0
add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
}

// Branch if (random & (count-1) != 0), count is 2^n
// tmp and CR0 are killed
void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
mftb(tmp);
andi_(tmp, tmp, count-1);
bne(CCR0, brLabel);
}

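branch_on_random_using_tb implements cheap sampling: the time base register ticks continuously, so its low-order bits serve as a pseudo-random source, and the power-of-two count turns the modulo into a single andi_. In C++ terms (illustrative):

    // Take the increment path on roughly 1/count of the calls.
    bool skip_update(uint64_t time_base, int count) {   // count is 2^n
      return (time_base & (count - 1)) != 0;            // branch if nonzero
    }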
// Perform abort ratio calculation, set no_rtm bit if high ratio.
// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED
void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data) {
Label L_done, L_check_always_rtm1, L_check_always_rtm2;

if (RTMLockingCalculationDelay > 0) {
// Delay calculation.
ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
cmpdi(CCR0, rtm_counters_Reg, 0);
beq(CCR0, L_done);
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
}
// Abort ratio calculation only if abort_count > RTMAbortThreshold.
// Aborted transactions = abort_count * 100
// All transactions = total_count * RTMTotalCountIncrRate
// Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
cmpdi(CCR0, R0, RTMAbortThreshold);
blt(CCR0, L_check_always_rtm2);
mulli(R0, R0, 100);

const Register tmpReg = rtm_counters_Reg;
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
mulli(tmpReg, tmpReg, RTMAbortRatio);
cmpd(CCR0, R0, tmpReg);
blt(CCR0, L_check_always_rtm1); // jump to reload
if (method_data != NULL) {
// Set rtm_state to "no rtm" in MDO.
// Not using a metadata relocation. Method and Class Loader are kept alive anyway.
// (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
atomic_ori_int(R0, tmpReg, NoRTM);
}
b(L_done);

bind(L_check_always_rtm1);
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
bind(L_check_always_rtm2);
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
blt(CCR0, L_done);
if (method_data != NULL) {
// Set rtm_state to "always rtm" in MDO.
// Not using a metadata relocation. See above.
load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
atomic_ori_int(R0, tmpReg, UseRTM);
}
bind(L_done);
}

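Plugging in the default flag values makes the comparison concrete (illustrative numbers):

    // RTMAbortThreshold=1000, RTMAbortRatio=50, RTMTotalCountIncrRate=64
    // abort_count = 1000, total_count = 31  (~31*64 = 1984 real attempts)
    //   aborted = 1000 * 100   = 100000
    //   all     = 31 * 64 * 50 =  99200
    //   100000 >= 99200        -> set NoRTM in the MDO
    // Otherwise, once total_count >= 10000/64 = 156, set UseRTM ("always rtm").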
// Update counters and perform abort ratio calculation.
// input: abort_status_Reg
void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data,
bool profile_rtm) {

assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
// Update rtm counters based on state at abort.
// Reads abort_status_Reg, updates flags.
assert_different_registers(abort_status_Reg, temp_Reg);
load_const_optimized(temp_Reg, (address)rtm_counters, R0);
rtm_counters_update(abort_status_Reg, temp_Reg);
if (profile_rtm) {
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
}
}

// Retry on abort if abort's status indicates non-persistent failure.
// inputs: retry_count_Reg
// : abort_status_Reg
// output: retry_count_Reg decremented by 1
void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
Label& retryLabel, Label* checkRetry) {
Label doneRetry;
rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
bne(CCR0, doneRetry);
if (checkRetry) { bind(*checkRetry); }
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
smt_yield(); // Can't use wait(). No permission (SIGILL).
b(retryLabel);
bind(doneRetry);
}

// Spin and retry if lock is busy.
// inputs: box_Reg (monitor address)
// : retry_count_Reg
// output: retry_count_Reg decremented by 1
// CTR is killed
void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
Label SpinLoop, doneRetry;
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
li(R0, RTMSpinLoopCount);
mtctr(R0);

bind(SpinLoop);
smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
bdz(retryLabel);
ld(R0, 0, owner_addr_Reg);
cmpdi(CCR0, R0, 0);
bne(CCR0, SpinLoop);
b(retryLabel);

bind(doneRetry);
}

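The rldicr_ trick above rotates the selected TEXASR bit into bit 0 (PowerPC numbers bits from the most significant end) and masks everything else away, so CR0's EQ/NE directly answers whether that failure bit is set. Equivalent C++ (illustrative):

    // PPC bit numbering: bit 0 is the MSB of the 64-bit register.
    bool texasr_bit_set(uint64_t texasr, int bit) {
      return (texasr >> (63 - bit)) & 1;   // persistent -> give up retrying
    }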
// Use RTM for normal stack locks.
// Input: objReg (object to lock)
void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
Register obj, Register mark_word, Register tmp,
Register retry_on_abort_count_Reg,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated) {
assert(UseRTMForStackLocks, "why call this otherwise?");
assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
Label L_rtm_retry, L_decrement_retry, L_on_abort;

if (RTMRetryCount > 0) {
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
bind(L_rtm_retry);
}
andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
bne(CCR0, IsInflated);

if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement;
if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
}
assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
//atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
ldx(mark_word, tmp);
addi(mark_word, mark_word, 1);
stdx(mark_word, tmp);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
beq(flag, DONE_LABEL); // all done if unlocked

if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort);
const Register abort_status_Reg = tmp;
mftexasr(abort_status_Reg);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
}
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
if (RTMRetryCount > 0) {
// Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
} else {
bind(L_decrement_retry);
}
}

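rtm_stack_locking is the standard lock-elision shape: begin a transaction, re-read the mark word inside it so that a competing locker's store aborts us, and if the object looks unlocked simply fall through; the critical section then runs without any store to the lock word. Roughly (illustrative pseudocode, not the real control flow in every detail):

    for (int retries = RTMRetryCount; ; --retries) {
      if (tbegin()) {                            // CCR0 says we are in a transaction
        if (lock_bits(obj) == unlocked) break;   // elided: run critical section
        lock_busy();                             // tend_ or tabort_, per flag
      }
      if (persistent_abort() || retries <= 0) { take_real_lock(); break; }
    }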
// Use RTM for inflating locks
// inputs: obj (object to lock)
// mark_word (current header - KILLED)
// boxReg (on-stack box address (displaced header location) - KILLED)
void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
Register obj, Register mark_word, Register boxReg,
Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL) {
assert(UseRTMLocking, "why call this otherwise?");
Label L_rtm_retry, L_decrement_retry, L_on_abort;
// Clean monitor_value bit to get valid pointer.
int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;

// Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
const Register tmpReg = boxReg;
const Register owner_addr_Reg = mark_word;
addi(owner_addr_Reg, mark_word, owner_offset);

if (RTMRetryCount > 0) {
load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
bind(L_rtm_retry);
}
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement;
if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
}
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
//atomic_inc_ptr(R0, tmpReg); We don't increment atomically
ldx(tmpReg, R0);
addi(tmpReg, tmpReg, 1);
stdx(tmpReg, R0);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
// We don't reload mark word. Will only be reset at safepoint.
ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
cmpdi(flag, R0, 0);
beq(flag, DONE_LABEL);

if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort);
const Register abort_status_Reg = tmpReg;
mftexasr(abort_status_Reg);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
// Restore owner_addr_Reg
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
#ifdef ASSERT
andi_(R0, mark_word, markOopDesc::monitor_value);
asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
#endif
addi(owner_addr_Reg, mark_word, owner_offset);
}
if (RTMRetryCount > 0) {
// Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
}

// Appears unlocked - try to swing _owner from null to non-null.
cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);

if (RTMRetryCount > 0) {
// success done else retry
b(DONE_LABEL);
bind(L_decrement_retry);
// Spin and retry if lock is busy.
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
} else {
bind(L_decrement_retry);
}
}

#endif // INCLUDE_RTM_OPT

// "The box" is the space on the stack where we copy the object mark.
|
||||
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
|
||||
Register temp, Register displaced_header, Register current_header) {
|
||||
Register temp, Register displaced_header, Register current_header,
|
||||
bool try_bias,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data,
|
||||
bool use_rtm, bool profile_rtm) {
|
||||
assert_different_registers(oop, box, temp, displaced_header, current_header);
|
||||
assert(flag != CCR0, "bad condition register");
|
||||
Label cont;
|
||||
@@ -2006,10 +2365,18 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
return;
}

if (UseBiasedLocking) {
if (try_bias) {
biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
}

#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
stack_rtm_counters, method_data, profile_rtm,
cont, object_has_monitor);
}
#endif // INCLUDE_RTM_OPT

// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
@@ -2066,14 +2433,22 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
//

#if INCLUDE_RTM_OPT
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
rtm_counters, method_data, profile_rtm, cont);
} else {
#endif // INCLUDE_RTM_OPT

// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
li(displaced_header, 0);
// CmpxchgX sets flag to cmpX(current, displaced).
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
/*compare_value=*/displaced_header,
/*compare_value=*/(intptr_t)0,
/*exchange_value=*/R16_thread,
/*where=*/temp,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
@@ -2095,6 +2470,10 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
//asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
// "monitor->OwnerIsThread shouldn't be 0", -1);
# endif

#if INCLUDE_RTM_OPT
} // use_rtm()
#endif
}

bind(cont);
@ -2103,7 +2482,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
|
||||
}
|
||||
|
||||
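As a rough mental model of the non-RTM fast path above (field layout and tag values simplified; monitor_value is the "inflated" tag bit tested in the code), the stack-locking attempt can be sketched in C++ like this:

#include <atomic>
#include <cstdint>

// Hypothetical, simplified model of the stack-locking fast path; real HotSpot
// mark words and BasicLocks carry more state.
constexpr uintptr_t monitor_value = 2;

bool fast_lock_sketch(std::atomic<uintptr_t>* mark_addr, uintptr_t* box) {
  uintptr_t mark = mark_addr->load(std::memory_order_relaxed);
  if (mark & monitor_value) {
    return false;               // inflated: CAS monitor->_owner instead (above)
  }
  *box = mark;                  // copy the mark into "the box" on the stack
  uintptr_t expected = mark;    // then try to swing the mark to the box address
  return mark_addr->compare_exchange_strong(
      expected, reinterpret_cast<uintptr_t>(box), std::memory_order_acq_rel);
}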
void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register temp, Register displaced_header, Register current_header) {
Register temp, Register displaced_header, Register current_header,
bool try_bias, bool use_rtm) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(flag != CCR0, "bad condition register");
Label cont;
@ -2115,10 +2495,24 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
return;
}

if (UseBiasedLocking) {
if (try_bias) {
biased_locking_exit(flag, oop, current_header, cont);
}

#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
Label L_regular_unlock;
ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
bne(flag, L_regular_unlock); // else RegularLock
tend_(); // otherwise end...
b(cont); // ... and we're done
bind(L_regular_unlock);
}
#endif

// Find the lock address and load the displaced header from the stack.
ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);

@ -2129,13 +2523,12 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
andi(temp, current_header, markOopDesc::monitor_value);
cmpdi(flag, temp, 0);
bne(flag, object_has_monitor);
andi_(R0, current_header, markOopDesc::monitor_value);
bne(CCR0, object_has_monitor);
}


// Check if it is still a lightweight lock; this is true if we see
// the stack address of the basicLock in the markOop of the object.
// Cmpxchg sets flag to cmpd(current_header, box).
@ -2158,6 +2551,20 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
bind(object_has_monitor);
addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);

// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
// Clean monitor_value bit to get valid pointer
cmpdi(flag, temp, 0);
bne(flag, L_regular_inflated_unlock);
tend_();
b(cont);
bind(L_regular_inflated_unlock);
}
#endif

ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
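The xorr/orr pair at the end folds two tests into a single zero-check so one branch decides both "do we own the monitor?" and "are there pending recursions?". A C++ equivalent of just that arithmetic:

#include <cstdint>

// Equivalent of the xorr/orr trick above.
bool can_release_inflated(uintptr_t owner, uintptr_t self, uintptr_t recursions) {
  // (owner ^ self) is 0 iff the current thread owns the monitor; OR-ing in
  // recursions keeps the result 0 only when the recursion count is also 0.
  return ((owner ^ self) | recursions) == 0;
}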
@ -2441,6 +2848,8 @@ void MacroAssembler::get_vm_result(Register oop_result) {
// oop_result
// R16_thread->in_bytes(JavaThread::vm_result_offset())

verify_thread();

ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
li(R0, 0);
std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
@ -2462,26 +2871,24 @@ void MacroAssembler::get_vm_result_2(Register metadata_result) {
std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
}


void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
if (Universe::narrow_klass_base() != 0) {
// Use dst as temp if it is free.
load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
sub(dst, current, R0);
sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);
current = dst;
}
if (Universe::narrow_klass_shift() != 0) {
srdi(dst, current, Universe::narrow_klass_shift());
current = dst;
}
mr_if_needed(dst, current); // Move may be required.
return current;
}

void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
if (UseCompressedClassPointers) {
encode_klass_not_null(ck, klass);
stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
Register compressedKlass = encode_klass_not_null(ck, klass);
stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
} else {
std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
}
@ -2514,8 +2921,7 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
sldi(shifted_src, src, Universe::narrow_klass_shift());
}
if (Universe::narrow_klass_base() != 0) {
load_const(R0, Universe::narrow_klass_base());
add(dst, shifted_src, R0);
add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
}
}


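The encode/decode pair above is just base-and-shift arithmetic on class pointers; the diff only changes how the base constant is materialized. For reference, the arithmetic itself (base and shift come from Universe::narrow_klass_base/shift in the real VM; the standalone signatures here are illustrative):

#include <cstdint>

// What encode_klass_not_null / decode_klass_not_null compute.
uint32_t encode_klass(uintptr_t klass, uintptr_t base, unsigned shift) {
  return (uint32_t)((klass - base) >> shift);
}
uintptr_t decode_klass(uint32_t narrow, uintptr_t base, unsigned shift) {
  return ((uintptr_t)narrow << shift) + base;
}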
@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@
#define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP

#include "asm/assembler.hpp"
#include "runtime/rtmLocking.hpp"
#include "utilities/macros.hpp"

// MacroAssembler extends Assembler by a few frequently used macros.
@ -432,8 +433,8 @@ class MacroAssembler: public Assembler {
int semantics, bool cmpxchgx_hint = false,
Register int_flag_success = noreg, bool contention_hint = false);
void cmpxchgd(ConditionRegister flag,
Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
int semantics, bool cmpxchgx_hint = false,
Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
Register addr_base, int semantics, bool cmpxchgx_hint = false,
Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);

// interface method calling
@ -506,8 +507,42 @@ class MacroAssembler: public Assembler {
// biased locking exit case failed.
void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);

void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
void atomic_ori_int(Register addr, Register result, int uimm16);

#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
Label& retryLabel, Label* checkRetry = NULL);
void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
Register retry_on_abort_count,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
Register retry_on_busy_count, Register retry_on_abort_count,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL);
#endif

void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3,
bool try_bias = UseBiasedLocking,
RTMLockingCounters* rtm_counters = NULL,
RTMLockingCounters* stack_rtm_counters = NULL,
Metadata* method_data = NULL,
bool use_rtm = false, bool profile_rtm = false);

void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3,
bool try_bias = UseBiasedLocking, bool use_rtm = false);

// Support for serializing memory accesses between threads
void serialize_memory(Register thread, Register tmp1, Register tmp2);
@ -576,7 +611,7 @@ class MacroAssembler: public Assembler {
Register tmp = noreg);

// Null allowed.
inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);

// Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
// src == d allowed.
@ -593,7 +628,7 @@ class MacroAssembler: public Assembler {
void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
static int instr_size_for_decode_klass_not_null();
void decode_klass_not_null(Register dst, Register src = noreg);
void encode_klass_not_null(Register dst, Register src = noreg);
Register encode_klass_not_null(Register dst, Register src = noreg);

// Load common heap base into register.
void reinit_heapbase(Register d, Register tmp = noreg);

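The widened cmpxchgd declaration (a RegisterOrConstant compare value plus an optional failure label) is what the RTM path earlier in this diff relies on: call sites can now pass an immediate zero and branch straight to a retry label on failure, as in the inflated-locking code above:

cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);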
@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -333,19 +333,29 @@ inline void MacroAssembler::store_heap_oop_not_null(Register d, RegisterOrConsta
}
}

inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) {
if (UseCompressedOops) {
lwz(d, offs, s1);
decode_heap_oop(d);
if (is_null != NULL) {
cmpwi(CCR0, d, 0);
beq(CCR0, *is_null);
decode_heap_oop_not_null(d);
} else {
decode_heap_oop(d);
}
} else {
ld(d, offs, s1);
if (is_null != NULL) {
cmpdi(CCR0, d, 0);
beq(CCR0, *is_null);
}
}
}

inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided.
if (Universe::narrow_oop_base_overlaps()) {
sub(d, current, R30);
sub_const_optimized(d, current, Universe::narrow_oop_base(), R0);
current = d;
}
if (Universe::narrow_oop_shift() != 0) {
@ -358,7 +368,7 @@ inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register sr
inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
Universe::narrow_oop_shift() != 0) {
mr(d, R30);
load_const_optimized(d, Universe::narrow_oop_base(), R0);
rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift());
return d;
}
@ -369,7 +379,7 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register sr
current = d;
}
if (Universe::narrow_oop_base() != NULL) {
add(d, current, R30);
add_const_optimized(d, current, Universe::narrow_oop_base(), R0);
current = d;
}
return current; // Decoded oop is in this register.
@ -377,11 +387,19 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register sr

inline void MacroAssembler::decode_heap_oop(Register d) {
Label isNull;
bool use_isel = false;
if (Universe::narrow_oop_base() != NULL) {
cmpwi(CCR0, d, 0);
beq(CCR0, isNull);
if (VM_Version::has_isel()) {
use_isel = true;
} else {
beq(CCR0, isNull);
}
}
decode_heap_oop_not_null(d);
if (use_isel) {
isel_0(d, CCR0, Assembler::equal);
}
bind(isNull);
}


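Why isel helps here: with a non-zero narrow-oop base, decoding null must not add the base, since (0 << shift) + base != 0. The new code decodes unconditionally and, on CPUs with isel, selects zero back in without a branch. A small C++ model of the computation (standalone signature is illustrative; base and shift come from Universe::narrow_oop_* in the VM):

#include <cstdint>

// Model of the isel-based decode_heap_oop above.
uintptr_t decode_heap_oop_sketch(uint32_t narrow, uintptr_t base, unsigned shift) {
  uintptr_t decoded = ((uintptr_t)narrow << shift) + base; // not-null decode
  return (narrow == 0) ? 0 : decoded; // isel_0 performs this select branchlessly
}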
@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,9 +27,6 @@
// These definitions are inlined into class MethodHandles.

// Adapters
//static unsigned int adapter_code_size() {
// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
//}
enum /* platform_dependent_constants */ {
adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
};
@ -45,7 +42,9 @@ public:

static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
Register temp_reg, Register temp2_reg) {
Unimplemented();
verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
temp_reg, temp2_reg,
"reference is a MH");
}

static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;

File diff suppressed because it is too large
@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,19 +23,10 @@
*
*/

// make sure the defines don't screw up the declarations later on in this file
// Make sure the defines don't screw up the declarations later on in this file.
#define DONT_USE_REGISTER_DEFINES

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/register.hpp"
#include "register_ppc.hpp"
#ifdef TARGET_ARCH_MODEL_ppc_32
# include "interp_masm_ppc_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_ppc_64
# include "interp_masm_ppc_64.hpp"
#endif

REGISTER_DEFINITION(Register, noreg);


@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,14 +25,12 @@

#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "assembler_ppc.inline.hpp"
#include "code/relocInfo.hpp"
#include "nativeInst_ppc.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/safepoint.hpp"

void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
bool copy_back_to_oop_pool = true; // TODO: PPC port
// The following comment is from the declaration of DataRelocation:
//
// "The "o" (displacement) argument is relevant only to split relocations

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,6 +28,7 @@
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "frame_ppc.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "oops/compiledICHolder.hpp"
@ -194,8 +195,8 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
RegisterSaver_LiveIntReg( R27 ),
RegisterSaver_LiveIntReg( R28 ),
RegisterSaver_LiveIntReg( R29 ),
RegisterSaver_LiveIntReg( R31 ),
RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register
RegisterSaver_LiveIntReg( R30 ),
RegisterSaver_LiveIntReg( R31 ), // must be the last register (see save/restore functions below)
};

OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
@ -229,29 +230,30 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble

BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");

// Save r30 in the last slot of the not yet pushed frame so that we
// Save r31 in the last slot of the not yet pushed frame so that we
// can use it as scratch reg.
__ std(R30, -reg_size, R1_SP);
__ std(R31, -reg_size, R1_SP);
assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
"consistency check");

// save the flags
// Do the save_LR_CR by hand and adjust the return pc if requested.
__ mfcr(R30);
__ std(R30, _abi(cr), R1_SP);
__ mfcr(R31);
__ std(R31, _abi(cr), R1_SP);
switch (return_pc_location) {
case return_pc_is_lr: __ mflr(R30); break;
case return_pc_is_r4: __ mr(R30, R4); break;
case return_pc_is_lr: __ mflr(R31); break;
case return_pc_is_r4: __ mr(R31, R4); break;
case return_pc_is_thread_saved_exception_pc:
__ ld(R30, thread_(saved_exception_pc)); break;
__ ld(R31, thread_(saved_exception_pc)); break;
default: ShouldNotReachHere();
}
if (return_pc_adjustment != 0)
__ addi(R30, R30, return_pc_adjustment);
__ std(R30, _abi(lr), R1_SP);
if (return_pc_adjustment != 0) {
__ addi(R31, R31, return_pc_adjustment);
}
__ std(R31, _abi(lr), R1_SP);

// push a new frame
__ push_frame(frame_size_in_bytes, R30);
__ push_frame(frame_size_in_bytes, R31);

// save all registers (ints and floats)
offset = register_save_offset;
@ -261,7 +263,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble

switch (reg_type) {
case RegisterSaver::int_reg: {
if (reg_num != 30) { // We spilled R30 right at the beginning.
if (reg_num != 31) { // We spilled R31 right at the beginning.
__ std(as_Register(reg_num), offset, R1_SP);
}
break;
@ -272,8 +274,8 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
}
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
__ mfctr(R30);
__ std(R30, offset, R1_SP);
__ mfctr(R31);
__ std(R31, offset, R1_SP);
} else {
Unimplemented();
}
@ -321,7 +323,7 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,

switch (reg_type) {
case RegisterSaver::int_reg: {
if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
__ ld(as_Register(reg_num), offset, R1_SP);
break;
}
@ -332,8 +334,8 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
__ ld(R30, offset, R1_SP);
__ mtctr(R30);
__ ld(R31, offset, R1_SP);
__ mtctr(R31);
}
} else {
Unimplemented();
@ -350,10 +352,10 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
__ pop_frame();

// restore the flags
__ restore_LR_CR(R30);
__ restore_LR_CR(R31);

// restore scratch register's value
__ ld(R30, -reg_size, R1_SP);
__ ld(R31, -reg_size, R1_SP);

BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}
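Condensed, the scratch-register protocol that this hunk switches from R30 to R31 looks as follows (a schematic condensation of the code above in the same MacroAssembler idiom, not a drop-in replacement):

// Schematic condensation of push/restore above.
__ std(R31, -reg_size, R1_SP);           // park R31 in the frame's last save slot
__ mfcr(R31);                            // R31 is now a free scratch register
__ std(R31, _abi(cr), R1_SP);
__ mflr(R31);                            // (or the R4 / saved_exception_pc variants)
__ std(R31, _abi(lr), R1_SP);
__ push_frame(frame_size_in_bytes, R31);
// ... save remaining registers, skipping reg_num 31 (already spilled) ...
// The restore side mirrors this and reloads R31 last:
__ pop_frame();
__ restore_LR_CR(R31);
__ ld(R31, -reg_size, R1_SP);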
@ -2021,6 +2023,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
frame_done_pc = (intptr_t)__ pc();

__ verify_thread();

// Native nmethod wrappers never take possession of the oop arguments.
// So the caller will gc the arguments.
// The only thing we need an oopMap for is if the call is static.
@ -2594,7 +2598,7 @@ int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals)
}

uint SharedRuntime::out_preserve_stack_slots() {
#ifdef COMPILER2
#if defined(COMPILER1) || defined(COMPILER2)
return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
#else
return 0;
@ -2868,11 +2872,6 @@ void SharedRuntime::generate_deopt_blob() {
__ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
__ BIND(skip_restore_excp);

// Reload narrow_oop_base.
if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
__ load_const_optimized(R30, Universe::narrow_oop_base());
}

__ pop_frame();

// stack: (deoptee, optional i2c, caller of deoptee, ...).

@ -261,9 +261,6 @@ class StubGenerator: public StubCodeGenerator {
// global toc register
__ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);

// Load narrow oop base.
__ reinit_heapbase(R30, R11_scratch1);

// Remember the senderSP so the interpreter can pop c2i arguments off of the stack
// when called via a c2i.

@ -418,6 +415,23 @@ class StubGenerator: public StubCodeGenerator {
// or native call stub. The pending exception in Thread is
// converted into a Java-level exception.
//
// Read:
//
// LR: The pc the runtime library callee wants to return to.
// Since the exception occurred in the callee, the return pc
// from the point of view of Java is the exception pc.
// thread: Needed for method handles.
//
// Invalidate:
//
// volatile registers (except below).
//
// Update:
//
// R4_ARG2: exception
//
// (LR is unchanged and is live out).
//
address generate_forward_exception() {
StubCodeMark mark(this, "StubRoutines", "forward_exception");
address start = __ pc();
@ -1256,9 +1270,9 @@ class StubGenerator: public StubCodeGenerator {
Register tmp3 = R8_ARG6;

#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
StubRoutines::jbyte_disjoint_arraycopy();
address nooverlap_target = aligned ?
StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
StubRoutines::jbyte_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :

@ -1,6 +1,6 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2014 SAP AG. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -264,11 +264,11 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
__ cmpdi(CCR0, Rmdo, 0);
__ beq(CCR0, no_mdo);

// Increment invocation counter in the MDO.
const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_ic_offs, Rmdo);
// Increment backedge counter in the MDO.
const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_bc_offs, Rmdo);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mdo_ic_offs, Rmdo);
__ stw(Rscratch2, mdo_bc_offs, Rmdo);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ bne(CCR0, done);
@ -276,12 +276,12 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
}

// Increment counter in MethodCounters*.
const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ bind(no_mdo);
__ get_method_counters(R19_method, R3_counters, done);
__ lwz(Rscratch2, mo_ic_offs, R3_counters);
__ lwz(Rscratch2, mo_bc_offs, R3_counters);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mo_ic_offs, R3_counters);
__ stw(Rscratch2, mo_bc_offs, R3_counters);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ beq(CCR0, *overflow);
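Both hunks share the same load/increment/store/mask pattern; only the counter offset changes from the invocation to the backedge counter. A hedged C++ model of the test (increment and mask come from InvocationCounter in the real VM; the standalone signature is illustrative):

#include <cstdint>

// Model of the increment-and-mask overflow test emitted above.
bool counter_overflowed(uint32_t* counter, uint32_t increment, uint32_t mask) {
  *counter += increment;          // addi + stw
  // and_ sets CCR0; the beq(*overflow) fires when the masked bits are zero.
  return (*counter & mask) == 0;
}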
@ -611,12 +611,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
// For others we can use a normal (native) entry.

inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
// Provide math entry with debugging on demand.
// Note: Debugging changes which code will get executed:
// Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call.
// Not debugging and enabled InlineIntrinsics: processor instruction will get used.
// Result might differ slightly due to rounding etc.
if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
if (!InlineIntrinsics) return false;

return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
(kind==Interpreter::java_lang_math_abs));
@ -628,15 +623,8 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
return Interpreter::entry_for_kind(Interpreter::zerolocals);
}

Label Lslow_path;
const Register Rjvmti_mode = R11_scratch1;
address entry = __ pc();

// Provide math entry with debugging on demand.
__ lwz(Rjvmti_mode, thread_(interp_only_mode));
__ cmpwi(CCR0, Rjvmti_mode, 0);
__ bne(CCR0, Lslow_path); // jvmti_mode!=0

__ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);

// Pop c2i arguments (if any) off when we return.
@ -659,9 +647,6 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
// And we're done.
__ blr();

// Provide slow path for JVMTI case.
__ bind(Lslow_path);
__ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
__ flush();

return entry;

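Before this change, the generated math entry gated itself on JVMTI at run time; schematically it read as below (condensed from the removed lines in this hunk; the actual math instruction between load and return is elided by the diff — for the sqrt kind it is presumably the fsqrt checked by math_entry_available above):

// Condensed outline of the removed JVMTI-gated math entry (schematic):
__ lwz(Rjvmti_mode, thread_(interp_only_mode));   // JVMTI debugging active?
__ cmpwi(CCR0, Rjvmti_mode, 0);
__ bne(CCR0, Lslow_path);                         // yes: interpret the method
__ lfd(F1_RET, Interpreter::stackElementSize, R15_esp); // load the argument
// ... intrinsic instruction (e.g. fsqrt) on F1_RET, elided by the diff ...
__ blr();                                         // return result in F1_RET
__ bind(Lslow_path);                              // JVMTI: fall back
__ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);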
@ -1,6 +1,6 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2014 SAP AG. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
// Run with +PrintInterpreter to get the VM to print out the size.
// Max size with JVMTI

const static int InterpreterCodeSize = 210*K;
const static int InterpreterCodeSize = 230*K;

#endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP


@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -375,23 +375,22 @@ void TemplateTable::fast_aldc(bool wide) {

int index_size = wide ? sizeof(u2) : sizeof(u1);
const Register Rscratch = R11_scratch1;
Label resolved;
Label is_null;

// We are resolved if the resolved reference cache entry contains a
// non-null object (CallSite, etc.)
__ get_cache_index_at_bcp(Rscratch, 1, index_size); // Load index.
__ load_resolved_reference_at_index(R17_tos, Rscratch);
__ cmpdi(CCR0, R17_tos, 0);
__ bne(CCR0, resolved);
__ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null);
__ verify_oop(R17_tos);
__ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));

__ bind(is_null);
__ load_const_optimized(R3_ARG1, (int)bytecode());

address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);

// First time invocation - must resolve first.
__ call_VM(R17_tos, entry, R3_ARG1);

__ align(32, 12);
__ bind(resolved);
__ verify_oop(R17_tos);
}

@ -3795,9 +3794,9 @@ void TemplateTable::instanceof() {
transition(atos, itos);

Label Ldone, Lis_null, Lquicked, Lresolved;
Register Roffset = R5_ARG3,
Register Roffset = R6_ARG4,
RobjKlass = R4_ARG2,
RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
RspecifiedKlass = R5_ARG3,
Rcpool = R11_scratch1,
Rtags = R12_scratch2;


@ -32,12 +32,13 @@
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vm_version_ppc.hpp"

# include <sys/sysinfo.h>

int VM_Version::_features = VM_Version::unknown_m;
int VM_Version::_measured_cache_line_size = 128; // default value
int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
const char* VM_Version::_features_str = "";
bool VM_Version::_is_determine_features_test_running = false;

@ -55,7 +56,9 @@ void VM_Version::initialize() {

// If PowerArchitecturePPC64 hasn't been specified explicitly, determine it from features.
if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
if (VM_Version::has_popcntw()) {
if (VM_Version::has_lqarx()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
} else if (VM_Version::has_popcntw()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
} else if (VM_Version::has_cmpb()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
@ -66,8 +69,14 @@ void VM_Version::initialize() {
}
}
guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
"PowerArchitecturePPC64 should be 0, 5, 6 or 7");
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
PowerArchitecturePPC64 == 8,
"PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");

// Power 8: Configure Data Stream Control Register.
if (PowerArchitecturePPC64 >= 8) {
config_dscr();
}

if (!UseSIGTRAP) {
MSG(TrapBasedICMissChecks);
@ -97,7 +106,7 @@ void VM_Version::initialize() {
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@ -106,11 +115,17 @@ void VM_Version::initialize() {
(has_popcntb() ? " popcntb" : ""),
(has_popcntw() ? " popcntw" : ""),
(has_fcfids() ? " fcfids" : ""),
(has_vand() ? " vand" : "")
(has_vand() ? " vand" : ""),
(has_lqarx() ? " lqarx" : ""),
(has_vcipher() ? " vcipher" : ""),
(has_vpmsumb() ? " vpmsumb" : ""),
(has_tcheck() ? " tcheck" : "")
// Make sure number of %s matches num_features!
);
_features_str = os::strdup(buf);
NOT_PRODUCT(if (Verbose) print_features(););
if (Verbose) {
print_features();
}

// PPC64 supports 8-byte compare-exchange operations (see
// Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
@ -171,6 +186,58 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
// Adjust RTM (Restricted Transactional Memory) flags.
if (!has_tcheck() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
// setting during arguments processing. See use_biased_locking().
// VM_Version_init() is executed after UseBiasedLocking is used
// in Thread::allocate().
vm_exit_during_initialization("RTM instructions are not available on this CPU");
}

if (UseRTMLocking) {
#if INCLUDE_RTM_OPT
if (!UnlockExperimentalVMOptions) {
vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
"It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
} else {
warning("UseRTMLocking is only available as experimental option on this platform.");
}
if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
// RTM locking should be used only for applications with
// high lock contention. For now we do not use it by default.
vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
}
if (!is_power_of_2(RTMTotalCountIncrRate)) {
warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
}
if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
FLAG_SET_DEFAULT(RTMAbortRatio, 50);
}
FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM.
guarantee(RTMSpinLoopCount > 0, "unsupported");
#else
// Only C2 does RTM locking optimization.
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
// setting during arguments processing. See use_biased_locking().
vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
#endif
} else { // !UseRTMLocking
if (UseRTMForStackLocks) {
if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
}
FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
}
if (UseRTMDeopt) {
FLAG_SET_DEFAULT(UseRTMDeopt, false);
}
if (PrintPreciseRTMLockingStatistics) {
FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
}
}

// This machine does not allow unaligned memory accesses
if (UseUnalignedAccesses) {
@ -180,6 +247,27 @@ void VM_Version::initialize() {
}
}

bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
// RTM locking is most useful when there is high lock contention and
// low data contention. With high lock contention the lock is usually
// inflated and biased locking is not suitable for that case.
// RTM locking code requires that biased locking is off.
// Note: we can't switch off UseBiasedLocking in get_processor_features()
// because it is used by Thread::allocate() which is called before
// VM_Version::initialize().
if (UseRTMLocking && UseBiasedLocking) {
if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
FLAG_SET_DEFAULT(UseBiasedLocking, false);
} else {
warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag.");
UseBiasedLocking = false;
}
}
#endif
return UseBiasedLocking;
}

void VM_Version::print_features() {
tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
}
@ -443,16 +531,19 @@ void VM_Version::determine_features() {
// Don't use R0 in ldarx.
// Keep R3_ARG1 unmodified, it contains &field (see below).
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->fsqrts(F3, F4); // code[1] -> fsqrts_m
a->isel(R7, R5, R6, 0); // code[2] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
a->cmpb(R7, R5, R6); // code[4] -> bcmp
//a->mftgpr(R7, F3); // code[5] -> mftgpr
a->popcntb(R7, R5); // code[6] -> popcntb
a->popcntw(R7, R5); // code[7] -> popcntw
a->fcfids(F3, F4); // code[8] -> fcfids
a->vand(VR0, VR0, VR0); // code[9] -> vand
a->fsqrt(F3, F4); // code[0] -> fsqrt_m
a->fsqrts(F3, F4); // code[1] -> fsqrts_m
a->isel(R7, R5, R6, 0); // code[2] -> isel_m
a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
a->cmpb(R7, R5, R6); // code[4] -> cmpb
a->popcntb(R7, R5); // code[5] -> popcntb
a->popcntw(R7, R5); // code[6] -> popcntw
a->fcfids(F3, F4); // code[7] -> fcfids
a->vand(VR0, VR0, VR0); // code[8] -> vand
a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m
a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher
a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb
a->tcheck(0); // code[12] -> tcheck
a->blr();

// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@ -491,11 +582,14 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= isel_m;
if (code[feature_cntr++]) features |= lxarxeh_m;
if (code[feature_cntr++]) features |= cmpb_m;
//if(code[feature_cntr++])features |= mftgpr_m;
if (code[feature_cntr++]) features |= popcntb_m;
if (code[feature_cntr++]) features |= popcntw_m;
if (code[feature_cntr++]) features |= fcfids_m;
if (code[feature_cntr++]) features |= vand_m;
if (code[feature_cntr++]) features |= lqarx_m;
if (code[feature_cntr++]) features |= vcipher_m;
if (code[feature_cntr++]) features |= vpmsumb_m;
if (code[feature_cntr++]) features |= tcheck_m;

// Print the detection code.
if (PrintAssembly) {
@ -507,6 +601,69 @@ void VM_Version::determine_features() {
_features = features;
}

// Power 8: Configure Data Stream Control Register.
void VM_Version::config_dscr() {
assert(has_tcheck(), "Only execute on Power 8 or later!");

// 7 InstWords for each call (function descriptor + blr instruction).
const int code_size = (2+2*7)*BytesPerInstWord;

// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("config_dscr", code_size, 0);
MacroAssembler* a = new MacroAssembler(&cb);

// Emit code.
uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
uint32_t *code = (uint32_t *)a->pc();
a->mfdscr(R3);
a->blr();

void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
a->mtdscr(R3);
a->blr();

uint32_t *code_end = (uint32_t *)a->pc();
a->flush();

// Print the detection code.
if (PrintAssembly) {
ttyLocker ttyl;
tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
}

// Apply the configuration if needed.
uint64_t dscr_val = (*get_dscr)();
if (Verbose) {
tty->print_cr("dscr value was 0x%lx", dscr_val);
}
bool change_requested = false;
if (DSCR_PPC64 != (uintx)-1) {
dscr_val = DSCR_PPC64;
change_requested = true;
}
if (DSCR_DPFD_PPC64 <= 7) {
uint64_t mask = 0x7;
if ((dscr_val & mask) != DSCR_DPFD_PPC64) {
dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64);
change_requested = true;
}
}
if (DSCR_URG_PPC64 <= 7) {
uint64_t mask = 0x7 << 6;
if ((dscr_val & mask) != DSCR_URG_PPC64 << 6) {
dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
change_requested = true;
}
}
if (change_requested) {
(*set_dscr)(dscr_val);
if (Verbose) {
tty->print_cr("dscr was set to 0x%lx", (*get_dscr)());
}
}
}

static int saved_features = 0;


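The DSCR update in config_dscr() is a plain read-modify-write on two small bit fields: judging by the masks in the code, DPFD (default prefetch depth) sits in bits 0-2 and URG (urgency) at bit 6 — treat those positions as assumptions read off the masks, not a spec citation. A self-contained C++ model of just the field logic (one comparison is fixed here relative to the committed code, which tests DSCR_DPFD_PPC64 against the URG field):

#include <cstdint>

// Model of config_dscr()'s field updates; the *_flag parameters mirror the
// DSCR_PPC64 / DSCR_DPFD_PPC64 / DSCR_URG_PPC64 VM flags, with ~0ULL playing
// the role of the (uintx)-1 "unset" sentinel.
uint64_t apply_dscr_flags(uint64_t dscr_val, uint64_t dscr_flag,
                          uint64_t dpfd_flag, uint64_t urg_flag) {
  if (dscr_flag != ~0ULL) dscr_val = dscr_flag;          // whole-register override
  if (dpfd_flag <= 7) {                                  // prefetch-depth field
    dscr_val = (dscr_val & ~0x7ULL) | dpfd_flag;
  }
  if (urg_flag <= 7) {                                   // urgency field at bit 6
    dscr_val = (dscr_val & ~(0x7ULL << 6)) | (urg_flag << 6);
  }
  return dscr_val;
}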
@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -41,7 +41,10 @@ protected:
popcntw,
fcfids,
vand,
dcba,
lqarx,
vcipher,
vpmsumb,
tcheck,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@ -55,7 +58,10 @@ protected:
popcntw_m = (1 << popcntw),
fcfids_m = (1 << fcfids ),
vand_m = (1 << vand ),
dcba_m = (1 << dcba ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vpmsumb_m = (1 << vpmsumb),
tcheck_m = (1 << tcheck ),
all_features_m = -1
};
static int _features;
@ -65,12 +71,16 @@ protected:

static void print_features();
static void determine_features(); // also measures cache line size
static void config_dscr(); // Power 8: Configure Data Stream Control Register.
static void determine_section_size();
static void power6_micro_bench();
public:
// Initialization
static void initialize();

// Override Abstract_VM_Version implementation
static bool use_biased_locking();

static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
// CPU instruction support
static bool has_fsqrt() { return (_features & fsqrt_m) != 0; }
@ -82,7 +92,10 @@ public:
static bool has_popcntw() { return (_features & popcntw_m) != 0; }
static bool has_fcfids() { return (_features & fcfids_m) != 0; }
static bool has_vand() { return (_features & vand_m) != 0; }
static bool has_dcba() { return (_features & dcba_m) != 0; }
static bool has_lqarx() { return (_features & lqarx_m) != 0; }
static bool has_vcipher() { return (_features & vcipher_m) != 0; }
static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
static bool has_tcheck() { return (_features & tcheck_m) != 0; }

static const char* cpu_features() { return _features_str; }


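Each new capability above follows the same enum-plus-mask pattern: the enumerator gives the bit position, the *_m constant the mask, and a has_*() accessor tests it. A minimal standalone illustration of the pattern (names shortened, values hypothetical):

#include <cstdio>

// Minimal illustration of the Feature_Flag / Feature_Flag_Set pattern above.
enum Feature { fsqrt, lqarx, tcheck, num_features };
enum FeatureMask {
  fsqrt_m  = 1 << fsqrt,
  lqarx_m  = 1 << lqarx,
  tcheck_m = 1 << tcheck,
};

int main() {
  int features = lqarx_m | tcheck_m;   // as determine_features() would set them
  std::printf("has_tcheck: %d\n", (features & tcheck_m) != 0);
  return 0;
}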
@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,7 +24,6 @@
*/

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_ppc_64.hpp"