This commit is contained in:
Roland Westrelin 2012-12-21 01:39:34 -08:00
commit 42c79d741b
48 changed files with 1562 additions and 1231 deletions

View File

@ -298,7 +298,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
for (int i = 0; i < _bytes_to_copy; i++) {
address ptr = (address)(_pc_start + i);
int a_byte = (*ptr) & 0xFF;
__ a_byte (a_byte);
__ emit_int8 (a_byte);
}
}
@ -340,10 +340,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
// Emit the patch record. We need to emit a full word, so emit an extra empty byte
__ a_byte(0);
__ a_byte(being_initialized_entry_offset);
__ a_byte(bytes_to_skip);
__ a_byte(_bytes_to_copy);
__ emit_int8(0);
__ emit_int8(being_initialized_entry_offset);
__ emit_int8(bytes_to_skip);
__ emit_int8(_bytes_to_copy);
address patch_info_pc = __ pc();
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");

View File

@ -100,34 +100,6 @@ const char* Argument::name() const {
bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
// Debug helper: prints a short mnemonic for the instruction word 'inst' to
// ::tty. Only call and branch-format instructions are named; anything else
// is printed as "????". No newline is emitted.
void MacroAssembler::print_instruction(int inst) {
const char* s;
// Outer dispatch on the value returned by inv_op(inst)
// (presumably the primary opcode field -- confirm against Assembler).
switch (inv_op(inst)) {
default: s = "????"; break;
case call_op: s = "call"; break;
case branch_op:
// Branch format: the specific kind is selected by inv_op2(inst).
switch (inv_op2(inst)) {
case fb_op2: s = "fb"; break;
case fbp_op2: s = "fbp"; break;
case br_op2: s = "br"; break;
case bp_op2: s = "bp"; break;
case cb_op2: s = "cb"; break;
case bpr_op2: {
// bpr_op2 is shared with the compare-and-branch forms; is_cbcond()
// tells them apart and is_cxb() picks between "cxb" and "cwb".
if (is_cbcond(inst)) {
s = is_cxb(inst) ? "cxb" : "cwb";
} else {
s = "bpr";
}
break;
}
default: s = "????"; break;
}
// No break needed: branch_op is the last case of the outer switch.
}
::tty->print("%s", s);
}
// Patch instruction inst at offset inst_pos to refer to dest_pos
// and return the resulting instruction.
// We should have pcs, not offsets, but since all is relative, it will work out

View File

@ -603,7 +603,6 @@ class MacroAssembler : public Assembler {
friend class Label;
protected:
static void print_instruction(int inst);
static int patched_branch(int dest_pos, int inst, int inst_pos);
static int branch_destination(int inst, int pos);
@ -759,9 +758,6 @@ class MacroAssembler : public Assembler {
// Required platform-specific helpers for Label::patch_instructions.
// They _shadow_ the declarations in AbstractAssembler, which are undefined.
void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
static void pd_print_patched_instruction(address branch);
#endif
// sethi Macro handles optimizations and relocations
private:

View File

@ -43,14 +43,6 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target)
stub_inst = patched_branch(target - branch, stub_inst, 0);
}
#ifndef PRODUCT
// Non-product debug helper: reads the 32-bit instruction word stored at
// 'branch', prints its mnemonic via print_instruction(), then appends the
// text " (unresolved)" on ::tty. No newline is emitted.
inline void MacroAssembler::pd_print_patched_instruction(address branch) {
// Fetch the instruction word located at the branch address.
jint stub_inst = *(jint*) branch;
print_instruction(stub_inst);
::tty->print("%s", " (unresolved)");
}
#endif // PRODUCT
// Use the right loads/stores for the platform
inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
#ifdef _LP64

View File

@ -10224,7 +10224,7 @@ instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result
//---------- Zeros Count Instructions ------------------------------------------
instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{
predicate(UsePopCountInstruction); // See Matcher::match_rule_supported
match(Set dst (CountLeadingZerosI src));
effect(TEMP dst, TEMP tmp, KILL cr);
@ -10321,7 +10321,7 @@ instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{
ins_pipe(ialu_reg);
%}
instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{
predicate(UsePopCountInstruction); // See Matcher::match_rule_supported
match(Set dst (CountTrailingZerosI src));
effect(TEMP dst, KILL cr);
@ -10364,19 +10364,21 @@ instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{
//---------- Population Count Instructions -------------------------------------
instruct popCountI(iRegI dst, iRegI src) %{
instruct popCountI(iRegIsafe dst, iRegI src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
format %{ "POPC $src, $dst" %}
format %{ "SRL $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
"POPC $dst, $dst" %}
ins_encode %{
__ popc($src$$Register, $dst$$Register);
__ srl($src$$Register, G0, $dst$$Register);
__ popc($dst$$Register, $dst$$Register);
%}
ins_pipe(ialu_reg);
%}
// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegI dst, iRegL src) %{
instruct popCountL(iRegIsafe dst, iRegL src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));

View File

@ -434,7 +434,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe
// the frame is greater than one page in size, so check against
// the bottom of the stack
__ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
__ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check);
// the stack will overflow, throw an exception

File diff suppressed because it is too large Load Diff

View File

@ -313,10 +313,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
#endif
} else {
// make a copy of the code which is going to be patched.
for ( int i = 0; i < _bytes_to_copy; i++) {
for (int i = 0; i < _bytes_to_copy; i++) {
address ptr = (address)(_pc_start + i);
int a_byte = (*ptr) & 0xFF;
__ a_byte (a_byte);
__ emit_int8(a_byte);
*ptr = 0x90; // make the site look like a nop
}
}
@ -363,11 +363,11 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
// emit the offsets needed to find the code to patch
int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
__ a_byte(0xB8);
__ a_byte(0);
__ a_byte(being_initialized_entry_offset);
__ a_byte(bytes_to_skip);
__ a_byte(_bytes_to_copy);
__ emit_int8((unsigned char)0xB8);
__ emit_int8(0);
__ emit_int8(being_initialized_entry_offset);
__ emit_int8(bytes_to_skip);
__ emit_int8(_bytes_to_copy);
address patch_info_pc = __ pc();
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");

View File

@ -1023,7 +1023,7 @@ void MacroAssembler::lea(Address dst, AddressLiteral adr) {
void MacroAssembler::leave() {
// %%% is this really better? Why not on 32bit too?
emit_byte(0xC9); // LEAVE
emit_int8((unsigned char)0xC9); // LEAVE
}
void MacroAssembler::lneg(Register hi, Register lo) {
@ -2112,11 +2112,11 @@ void MacroAssembler::fat_nop() {
if (UseAddressNop) {
addr_nop_5();
} else {
emit_byte(0x26); // es:
emit_byte(0x2e); // cs:
emit_byte(0x64); // fs:
emit_byte(0x65); // gs:
emit_byte(0x90);
emit_int8(0x26); // es:
emit_int8(0x2e); // cs:
emit_int8(0x64); // fs:
emit_int8(0x65); // gs:
emit_int8((unsigned char)0x90);
}
}
@ -2534,12 +2534,12 @@ void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
int offs = (intptr_t)dst.target() - ((intptr_t)pc());
if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
// 0111 tttn #8-bit disp
emit_byte(0x70 | cc);
emit_byte((offs - short_size) & 0xFF);
emit_int8(0x70 | cc);
emit_int8((offs - short_size) & 0xFF);
} else {
// 0000 1111 1000 tttn #32-bit disp
emit_byte(0x0F);
emit_byte(0x80 | cc);
emit_int8(0x0F);
emit_int8((unsigned char)(0x80 | cc));
emit_long(offs - long_size);
}
} else {
@ -3085,7 +3085,8 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
// Used in sign-bit flipping with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::pshufb(dst, as_Address(src));
} else {

View File

@ -126,25 +126,6 @@ class MacroAssembler: public Assembler {
}
}
#ifndef PRODUCT
// Non-product debug helper: classifies the instruction at 'branch' by its
// first byte only and prints "<mnemonic> (unresolved)" on tty.
// Unrecognized first bytes are printed as "????". No newline is emitted.
static void pd_print_patched_instruction(address branch) {
const char* s;
// Only the leading opcode byte is examined.
unsigned char op = branch[0];
if (op == 0xE8) {
s = "call";
} else if (op == 0xE9 || op == 0xEB) {
s = "jmp";
} else if ((op & 0xF0) == 0x70) {
// 0111 tttn: conditional jump with 8-bit displacement.
s = "jcc";
} else if (op == 0x0F) {
// Treated as the two-byte (0x0F 0x8x) conditional jump with 32-bit
// displacement; the second byte is not checked here.
s = "jcc";
} else {
s = "????";
}
tty->print("%s (unresolved)", s);
}
#endif
// The following 4 methods return the offset of the appropriate move instruction
// Support for fast byte/short loading with zero extension (depending on particular CPU)

View File

@ -2174,13 +2174,13 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_encryptBlock() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
Label L_doLast;
address start = __ pc();
const Register from = rsi; // source array address
const Register from = rdx; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register keylen = rax;
@ -2189,47 +2189,74 @@ class StubGenerator: public StubCodeGenerator {
const Address key_param (rbp, 8+8);
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const XMMRegister xmm_key_shuf_mask = xmm2;
const XMMRegister xmm_key_shuf_mask = xmm1;
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ movptr(from , from_param);
__ movptr(to , to_param);
__ movptr(key , key_param);
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ movptr(from, from_param);
__ movptr(key, key_param);
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// keylen = # of 32-bit words, convert to 128-bit words
__ shrl(keylen, 2);
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
__ movptr(to, to_param);
// For encryption, the java expanded key ordering is just what we need
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp);
for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
}
load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 0);
__ jcc(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
__ subl(keylen, 2);
__ jcc(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp1);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesenclast(xmm_result, xmm_temp);
__ aesenc(xmm_result, xmm_temp1);
__ aesenclast(xmm_result, xmm_temp2);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -2245,13 +2272,13 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_decryptBlock() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
Label L_doLast;
address start = __ pc();
const Register from = rsi; // source array address
const Register from = rdx; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register keylen = rax;
@ -2260,51 +2287,76 @@ class StubGenerator: public StubCodeGenerator {
const Address key_param (rbp, 8+8);
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const XMMRegister xmm_key_shuf_mask = xmm2;
const XMMRegister xmm_key_shuf_mask = xmm1;
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ movptr(from , from_param);
__ movptr(to , to_param);
__ movptr(key , key_param);
__ movptr(from, from_param);
__ movptr(key, key_param);
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// keylen = # of 32-bit words, convert to 128-bit words
__ shrl(keylen, 2);
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0));
__ movptr(to, to_param);
// for decryption java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// we don't know if the key is aligned, hence not using load-execute form
load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp);
for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
}
__ cmpl(keylen, 0);
__ jcc(Assembler::equal, L_doLast);
// only in 192 and 256 bit keys
aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
__ subl(keylen, 2);
__ jcc(Assembler::equal, L_doLast);
// only in 256 bit keys
aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
// for decryption the aesdeclast operation is always on key+0x00
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
__ aesdeclast(xmm_result, xmm_temp);
__ aesdeclast(xmm_result, xmm_temp3);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -2340,7 +2392,7 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg4 - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
address start = __ pc();
@ -2393,7 +2445,7 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::notEqual, L_key_192_256);
// 128 bit code follows here
__ movptr(pos, 0);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_128);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
@ -2423,15 +2475,15 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
__ BIND(L_key_192_256);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
__ BIND(L_key_192_256);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
__ cmpl(rax, 52);
__ jcc(Assembler::notEqual, L_key_256);
// 192-bit code follows here (could be changed to use more xmm registers)
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_192);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_192);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
@ -2452,11 +2504,11 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::notEqual, L_loopTop_192);
__ jmp(L_exit);
__ BIND(L_key_256);
__ BIND(L_key_256);
// 256-bit code follows here (could be changed to use more xmm registers)
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_256);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
@ -2495,7 +2547,7 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
address start = __ pc();
@ -2556,9 +2608,9 @@ class StubGenerator: public StubCodeGenerator {
// 128-bit code follows here, parallelized
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_128);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_128);
__ cmpptr(len_reg, 0); // any blocks left??
__ jcc(Assembler::equal, L_exit);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
@ -2597,7 +2649,7 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::notEqual, L_key_256);
// 192-bit code follows here (could be optimized to use parallelism)
__ movptr(pos, 0);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_192);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
@ -2622,7 +2674,7 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_key_256);
// 256-bit code follows here (could be optimized to use parallelism)
__ movptr(pos, 0);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_256);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input

View File

@ -2953,21 +2953,6 @@ class StubGenerator: public StubCodeGenerator {
}
}
// Emits one AES encryption round (aesenc) on xmmdst using the round key
// found at key+offset.
//   xmmdst        - state register updated by the aesenc
//   xmmtmp        - scratch register; overwritten with the loaded round key
//   key, offset   - passed through to load_key() to locate the round key
//   xmm_shuf_mask - optional register that already holds the key shuffle
//                   mask; defaults to NULL and is forwarded to load_key()
//                   unchanged (presumably load_key fetches the mask itself
//                   in that case -- confirm against load_key)
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
// Load the round key into the scratch register, then apply the round.
load_key(xmmtmp, key, offset, xmm_shuf_mask);
__ aesenc(xmmdst, xmmtmp);
}
// Emits one AES decryption round (aesdec) on xmmdst using the round key
// found at key+offset.
//   xmmdst        - state register updated by the aesdec
//   xmmtmp        - scratch register; overwritten with the loaded round key
//   key, offset   - passed through to load_key() to locate the round key
//   xmm_shuf_mask - optional register that already holds the key shuffle
//                   mask; defaults to NULL and is forwarded to load_key()
//                   unchanged (presumably load_key fetches the mask itself
//                   in that case -- confirm against load_key)
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
// Load the round key into the scratch register, then apply the round.
load_key(xmmtmp, key, offset, xmm_shuf_mask);
__ aesdec(xmmdst, xmmtmp);
}
// Arguments:
//
// Inputs:
@ -2976,7 +2961,7 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_encryptBlock() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
Label L_doLast;
@ -2988,15 +2973,17 @@ class StubGenerator: public StubCodeGenerator {
const Register keylen = rax;
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const XMMRegister xmm_key_shuf_mask = xmm2;
const XMMRegister xmm_key_shuf_mask = xmm1;
// On win64 xmm6-xmm15 must be preserved so don't use them.
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// keylen = # of 32-bit words, convert to 128-bit words
__ shrl(keylen, 2);
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
@ -3004,25 +2991,53 @@ class StubGenerator: public StubCodeGenerator {
// For encryption, the java expanded key ordering is just what we need
// we don't know if the key is aligned, hence not using load-execute form
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp);
for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
}
load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 0);
__ jcc(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
__ subl(keylen, 2);
__ jcc(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp1);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesenclast(xmm_result, xmm_temp);
__ aesenc(xmm_result, xmm_temp1);
__ aesenclast(xmm_result, xmm_temp2);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -3040,7 +3055,7 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_decryptBlock() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
Label L_doLast;
@ -3052,15 +3067,17 @@ class StubGenerator: public StubCodeGenerator {
const Register keylen = rax;
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const XMMRegister xmm_key_shuf_mask = xmm2;
const XMMRegister xmm_key_shuf_mask = xmm1;
// On win64 xmm6-xmm15 must be preserved so don't use them.
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// keylen = # of 32-bit words, convert to 128-bit words
__ shrl(keylen, 2);
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0));
@ -3068,29 +3085,55 @@ class StubGenerator: public StubCodeGenerator {
// for decryption java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// we don't know if the key is aligned, hence not using load-execute form
load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp);
for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
}
__ cmpl(keylen, 0);
__ jcc(Assembler::equal, L_doLast);
// only in 192 and 256 bit keys
aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
__ subl(keylen, 2);
__ jcc(Assembler::equal, L_doLast);
// only in 256 bit keys
aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
// for decryption the aesdeclast operation is always on key+0x00
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
__ aesdeclast(xmm_result, xmm_temp);
__ aesdeclast(xmm_result, xmm_temp3);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
@ -3109,7 +3152,7 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg4 - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
address start = __ pc();
@ -3133,16 +3176,19 @@ class StubGenerator: public StubCodeGenerator {
const XMMRegister xmm_temp = xmm1;
// keys 0-10 preloaded into xmm2-xmm12
const int XMM_REG_NUM_KEY_FIRST = 2;
const int XMM_REG_NUM_KEY_LAST = 12;
const int XMM_REG_NUM_KEY_LAST = 15;
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
// on win64, fill len_reg from stack position
__ movl(len_reg, len_mem);
// save the xmm registers which must be preserved 6-12
// save the xmm registers which must be preserved 6-15
__ subptr(rsp, -rsp_after_call_off * wordSize);
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
@ -3151,12 +3197,11 @@ class StubGenerator: public StubCodeGenerator {
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
// load up xmm regs 2 thru 12 with key 0x00 - 0xa0
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
// load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
offset += 0x10;
}
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
@ -3167,16 +3212,15 @@ class StubGenerator: public StubCodeGenerator {
// 128 bit code follows here
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_128);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ pxor (xmm_result, xmm_key0); // do the aes rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
__ aesenc(xmm_result, as_XMMRegister(rnum));
}
__ aesenclast(xmm_result, xmm_key10);
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ addptr(pos, AESBlockSize);
@ -3198,24 +3242,23 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_key_192_256);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(rax, 52);
__ jcc(Assembler::notEqual, L_key_256);
// 192-bit code follows here (could be changed to use more xmm registers)
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_192);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ pxor (xmm_result, xmm_key0); // do the aes rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
__ aesenc(xmm_result, as_XMMRegister(rnum));
}
aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
load_key(xmm_temp, key, 0xc0);
__ aesenclast(xmm_result, xmm_temp);
__ aesenclast(xmm_result, xmm_key12);
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ addptr(pos, AESBlockSize);
@ -3225,22 +3268,19 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_key_256);
// 256-bit code follows here (could be changed to use more xmm registers)
load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_256);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ pxor (xmm_result, xmm_key0); // do the aes rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
__ aesenc(xmm_result, as_XMMRegister(rnum));
}
aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
load_key(xmm_temp, key, 0xe0);
__ aesenclast(xmm_result, xmm_temp);
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ addptr(pos, AESBlockSize);
@ -3267,7 +3307,7 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
address start = __ pc();
@ -3288,12 +3328,10 @@ class StubGenerator: public StubCodeGenerator {
#endif
const Register pos = rax;
// xmm register assignments for the loops below
const XMMRegister xmm_result = xmm0;
// keys 0-10 preloaded into xmm5-xmm15 (key 0x00 is loaded last, into xmm15)
const int XMM_REG_NUM_KEY_FIRST = 5;
const int XMM_REG_NUM_KEY_LAST = 15;
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
__ enter(); // required for proper stackwalking of RuntimeStub frame
@ -3312,13 +3350,14 @@ class StubGenerator: public StubCodeGenerator {
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
// load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00;
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
offset += 0x10;
}
load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block
// registers holding the four results in the parallelized loop
const XMMRegister xmm_result0 = xmm0;
const XMMRegister xmm_result1 = xmm2;
@ -3376,8 +3415,12 @@ class StubGenerator: public StubCodeGenerator {
__ jmp(L_multiBlock_loopTop_128);
// registers used in the non-parallelized loops
// xmm register assignments for the loops below
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_prev_block_cipher_save = xmm2;
const XMMRegister xmm_temp = xmm3;
const XMMRegister xmm_key11 = xmm3;
const XMMRegister xmm_key12 = xmm4;
const XMMRegister xmm_temp = xmm4;
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_128);
@ -3415,12 +3458,15 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_key_192_256);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
load_key(xmm_key11, key, 0xb0);
__ cmpl(rax, 52);
__ jcc(Assembler::notEqual, L_key_256);
// 192-bit code follows here (could be optimized to use parallelism)
load_key(xmm_key12, key, 0xc0); // 192-bit key goes up to c0
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_192);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
__ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
@ -3428,14 +3474,13 @@ class StubGenerator: public StubCodeGenerator {
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
__ aesdec(xmm_result, as_XMMRegister(rnum));
}
aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0
aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
__ aesdec(xmm_result, xmm_key11);
__ aesdec(xmm_result, xmm_key12);
__ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0
__ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
@ -3445,23 +3490,26 @@ class StubGenerator: public StubCodeGenerator {
// 256-bit code follows here (could be optimized to use parallelism)
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_256);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
__ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
__ aesdec(xmm_result, as_XMMRegister(rnum));
}
aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0
aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
__ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0
__ aesdec(xmm_result, xmm_key11);
load_key(xmm_temp, key, 0xc0);
__ aesdec(xmm_result, xmm_temp);
load_key(xmm_temp, key, 0xd0);
__ aesdec(xmm_result, xmm_temp);
load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0
__ aesdec(xmm_result, xmm_temp);
__ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0
__ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
// no need to store r to memory until we exit
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);

View File

@ -489,8 +489,8 @@ void VM_Version::get_processor_features() {
}
// The AES intrinsic stubs require AES instruction support (of course)
// but also require AVX and sse3 modes for instructions it use.
if (UseAES && (UseAVX > 0) && (UseSSE > 2)) {
// but also require sse3 mode for the instructions they use.
if (UseAES && (UseSSE > 2)) {
if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
UseAESIntrinsics = true;
}

View File

@ -56,15 +56,9 @@ void Assembler::pd_patch_instruction(address branch, address target) {
ShouldNotCallThis();
}
#ifndef PRODUCT
void Assembler::pd_print_patched_instruction(address branch) {
ShouldNotCallThis();
}
#endif // PRODUCT
void MacroAssembler::align(int modulus) {
while (offset() % modulus != 0)
emit_byte(AbstractAssembler::code_fill_byte());
emit_int8(AbstractAssembler::code_fill_byte());
}
void MacroAssembler::bang_stack_with_offset(int offset) {
@ -72,8 +66,7 @@ void MacroAssembler::bang_stack_with_offset(int offset) {
}
void MacroAssembler::advance(int bytes) {
_code_pos += bytes;
sync();
code_section()->set_end(code_section()->end() + bytes);
}
RegisterOrConstant MacroAssembler::delayed_value_impl(

View File

@ -37,9 +37,6 @@ class Assembler : public AbstractAssembler {
public:
void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
static void pd_print_patched_instruction(address branch);
#endif // PRODUCT
};
class MacroAssembler : public Assembler {

View File

@ -116,7 +116,7 @@ void MacroAssembler::get_thread(Register thread) {
ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode ();
if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) { // T1
// Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset]
emit_byte (segment);
emit_int8 (segment);
// ExternalAddress doesn't work because it can't take NULL
AddressLiteral null(0, relocInfo::none);
movptr (thread, null);
@ -125,7 +125,7 @@ void MacroAssembler::get_thread(Register thread) {
} else
if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) { // T2
// mov r, gs:[tlsOffset]
emit_byte (segment);
emit_int8 (segment);
AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none);
movptr (thread, tls_off);
return ;

View File

@ -30,7 +30,7 @@
void MacroAssembler::int3() {
emit_byte(0xCC);
emit_int8((unsigned char)0xCC);
}
#ifndef _LP64

View File

@ -109,37 +109,6 @@ void AbstractAssembler::flush() {
ICache::invalidate_range(addr_at(0), offset());
}
void AbstractAssembler::a_byte(int x) {
emit_byte(x);
}
// Emits x into the current code section as a single 32-bit value.
void AbstractAssembler::a_long(jint x) {
  // emit_long() is only a deprecated forwarder to emit_int32(); call the
  // underlying emitter directly.
  emit_int32(x);
}
// Labels refer to positions in the (to be) generated code. There are bound
// and unbound
//
// Bound labels refer to known positions in the already generated code.
// offset() is the position the label refers to.
//
// Unbound labels refer to unknown positions in the code to be generated; it
// may contain a list of unresolved displacements that refer to it
#ifndef PRODUCT
// Prints a description of label L on tty:
//  - bound label:   its position and section,
//  - unbound label: every instruction still waiting to be patched,
//  - otherwise:     a diagnostic with the raw locator value.
void AbstractAssembler::print(Label& L) {
  if (L.is_bound()) {
    tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect());
    return;
  }
  if (L.is_unbound()) {
    L.print_instructions((MacroAssembler*)this);
    return;
  }
  // Neither bound nor unbound: the label is in an inconsistent state.
  tty->print_cr("label in inconsistent state (loc = %d)", L.loc());
}
#endif // PRODUCT
void AbstractAssembler::bind(Label& L) {
if (L.is_bound()) {
// Assembler can bind a label more than once to the same place.
@ -342,28 +311,3 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
#endif
return offset < 0 || os::vm_page_size() <= offset;
}
#ifndef PRODUCT
// Prints, for every patch site recorded on this (unbound) label, its
// position and section followed by either the pointer stored at the site
// (constants section) or the platform-specific rendering of the instruction.
void Label::print_instructions(MacroAssembler* masm) const {
  CodeBuffer* const buffer = masm->code();
  for (int idx = 0; idx < _patch_index; ++idx) {
    // The first PatchCacheSize locators live in the inline cache; the rest
    // spill into the overflow list.
    const int locator = (idx < PatchCacheSize)
                          ? _patches[idx]
                          : _patch_overflow->at(idx - PatchCacheSize);
    const int pos     = CodeBuffer::locator_pos(locator);
    const int sect    = CodeBuffer::locator_sect(locator);
    address   site    = buffer->locator_address(locator);

    tty->print_cr("unbound label");
    tty->print("@ %d|%d ", pos, sect);
    if (sect != CodeBuffer::SECT_CONSTS) {
      masm->pd_print_patched_instruction(site);
      tty->cr();
    } else {
      // A site in the constants section holds an address, not an instruction.
      tty->print_cr(PTR_FORMAT, *(address*)site);
    }
  }
}
#endif // ndef PRODUCT

View File

@ -216,17 +216,6 @@ class AbstractAssembler : public ResourceObj {
// True when x lies in [0, 0xFF], i.e. fits in an unsigned 8-bit field.
bool isByte(int x) const {
  if (x < 0) return false;
  return x <= 0xFF;
}
// True when x lies in [0, 31].
bool isShiftCount(int x) const {
  if (x < 0) return false;
  return x <= 31;
}
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
void emit_float( jfloat x) { code_section()->emit_float( x); }
void emit_double( jdouble x) { code_section()->emit_double( x); }
void emit_address(address x) { code_section()->emit_address(x); }
void emit_byte(int x) { emit_int8 (x); } // deprecated
void emit_word(int x) { emit_int16(x); } // deprecated
void emit_long(jint x) { emit_int32(x); } // deprecated
// Instruction boundaries (required when emitting relocatable values).
@ -277,9 +266,6 @@ class AbstractAssembler : public ResourceObj {
};
#endif
// Label functions
void print(Label& L);
public:
// Creation
@ -288,6 +274,15 @@ class AbstractAssembler : public ResourceObj {
// ensure buf contains all code (call this before using/copying the code)
void flush();
// Raw data emitters. Each one forwards its argument unchanged to the
// same-named emitter on the current code section (code_section()).
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
void emit_float( jfloat x) { code_section()->emit_float( x); }
void emit_double( jdouble x) { code_section()->emit_double( x); }
void emit_address(address x) { code_section()->emit_address(x); }
// min and max values for signed immediate ranges
// Smallest value of an nbits-wide signed immediate: -(2^(nbits-1)).
static int min_simm(int nbits) {
  const intptr_t magnitude = intptr_t(1) << (nbits - 1);
  return -magnitude;
}
// Largest value of an nbits-wide signed immediate: 2^(nbits-1) - 1.
static int max_simm(int nbits) {
  const intptr_t limit = intptr_t(1) << (nbits - 1);
  return limit - 1;
}
@ -327,8 +322,6 @@ class AbstractAssembler : public ResourceObj {
void clear_inst_mark() { code_section()->clear_mark(); }
// Constants in code
void a_byte(int x);
void a_long(jint x);
void relocate(RelocationHolder const& rspec, int format = 0) {
assert(!pd_check_instruction_mark()
|| inst_mark() == NULL || inst_mark() == code_section()->end(),
@ -441,15 +434,6 @@ class AbstractAssembler : public ResourceObj {
*/
void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
/**
* Platform-dependent method of printing an instruction that needs to be
* patched.
*
* @param branch the instruction to be patched in the buffer.
*/
static void pd_print_patched_instruction(address branch);
#endif // PRODUCT
};
#ifdef TARGET_ARCH_x86

View File

@ -3442,6 +3442,11 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
preserves_state = true;
break;
case vmIntrinsics::_loadFence :
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence :
break;
default : return false; // do not inline
}
// create intrinsic node

View File

@ -2977,6 +2977,16 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
do_CompareAndSwap(x, longType);
break;
case vmIntrinsics::_loadFence :
if (os::is_MP()) __ membar_acquire();
break;
case vmIntrinsics::_storeFence:
if (os::is_MP()) __ membar_release();
break;
case vmIntrinsics::_fullFence :
if (os::is_MP()) __ membar();
break;
case vmIntrinsics::_Reference_get:
do_Reference_get(x);
break;

View File

@ -366,10 +366,12 @@ bool ciField::will_link(ciInstanceKlass* accessing_klass,
// ------------------------------------------------------------------
// ciField::print
void ciField::print() {
tty->print("<ciField ");
tty->print("<ciField name=");
_holder->print_name();
tty->print(".");
_name->print_symbol();
tty->print(" signature=");
_signature->print_symbol();
tty->print(" offset=%d type=", _offset);
if (_type != NULL) _type->print_name();
else tty->print("(reference)");

View File

@ -169,16 +169,18 @@ void ClassLoaderData::add_dependency(Handle dependency, TRAPS) {
ok = (objArrayOop)ok->obj_at(1);
}
// Must handle over GC points
assert (last != NULL, "dependencies should be initialized");
objArrayHandle last_handle(THREAD, last);
// Create a new dependency node with fields for (class_loader or mirror, next)
objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
deps->obj_at_put(0, dependency());
// Must handle over more GC points
// Must handle over GC points
objArrayHandle new_dependency(THREAD, deps);
// Add the dependency under lock
assert (last != NULL, "dependencies should be initialized");
objArrayHandle last_handle(THREAD, last);
locked_add_dependency(last_handle, new_dependency);
}

View File

@ -756,6 +756,15 @@
do_intrinsic(_unpark, sun_misc_Unsafe, unpark_name, unpark_signature, F_RN) \
do_name( unpark_name, "unpark") \
do_alias( unpark_signature, /*(LObject;)V*/ object_void_signature) \
do_intrinsic(_loadFence, sun_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \
do_name( loadFence_name, "loadFence") \
do_alias( loadFence_signature, void_method_signature) \
do_intrinsic(_storeFence, sun_misc_Unsafe, storeFence_name, storeFence_signature, F_RN) \
do_name( storeFence_name, "storeFence") \
do_alias( storeFence_signature, void_method_signature) \
do_intrinsic(_fullFence, sun_misc_Unsafe, fullFence_name, fullFence_signature, F_RN) \
do_name( fullFence_name, "fullFence") \
do_alias( fullFence_signature, void_method_signature) \
\
/* unsafe memory references (there are a lot of them...) */ \
do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \
@ -897,12 +906,14 @@
do_intrinsic(_getAndAddLong, sun_misc_Unsafe, getAndAddLong_name, getAndAddLong_signature, F_R) \
do_name( getAndAddLong_name, "getAndAddLong") \
do_signature(getAndAddLong_signature, "(Ljava/lang/Object;JJ)J" ) \
do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSet_name, getAndSetInt_signature, F_R) \
do_name( getAndSet_name, "getAndSet") \
do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSetInt_name, getAndSetInt_signature, F_R) \
do_name( getAndSetInt_name, "getAndSetInt") \
do_alias( getAndSetInt_signature, /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature) \
do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSet_name, getAndSetLong_signature, F_R) \
do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSetLong_name, getAndSetLong_signature, F_R) \
do_name( getAndSetLong_name, "getAndSetLong") \
do_alias( getAndSetLong_signature, /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature) \
do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSet_name, getAndSetObject_signature, F_R) \
do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSetObject_name, getAndSetObject_signature, F_R)\
do_name( getAndSetObject_name, "getAndSetObject") \
do_signature(getAndSetObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
\
/* prefetch_signature is shared by all prefetch variants */ \

View File

@ -538,6 +538,7 @@ void CompilerOracle::parse_from_line(char* line) {
if (match != NULL) {
if (!_quiet) {
ResourceMark rm;
tty->print("CompilerOracle: %s ", command_names[command]);
match->print();
}

View File

@ -189,6 +189,11 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
set_req(1, addx);
set_req(2, a22);
progress = this;
PhaseIterGVN *igvn = phase->is_IterGVN();
if (add2->outcnt() == 0 && igvn) {
// add disconnected.
igvn->_worklist.push(add2);
}
}
}
@ -624,6 +629,11 @@ Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) {
if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant?
set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1))));
set_req(Offset, add->in(2));
PhaseIterGVN *igvn = phase->is_IterGVN();
if (add->outcnt() == 0 && igvn) {
// add disconnected.
igvn->_worklist.push((Node*)add);
}
return this; // Made progress
}
}

View File

@ -403,7 +403,7 @@ const char* InlineTree::check_can_parse(ciMethod* callee) {
//------------------------------print_inlining---------------------------------
// Really, the failure_msg can be a success message also.
void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
if (callee_method == NULL) tty->print(" callee not monotonic or profiled");
if (Verbose && callee_method) {
const InlineTree *top = this;

View File

@ -274,6 +274,9 @@ class LateInlineCallGenerator : public DirectCallGenerator {
virtual void do_late_inline();
virtual JVMState* generate(JVMState* jvms) {
Compile *C = Compile::current();
C->print_inlining_skip(this);
// Record that this call site should be revisited once the main
// parse is finished.
Compile::current()->add_late_inline(this);
@ -284,7 +287,6 @@ class LateInlineCallGenerator : public DirectCallGenerator {
// as is done for allocations and macro expansion.
return DirectCallGenerator::generate(jvms);
}
};
@ -307,7 +309,9 @@ void LateInlineCallGenerator::do_late_inline() {
// Make sure the state is a MergeMem for parsing.
if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
C->initial_gvn()->set_type_bottom(mem);
map->set_req(TypeFunc::Memory, mem);
}
// Make enough space for the expression stack and transfer the incoming arguments
@ -320,6 +324,8 @@ void LateInlineCallGenerator::do_late_inline() {
}
}
C->print_inlining_insert(this);
CompileLog* log = C->log();
if (log != NULL) {
log->head("late_inline method='%d'", log->identify(method()));
@ -608,7 +614,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
if (cg != NULL && cg->is_inline())
return cg;
} else {
if (PrintInlining) CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
if (PrintInlining) C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
}
}
break;

View File

@ -147,9 +147,9 @@ class CallGenerator : public ResourceObj {
CallGenerator* cg);
virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
if (PrintInlining)
CompileTask::print_inlining(callee, inline_level, bci, msg);
C->print_inlining(callee, inline_level, bci, msg);
}
};

View File

@ -751,7 +751,7 @@ void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj
projs->fallthrough_ioproj = pn;
for (DUIterator j = pn->outs(); pn->has_out(j); j++) {
Node* e = pn->out(j);
if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) {
if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) {
assert(projs->exobj == NULL, "only one");
projs->exobj = e;
}

View File

@ -1566,6 +1566,10 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
Node* n = in(j); // Get the input
if (rc == NULL || phase->type(rc) == Type::TOP) {
if (n != top) { // Not already top?
PhaseIterGVN *igvn = phase->is_IterGVN();
if (can_reshape && igvn != NULL) {
igvn->_worklist.push(r);
}
set_req(j, top); // Nuke it down
progress = this; // Record progress
}

View File

@ -610,7 +610,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
_trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
_printer(IdealGraphPrinter::printer()),
#endif
_congraph(NULL) {
_congraph(NULL),
_print_inlining_list(NULL),
_print_inlining(0) {
C = this;
CompileWrapper cw(this);
@ -666,6 +668,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
PhaseGVN gvn(node_arena(), estimated_size);
set_initial_gvn(&gvn);
if (PrintInlining) {
_print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
}
{ // Scope for timing the parser
TracePhase t3("parse", &_t_parser, true);
@ -754,6 +759,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
}
}
assert(_late_inlines.length() == 0, "should have been processed");
dump_inlining();
print_method("Before RemoveUseless", 3);
@ -899,7 +905,9 @@ Compile::Compile( ciEnv* ci_env,
#endif
_dead_node_list(comp_arena()),
_dead_node_count(0),
_congraph(NULL) {
_congraph(NULL),
_print_inlining_list(NULL),
_print_inlining(0) {
C = this;
#ifndef PRODUCT
@ -3351,3 +3359,11 @@ void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n
cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
}
}
// Writes the buffered PrintInlining output to tty, one buffer after another
// in list order. Does nothing unless PrintInlining is set.
void Compile::dump_inlining() {
  if (!PrintInlining) {
    return;
  }
  for (int i = 0; i < _print_inlining_list->length(); i++) {
    // The buffered text is data, not a format string: it can contain '%'
    // (e.g. from class or method names), so it must not be handed to
    // print() as the format argument. Emit it verbatim instead.
    tty->print_raw(_print_inlining_list->at(i).ss()->as_string());
  }
}

View File

@ -30,6 +30,7 @@
#include "code/debugInfoRec.hpp"
#include "code/exceptionHandlerTable.hpp"
#include "compiler/compilerOracle.hpp"
#include "compiler/compileBroker.hpp"
#include "libadt/dict.hpp"
#include "libadt/port.hpp"
#include "libadt/vectset.hpp"
@ -369,6 +370,61 @@ class Compile : public Phase {
GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after
// main parsing has finished.
// Inlining may not happen in parse order which would make
// PrintInlining output confusing. Keep track of PrintInlining
// pieces in order.
class PrintInliningBuffer : public ResourceObj {
private:
CallGenerator* _cg;
stringStream* _ss;
public:
PrintInliningBuffer()
: _cg(NULL) { _ss = new stringStream(); }
stringStream* ss() const { return _ss; }
CallGenerator* cg() const { return _cg; }
void set_cg(CallGenerator* cg) { _cg = cg; }
};
GrowableArray<PrintInliningBuffer>* _print_inlining_list;
int _print_inlining;
public:
// Returns the stream of the buffer at index _print_inlining, i.e. the place
// where PrintInlining output is currently being collected.
// _print_inlining_list is only allocated when PrintInlining is set.
outputStream* print_inlining_stream() const {
  stringStream* current = _print_inlining_list->at(_print_inlining).ss();
  return current;
}
// Tags the current output buffer with cg, then opens a new empty buffer
// right after it and makes that one current. print_inlining_insert(cg)
// later uses the tag to find where output for cg belongs.
// No-op unless PrintInlining is set.
void print_inlining_skip(CallGenerator* cg) {
  if (!PrintInlining) {
    return;
  }
  // Update the element stored in the list through its address. Calling
  // set_cg() on the result of at() would only modify a temporary if at()
  // returns the element by value, and the tag would be lost.
  _print_inlining_list->adr_at(_print_inlining)->set_cg(cg);
  _print_inlining++;
  _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer());
}
// Finds the buffer that was tagged with cg, inserts a new empty buffer
// directly after it, makes the new buffer current and clears the tag.
// It is a fatal error if no buffer carries the tag.
// No-op unless PrintInlining is set.
void print_inlining_insert(CallGenerator* cg) {
  if (!PrintInlining) {
    return;
  }
  for (int i = 0; i < _print_inlining_list->length(); i++) {
    if (_print_inlining_list->adr_at(i)->cg() != cg) {
      continue;
    }
    _print_inlining_list->insert_before(i + 1, PrintInliningBuffer());
    _print_inlining = i + 1;
    // Re-fetch the element after the insertion (the backing storage may have
    // been reallocated) and clear the tag on the stored element itself, not
    // on a copy returned by at().
    _print_inlining_list->adr_at(i)->set_cg(NULL);
    return;
  }
  ShouldNotReachHere();
}
// Formats one inlining decision with CompileTask::print_inlining and appends
// it to the current PrintInlining buffer rather than printing it directly.
//   method, inline_level, bci, msg - passed through to CompileTask::print_inlining
void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
  stringStream ss;
  CompileTask::print_inlining(&ss, method, inline_level, bci, msg);
  // The formatted line is data and may contain '%' (e.g. in method names),
  // so it must not be used as a printf-style format string.
  print_inlining_stream()->print_raw(ss.as_string());
}
private:
// Matching, CFG layout, allocation, code generation
PhaseCFG* _cfg; // Results of CFG finding
bool _select_24_bit_instr; // We selected an instruction with a 24-bit result
@ -591,7 +647,7 @@ class Compile : public Phase {
void reset_dead_node_list() { _dead_node_list.Reset();
_dead_node_count = 0;
}
uint live_nodes() {
uint live_nodes() const {
int val = _unique - _dead_node_count;
assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count));
return (uint) val;
@ -702,7 +758,7 @@ class Compile : public Phase {
void identify_useful_nodes(Unique_Node_List &useful);
void update_dead_node_list(Unique_Node_List &useful);
void remove_useless_nodes (Unique_Node_List &useful);
void remove_useless_nodes (Unique_Node_List &useful);
WarmCallInfo* warm_calls() const { return _warm_calls; }
void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
@ -711,6 +767,8 @@ class Compile : public Phase {
// Record this CallGenerator for inlining at the end of parsing.
void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
void dump_inlining();
// Matching, CFG layout, allocation, code generation
PhaseCFG* cfg() { return _cfg; }
bool select_24_bit_instr() const { return _select_24_bit_instr; }

View File

@ -40,19 +40,24 @@
#include "prims/nativeLookup.hpp"
#include "runtime/sharedRuntime.hpp"
void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) {
outputStream* out = tty;
if (!PrintInlining) {
if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
method->print_short_name();
tty->cr();
}
CompileTask::print_inlining(prof_method, depth, bci);
} else {
out = C->print_inlining_stream();
}
CompileTask::print_inline_indent(depth);
tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
prof_klass->name()->print_symbol();
tty->cr();
CompileTask::print_inline_indent(depth, out);
out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
stringStream ss;
prof_klass->name()->print_symbol_on(&ss);
out->print(ss.as_string());
out->cr();
}
}
@ -233,13 +238,13 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
}
if (miss_cg != NULL) {
if (next_hit_cg != NULL) {
trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
// We don't need to record dependency on a receiver here and below.
// Whenever we inline, the dependency is added by Parse::Parse().
miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
}
if (miss_cg != NULL) {
trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
if (cg != NULL) return cg;
}

View File

@ -1771,11 +1771,21 @@ void GraphKit::replace_call(CallNode* call, Node* result) {
CallProjections callprojs;
call->extract_projections(&callprojs, true);
// Replace all the old call edges with the edges from the inlining result
C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control));
C->gvn_replace_by(callprojs.fallthrough_memproj, final_state->in(TypeFunc::Memory));
C->gvn_replace_by(callprojs.fallthrough_ioproj, final_state->in(TypeFunc::I_O));
Node* init_mem = call->in(TypeFunc::Memory);
Node* final_mem = final_state->in(TypeFunc::Memory);
Node* final_ctl = final_state->in(TypeFunc::Control);
Node* final_io = final_state->in(TypeFunc::I_O);
// Replace all the old call edges with the edges from the inlining result
if (callprojs.fallthrough_catchproj != NULL) {
C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl);
}
if (callprojs.fallthrough_memproj != NULL) {
C->gvn_replace_by(callprojs.fallthrough_memproj, final_mem);
}
if (callprojs.fallthrough_ioproj != NULL) {
C->gvn_replace_by(callprojs.fallthrough_ioproj, final_io);
}
// Replace the result with the new result if it exists and is used
if (callprojs.resproj != NULL && result != NULL) {
@ -2980,7 +2990,7 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) );
// create memory projection for i_o
set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
make_slow_call_ex(allocx, env()->Throwable_klass(), true);
// create a memory projection as for the normal control path
Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory));

View File

@ -282,6 +282,7 @@ class LibraryCallKit : public GraphKit {
typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind);
bool inline_unsafe_ordered_store(BasicType type);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
@ -334,6 +335,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
case vmIntrinsics::_getAndSetInt:
case vmIntrinsics::_getAndSetLong:
case vmIntrinsics::_getAndSetObject:
case vmIntrinsics::_loadFence:
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence:
break; // InlineNatives does not control String.compareTo
case vmIntrinsics::_Reference_get:
break; // InlineNatives does not control Reference.get
@ -536,7 +540,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
// Try to inline the intrinsic.
if (kit.try_to_inline()) {
if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
}
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
if (C->log()) {
@ -555,7 +559,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
if (jvms->has_method()) {
// Not a root compile.
const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
CompileTask::print_inlining(callee, jvms->depth() - 1, bci, msg);
C->print_inlining(callee, jvms->depth() - 1, bci, msg);
} else {
// Root compile
tty->print("Did not generate intrinsic %s%s at bci:%d in",
@ -585,7 +589,7 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
Node* slow_ctl = kit.try_to_predicate();
if (!kit.failing()) {
if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
}
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
if (C->log()) {
@ -602,12 +606,12 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
if (jvms->has_method()) {
// Not a root compile.
const char* msg = "failed to generate predicate for intrinsic";
CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
} else {
// Root compile
tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
vmIntrinsics::name_at(intrinsic_id()),
(is_virtual() ? " (virtual)" : ""), bci);
C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
vmIntrinsics::name_at(intrinsic_id()),
(is_virtual() ? " (virtual)" : ""), bci);
}
}
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
@ -732,6 +736,10 @@ bool LibraryCallKit::try_to_inline() {
case vmIntrinsics::_getAndSetLong: return inline_unsafe_load_store(T_LONG, LS_xchg);
case vmIntrinsics::_getAndSetObject: return inline_unsafe_load_store(T_OBJECT, LS_xchg);
case vmIntrinsics::_loadFence:
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence: return inline_unsafe_fence(intrinsic_id());
case vmIntrinsics::_currentThread: return inline_native_currentThread();
case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted();
@ -2840,6 +2848,26 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
return true;
}
// Intrinsic expansion shared by the _loadFence, _storeFence and _fullFence
// intrinsics. Returns true once the barriers have been inserted; any other
// intrinsic id is reported through fatal_unexpected_iid() and yields false.
bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
  // Every variant starts with a CPU-order barrier so that earlier loads and
  // stores cannot be moved below the fence.
  insert_mem_bar(Op_MemBarCPUOrder);

  // Select the barrier that follows: acquire, release or volatile.
  int fence_opcode;
  switch (id) {
    case vmIntrinsics::_loadFence:  fence_opcode = Op_MemBarAcquire;  break;
    case vmIntrinsics::_storeFence: fence_opcode = Op_MemBarRelease;  break;
    case vmIntrinsics::_fullFence:  fence_opcode = Op_MemBarVolatile; break;
    default:
      fatal_unexpected_iid(id);
      return false;
  }
  insert_mem_bar(fence_opcode);
  return true;
}
//----------------------------inline_unsafe_allocate---------------------------
// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
bool LibraryCallKit::inline_unsafe_allocate() {
@ -2952,14 +2980,23 @@ bool LibraryCallKit::inline_native_isInterrupted() {
// We only go to the fast case code if we pass two guards.
// Paths which do not pass are accumulated in the slow_region.
enum {
no_int_result_path = 1, // t == Thread.current() && !TLS._osthread._interrupted
no_clear_result_path = 2, // t == Thread.current() && TLS._osthread._interrupted && !clear_int
slow_result_path = 3, // slow path: t.isInterrupted(clear_int)
PATH_LIMIT
};
// Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
// out of the function.
insert_mem_bar(Op_MemBarCPUOrder);
RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
RegionNode* slow_region = new (C) RegionNode(1);
record_for_igvn(slow_region);
RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow
PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
enum { no_int_result_path = 1,
no_clear_result_path = 2,
slow_result_path = 3
};
// (a) Receiving thread must be the current thread.
Node* rec_thr = argument(0);
@ -2968,14 +3005,13 @@ bool LibraryCallKit::inline_native_isInterrupted() {
Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
if (!known_current_thread)
generate_slow_guard(bol_thr, slow_region);
generate_slow_guard(bol_thr, slow_region);
// (b) Interrupt bit on TLS must be false.
Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
// Set the control input on the field _interrupted read to prevent it floating up.
Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
@ -3020,22 +3056,20 @@ bool LibraryCallKit::inline_native_isInterrupted() {
Node* slow_val = set_results_for_java_call(slow_call);
// this->control() comes from set_results_for_java_call
// If we know that the result of the slow call will be true, tell the optimizer!
if (known_current_thread) slow_val = intcon(1);
Node* fast_io = slow_call->in(TypeFunc::I_O);
Node* fast_mem = slow_call->in(TypeFunc::Memory);
// These two phis are pre-filled with copies of the fast IO and Memory
Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO);
Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
PhiNode* result_mem = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
PhiNode* result_io = PhiNode::make(result_rgn, fast_io, Type::ABIO);
result_rgn->init_req(slow_result_path, control());
io_phi ->init_req(slow_result_path, i_o());
mem_phi ->init_req(slow_result_path, reset_memory());
result_io ->init_req(slow_result_path, i_o());
result_mem->init_req(slow_result_path, reset_memory());
result_val->init_req(slow_result_path, slow_val);
set_all_memory( _gvn.transform(mem_phi) );
set_i_o( _gvn.transform(io_phi) );
set_all_memory(_gvn.transform(result_mem));
set_i_o( _gvn.transform(result_io));
}
C->set_has_split_ifs(true); // Has chance for split-if optimization
@ -3319,7 +3353,7 @@ bool LibraryCallKit::inline_native_subtype_check() {
Node* arg = args[which_arg];
arg = null_check(arg);
if (stopped()) break;
args[which_arg] = _gvn.transform(arg);
args[which_arg] = arg;
Node* p = basic_plus_adr(arg, class_klass_offset);
Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);

View File

@ -509,6 +509,7 @@ void Parse::do_multianewarray() {
makecon(TypeKlassPtr::make(array_klass)),
dims);
}
make_slow_call_ex(c, env()->Throwable_klass(), false);
Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms));

View File

@ -989,7 +989,7 @@ JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* t
// since we're notifying the VM on every catch.
// Force deoptimization and the rest of the lookup
// will be fine.
deoptimize_caller_frame(thread, true);
deoptimize_caller_frame(thread);
}
// Check the stack guard pages. If enabled, look for handler in this frame;
@ -1143,19 +1143,24 @@ const TypeFunc *OptoRuntime::rethrow_Type() {
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
// Deoptimize frame
if (doit) {
// Called from within the owner thread, so no need for safepoint
RegisterMap reg_map(thread);
frame stub_frame = thread->last_frame();
assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
frame caller_frame = stub_frame.sender(&reg_map);
// Deoptimize the caller frame.
Deoptimization::deoptimize_frame(thread, caller_frame.id());
// Deoptimize the caller before continuing, as the compiled
// exception handler table may not be valid.
if (!StressCompiledExceptionHandlers && doit) {
deoptimize_caller_frame(thread);
}
}
// Unconditionally deoptimize the frame that called into the current runtime
// stub (or exception blob) on the given thread.
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
  // This runs on the thread that owns the frames, so the stack can be walked
  // without a safepoint.
  RegisterMap map(thread);
  frame runtime_stub = thread->last_frame();
  assert(runtime_stub.is_runtime_frame() || exception_blob()->contains(runtime_stub.pc()), "sanity check");
  // The sender of the stub frame is the frame to deoptimize.
  Deoptimization::deoptimize_frame(thread, runtime_stub.sender(&map).id());
}
bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) {
// Called from within the owner thread, so no need for safepoint

View File

@ -174,6 +174,7 @@ private:
static address handle_exception_C (JavaThread* thread);
static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc );
static void deoptimize_caller_frame (JavaThread *thread);
static void deoptimize_caller_frame (JavaThread *thread, bool doit);
static bool is_deoptimized_caller_frame (JavaThread *thread);

View File

@ -744,7 +744,9 @@ bool StringConcat::validate_control_flow() {
ctrl_path.push(cn);
ctrl_path.push(cn->proj_out(0));
ctrl_path.push(cn->proj_out(0)->unique_out());
ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) {
ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
}
} else {
ShouldNotReachHere();
}
@ -762,6 +764,12 @@ bool StringConcat::validate_control_flow() {
} else if (ptr->is_IfTrue()) {
IfNode* iff = ptr->in(0)->as_If();
BoolNode* b = iff->in(1)->isa_Bool();
if (b == NULL) {
fail = true;
break;
}
Node* cmp = b->in(1);
Node* v1 = cmp->in(1);
Node* v2 = cmp->in(2);
@ -1408,71 +1416,76 @@ void PhaseStringOpts::replace_string_concat(StringConcat* sc) {
Deoptimization::Action_make_not_entrant);
}
// length now contains the number of characters needed for the
// char[] so create a new AllocateArray for the char[]
Node* char_array = NULL;
{
PreserveReexecuteState preexecs(&kit);
// The original jvms is for an allocation of either a String or
// StringBuffer so no stack adjustment is necessary for proper
// reexecution. If we deoptimize in the slow path the bytecode
// will be reexecuted and the char[] allocation will be thrown away.
kit.jvms()->set_should_reexecute(true);
char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
length, 1);
}
Node* result;
if (!kit.stopped()) {
// Mark the allocation so that zeroing is skipped since the code
// below will overwrite the entire array
AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
char_alloc->maybe_set_complete(_gvn);
// Now copy the string representations into the final char[]
Node* start = __ intcon(0);
for (int argi = 0; argi < sc->num_arguments(); argi++) {
Node* arg = sc->argument(argi);
switch (sc->mode(argi)) {
case StringConcat::IntMode: {
Node* end = __ AddI(start, string_sizes->in(argi));
// getChars works backwards so pass the ending point as well as the start
int_getChars(kit, arg, char_array, start, end);
start = end;
break;
}
case StringConcat::StringNullCheckMode:
case StringConcat::StringMode: {
start = copy_string(kit, arg, char_array, start);
break;
}
case StringConcat::CharMode: {
__ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
arg, T_CHAR, char_adr_idx);
start = __ AddI(start, __ intcon(1));
break;
}
default:
ShouldNotReachHere();
// length now contains the number of characters needed for the
// char[] so create a new AllocateArray for the char[]
Node* char_array = NULL;
{
PreserveReexecuteState preexecs(&kit);
// The original jvms is for an allocation of either a String or
// StringBuffer so no stack adjustment is necessary for proper
// reexecution. If we deoptimize in the slow path the bytecode
// will be reexecuted and the char[] allocation will be thrown away.
kit.jvms()->set_should_reexecute(true);
char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
length, 1);
}
}
// If we're not reusing an existing String allocation then allocate one here.
Node* result = sc->string_alloc();
if (result == NULL) {
PreserveReexecuteState preexecs(&kit);
// The original jvms is for an allocation of either a String or
// StringBuffer so no stack adjustment is necessary for proper
// reexecution.
kit.jvms()->set_should_reexecute(true);
result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
}
// Mark the allocation so that zeroing is skipped since the code
// below will overwrite the entire array
AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
char_alloc->maybe_set_complete(_gvn);
// Initialize the string
if (java_lang_String::has_offset_field()) {
kit.store_String_offset(kit.control(), result, __ intcon(0));
kit.store_String_length(kit.control(), result, length);
}
kit.store_String_value(kit.control(), result, char_array);
// Now copy the string representations into the final char[]
Node* start = __ intcon(0);
for (int argi = 0; argi < sc->num_arguments(); argi++) {
Node* arg = sc->argument(argi);
switch (sc->mode(argi)) {
case StringConcat::IntMode: {
Node* end = __ AddI(start, string_sizes->in(argi));
// getChars works backwards so pass the ending point as well as the start
int_getChars(kit, arg, char_array, start, end);
start = end;
break;
}
case StringConcat::StringNullCheckMode:
case StringConcat::StringMode: {
start = copy_string(kit, arg, char_array, start);
break;
}
case StringConcat::CharMode: {
__ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
arg, T_CHAR, char_adr_idx);
start = __ AddI(start, __ intcon(1));
break;
}
default:
ShouldNotReachHere();
}
}
// If we're not reusing an existing String allocation then allocate one here.
result = sc->string_alloc();
if (result == NULL) {
PreserveReexecuteState preexecs(&kit);
// The original jvms is for an allocation of either a String or
// StringBuffer so no stack adjustment is necessary for proper
// reexecution.
kit.jvms()->set_should_reexecute(true);
result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
}
// Initialize the string
if (java_lang_String::has_offset_field()) {
kit.store_String_offset(kit.control(), result, __ intcon(0));
kit.store_String_length(kit.control(), result, length);
}
kit.store_String_value(kit.control(), result, char_array);
} else {
result = C->top();
}
// hook up the outgoing control and result
kit.replace_call(sc->end(), result);

View File

@ -1168,8 +1168,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject
// Walk all nmethods depending on this call site.
MutexLocker mu(Compile_lock, thread);
Universe::flush_dependents_on(call_site, target);
java_lang_invoke_CallSite::set_target(call_site(), target());
}
java_lang_invoke_CallSite::set_target(call_site(), target());
}
JVM_END
@ -1180,8 +1180,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobjec
// Walk all nmethods depending on this call site.
MutexLocker mu(Compile_lock, thread);
Universe::flush_dependents_on(call_site, target);
java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
}
java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
}
JVM_END

View File

@ -468,6 +468,21 @@ UNSAFE_ENTRY(void, Unsafe_SetOrderedLong(JNIEnv *env, jobject unsafe, jobject ob
#endif
UNSAFE_END
// Native entry point registered under the name "loadFence" in the methods[]
// table: takes no arguments beyond env/unsafe, returns nothing, and issues
// OrderAccess::acquire().
UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe))
UnsafeWrapper("Unsafe_LoadFence");
OrderAccess::acquire();
UNSAFE_END
// Native entry point registered under the name "storeFence" in the methods[]
// table: takes no arguments beyond env/unsafe, returns nothing, and issues
// OrderAccess::release().
UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe))
UnsafeWrapper("Unsafe_StoreFence");
OrderAccess::release();
UNSAFE_END
// Native entry point registered under the name "fullFence" in the methods[]
// table: takes no arguments beyond env/unsafe, returns nothing, and issues
// OrderAccess::fence().
UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe))
UnsafeWrapper("Unsafe_FullFence");
OrderAccess::fence();
UNSAFE_END
////// Data in the C heap.
// Note: These do not throw NullPointerException for bad pointers.
@ -1550,6 +1565,9 @@ static JNINativeMethod methods[] = {
{CC"putOrderedObject", CC"("OBJ"J"OBJ")V", FN_PTR(Unsafe_SetOrderedObject)},
{CC"putOrderedInt", CC"("OBJ"JI)V", FN_PTR(Unsafe_SetOrderedInt)},
{CC"putOrderedLong", CC"("OBJ"JJ)V", FN_PTR(Unsafe_SetOrderedLong)},
{CC"loadFence", CC"()V", FN_PTR(Unsafe_LoadFence)},
{CC"storeFence", CC"()V", FN_PTR(Unsafe_StoreFence)},
{CC"fullFence", CC"()V", FN_PTR(Unsafe_FullFence)},
{CC"park", CC"(ZJ)V", FN_PTR(Unsafe_Park)},
{CC"unpark", CC"("OBJ")V", FN_PTR(Unsafe_Unpark)}

View File

@ -922,6 +922,9 @@ class CommandLineFlags {
develop(bool, PrintExceptionHandlers, false, \
"Print exception handler tables for all nmethods when generated") \
\
develop(bool, StressCompiledExceptionHandlers, false, \
"Exercise compiled exception handlers") \
\
develop(bool, InterceptOSException, false, \
"Starts debugger when an implicit OS (e.g., NULL) " \
"exception happens") \

View File

@ -2190,7 +2190,7 @@ void JavaThread::send_thread_stop(oop java_throwable) {
// BiasedLocking needs an updated RegisterMap for the revoke monitors pass
RegisterMap reg_map(this, UseBiasedLocking);
frame compiled_frame = f.sender(&reg_map);
if (compiled_frame.can_be_deoptimized()) {
if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) {
Deoptimization::deoptimize(this, compiled_frame, &reg_map);
}
}

View File

@ -54,7 +54,6 @@ abstract public class TestAESBase {
String paddingStr = "PKCS5Padding";
AlgorithmParameters algParams;
SecretKey key;
int ivLen;
static int numThreads = 0;
int threadId;
@ -68,7 +67,7 @@ abstract public class TestAESBase {
public void prepare() {
try {
System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);
System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput);
int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
byte keyBytes[] = new byte[keyLenBytes];
@ -90,10 +89,14 @@ abstract public class TestAESBase {
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
if (mode.equals("CBC")) {
int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
} else {
algParams = cipher.getParameters();
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
}
algParams = cipher.getParameters();
dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
if (threadId == 0) {

View File

@ -27,7 +27,8 @@
* @bug 7184394
* @summary add intrinsics to use AES instructions
*
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain
*
* @author Tom Deneau
*/

View File

@ -0,0 +1,94 @@
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test Test8004741.java
* @bug 8004741
* @summary Missing compiled exception handle table entry for multidimensional array allocation
* @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741
*
*/
import java.util.*;
// Regression test driver: a worker thread allocates two-dimensional int arrays
// in a loop, the main thread stops it with Thread.stop(), and the test passes
// only if the resulting Error was observed by one of the catch blocks below.
public class Test8004741 extends Thread {
// Allocates and returns an int[a][b]. Any Error or Exception raised by the
// allocation is logged and rethrown; catching an Error also sets 'passed'.
static int[][] test(int a, int b) throws Exception {
int[][] ar = null;
try {
ar = new int[a][b];
} catch (Error e) {
// ThreadDeath (delivered by Thread.stop()) is an Error, so it lands here.
System.out.println("test got Error");
passed = true;
throw(e);
} catch (Exception e) {
System.out.println("test got Exception");
throw(e);
}
return ar;
}
// Set to true once an Error has been caught in test() or a ThreadDeath in run();
// read by main() after the worker thread has been joined.
static boolean passed = false;
// Worker body: calls test(2, 20000) forever until an exception escapes test().
public void run() {
System.out.println("test started");
try {
while(true) {
test(2,20000);
}
} catch (ThreadDeath e) {
// Expected outcome: the stop request reached this thread.
System.out.println("test got ThreadDeath");
passed = true;
} catch (Error e) {
// Any other Error is reported but does not set 'passed' here.
e.printStackTrace();
System.out.println("test got Error");
} catch (Exception e) {
e.printStackTrace();
System.out.println("test got Exception");
}
}
// Entry point. Prints PASSED, or prints FAILED and exits with status 97.
public static void main(String[] args) throws Exception {
// Warm-up calls with a small array.
// NOTE(review): presumably 11000 iterations are meant to get test() compiled
// before the worker starts (the test runs with -Xbatch) - confirm.
for (int n = 0; n < 11000; n++) {
test(2, 20);
}
// First test exception catch
Test8004741 t = new Test8004741();
passed = false;
t.start();
// Let the worker run for a second, then stop it.
Thread.sleep(1000);
t.stop();
// Wait for the worker to finish before reading 'passed'.
Thread.sleep(5000);
t.join();
if (passed) {
System.out.println("PASSED");
} else {
System.out.println("FAILED");
System.exit(97);
}
}
};

View File

@ -0,0 +1,50 @@
/*
* Copyright 2012 SAP AG. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8005033
* @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow.
* @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033
* @author Richard Reingruber richard DOT reingruber AT sap DOT com
*/
// Regression test: checks that Integer.bitCount(MINUS_ONE + 1) evaluates to 0.
public class Test8005033 {
// Operand read by testBitCount(); a non-final static field, so its value is
// loaded at run time.
public static int MINUS_ONE = -1;
// Entry point. Throws InternalError when the computed bit count is not 0.
public static void main(String[] args) {
System.out.println("EXECUTING test.");
Integer.bitCount(1); // load class
int expectedBitCount = 0;
int calculatedBitCount = testBitCount();
if (expectedBitCount != calculatedBitCount) {
throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount);
}
System.out.println("SUCCESSFULLY passed test.");
}
// testBitCount will be compiled using the Integer.bitCount() intrinsic if possible
// Returns the number of one-bits in MINUS_ONE + 1.
private static int testBitCount() {
return Integer.bitCount(MINUS_ONE+1); // -1 + 1 => int overflow
}
}