mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-17 03:13:11 +00:00
Merge
This commit is contained in:
commit
42c79d741b
@ -298,7 +298,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
|
||||
for (int i = 0; i < _bytes_to_copy; i++) {
|
||||
address ptr = (address)(_pc_start + i);
|
||||
int a_byte = (*ptr) & 0xFF;
|
||||
__ a_byte (a_byte);
|
||||
__ emit_int8 (a_byte);
|
||||
}
|
||||
}
|
||||
|
||||
@ -340,10 +340,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
|
||||
int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
|
||||
|
||||
// Emit the patch record. We need to emit a full word, so emit an extra empty byte
|
||||
__ a_byte(0);
|
||||
__ a_byte(being_initialized_entry_offset);
|
||||
__ a_byte(bytes_to_skip);
|
||||
__ a_byte(_bytes_to_copy);
|
||||
__ emit_int8(0);
|
||||
__ emit_int8(being_initialized_entry_offset);
|
||||
__ emit_int8(bytes_to_skip);
|
||||
__ emit_int8(_bytes_to_copy);
|
||||
address patch_info_pc = __ pc();
|
||||
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
|
||||
|
||||
|
||||
@ -100,34 +100,6 @@ const char* Argument::name() const {
|
||||
bool AbstractAssembler::pd_check_instruction_mark() { return false; }
|
||||
#endif
|
||||
|
||||
|
||||
void MacroAssembler::print_instruction(int inst) {
|
||||
const char* s;
|
||||
switch (inv_op(inst)) {
|
||||
default: s = "????"; break;
|
||||
case call_op: s = "call"; break;
|
||||
case branch_op:
|
||||
switch (inv_op2(inst)) {
|
||||
case fb_op2: s = "fb"; break;
|
||||
case fbp_op2: s = "fbp"; break;
|
||||
case br_op2: s = "br"; break;
|
||||
case bp_op2: s = "bp"; break;
|
||||
case cb_op2: s = "cb"; break;
|
||||
case bpr_op2: {
|
||||
if (is_cbcond(inst)) {
|
||||
s = is_cxb(inst) ? "cxb" : "cwb";
|
||||
} else {
|
||||
s = "bpr";
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: s = "????"; break;
|
||||
}
|
||||
}
|
||||
::tty->print("%s", s);
|
||||
}
|
||||
|
||||
|
||||
// Patch instruction inst at offset inst_pos to refer to dest_pos
|
||||
// and return the resulting instruction.
|
||||
// We should have pcs, not offsets, but since all is relative, it will work out
|
||||
|
||||
@ -603,7 +603,6 @@ class MacroAssembler : public Assembler {
|
||||
friend class Label;
|
||||
|
||||
protected:
|
||||
static void print_instruction(int inst);
|
||||
static int patched_branch(int dest_pos, int inst, int inst_pos);
|
||||
static int branch_destination(int inst, int pos);
|
||||
|
||||
@ -759,9 +758,6 @@ class MacroAssembler : public Assembler {
|
||||
// Required platform-specific helpers for Label::patch_instructions.
|
||||
// They _shadow_ the declarations in AbstractAssembler, which are undefined.
|
||||
void pd_patch_instruction(address branch, address target);
|
||||
#ifndef PRODUCT
|
||||
static void pd_print_patched_instruction(address branch);
|
||||
#endif
|
||||
|
||||
// sethi Macro handles optimizations and relocations
|
||||
private:
|
||||
|
||||
@ -43,14 +43,6 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target)
|
||||
stub_inst = patched_branch(target - branch, stub_inst, 0);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
inline void MacroAssembler::pd_print_patched_instruction(address branch) {
|
||||
jint stub_inst = *(jint*) branch;
|
||||
print_instruction(stub_inst);
|
||||
::tty->print("%s", " (unresolved)");
|
||||
}
|
||||
#endif // PRODUCT
|
||||
|
||||
// Use the right loads/stores for the platform
|
||||
inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
|
||||
#ifdef _LP64
|
||||
|
||||
@ -10224,7 +10224,7 @@ instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result
|
||||
|
||||
//---------- Zeros Count Instructions ------------------------------------------
|
||||
|
||||
instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
|
||||
instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{
|
||||
predicate(UsePopCountInstruction); // See Matcher::match_rule_supported
|
||||
match(Set dst (CountLeadingZerosI src));
|
||||
effect(TEMP dst, TEMP tmp, KILL cr);
|
||||
@ -10321,7 +10321,7 @@ instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{
|
||||
ins_pipe(ialu_reg);
|
||||
%}
|
||||
|
||||
instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
|
||||
instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{
|
||||
predicate(UsePopCountInstruction); // See Matcher::match_rule_supported
|
||||
match(Set dst (CountTrailingZerosI src));
|
||||
effect(TEMP dst, KILL cr);
|
||||
@ -10364,19 +10364,21 @@ instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{
|
||||
|
||||
//---------- Population Count Instructions -------------------------------------
|
||||
|
||||
instruct popCountI(iRegI dst, iRegI src) %{
|
||||
instruct popCountI(iRegIsafe dst, iRegI src) %{
|
||||
predicate(UsePopCountInstruction);
|
||||
match(Set dst (PopCountI src));
|
||||
|
||||
format %{ "POPC $src, $dst" %}
|
||||
format %{ "SRL $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
|
||||
"POPC $dst, $dst" %}
|
||||
ins_encode %{
|
||||
__ popc($src$$Register, $dst$$Register);
|
||||
__ srl($src$$Register, G0, $dst$$Register);
|
||||
__ popc($dst$$Register, $dst$$Register);
|
||||
%}
|
||||
ins_pipe(ialu_reg);
|
||||
%}
|
||||
|
||||
// Note: Long.bitCount(long) returns an int.
|
||||
instruct popCountL(iRegI dst, iRegL src) %{
|
||||
instruct popCountL(iRegIsafe dst, iRegL src) %{
|
||||
predicate(UsePopCountInstruction);
|
||||
match(Set dst (PopCountL src));
|
||||
|
||||
|
||||
@ -434,7 +434,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe
|
||||
|
||||
// the frame is greater than one page in size, so check against
|
||||
// the bottom of the stack
|
||||
__ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
|
||||
__ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check);
|
||||
|
||||
// the stack will overflow, throw an exception
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -313,10 +313,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
|
||||
#endif
|
||||
} else {
|
||||
// make a copy of the code which is going to be patched.
|
||||
for ( int i = 0; i < _bytes_to_copy; i++) {
|
||||
for (int i = 0; i < _bytes_to_copy; i++) {
|
||||
address ptr = (address)(_pc_start + i);
|
||||
int a_byte = (*ptr) & 0xFF;
|
||||
__ a_byte (a_byte);
|
||||
__ emit_int8(a_byte);
|
||||
*ptr = 0x90; // make the site look like a nop
|
||||
}
|
||||
}
|
||||
@ -363,11 +363,11 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
|
||||
// emit the offsets needed to find the code to patch
|
||||
int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
|
||||
|
||||
__ a_byte(0xB8);
|
||||
__ a_byte(0);
|
||||
__ a_byte(being_initialized_entry_offset);
|
||||
__ a_byte(bytes_to_skip);
|
||||
__ a_byte(_bytes_to_copy);
|
||||
__ emit_int8((unsigned char)0xB8);
|
||||
__ emit_int8(0);
|
||||
__ emit_int8(being_initialized_entry_offset);
|
||||
__ emit_int8(bytes_to_skip);
|
||||
__ emit_int8(_bytes_to_copy);
|
||||
address patch_info_pc = __ pc();
|
||||
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
|
||||
|
||||
|
||||
@ -1023,7 +1023,7 @@ void MacroAssembler::lea(Address dst, AddressLiteral adr) {
|
||||
|
||||
void MacroAssembler::leave() {
|
||||
// %%% is this really better? Why not on 32bit too?
|
||||
emit_byte(0xC9); // LEAVE
|
||||
emit_int8((unsigned char)0xC9); // LEAVE
|
||||
}
|
||||
|
||||
void MacroAssembler::lneg(Register hi, Register lo) {
|
||||
@ -2112,11 +2112,11 @@ void MacroAssembler::fat_nop() {
|
||||
if (UseAddressNop) {
|
||||
addr_nop_5();
|
||||
} else {
|
||||
emit_byte(0x26); // es:
|
||||
emit_byte(0x2e); // cs:
|
||||
emit_byte(0x64); // fs:
|
||||
emit_byte(0x65); // gs:
|
||||
emit_byte(0x90);
|
||||
emit_int8(0x26); // es:
|
||||
emit_int8(0x2e); // cs:
|
||||
emit_int8(0x64); // fs:
|
||||
emit_int8(0x65); // gs:
|
||||
emit_int8((unsigned char)0x90);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2534,12 +2534,12 @@ void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
|
||||
int offs = (intptr_t)dst.target() - ((intptr_t)pc());
|
||||
if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
|
||||
// 0111 tttn #8-bit disp
|
||||
emit_byte(0x70 | cc);
|
||||
emit_byte((offs - short_size) & 0xFF);
|
||||
emit_int8(0x70 | cc);
|
||||
emit_int8((offs - short_size) & 0xFF);
|
||||
} else {
|
||||
// 0000 1111 1000 tttn #32-bit disp
|
||||
emit_byte(0x0F);
|
||||
emit_byte(0x80 | cc);
|
||||
emit_int8(0x0F);
|
||||
emit_int8((unsigned char)(0x80 | cc));
|
||||
emit_long(offs - long_size);
|
||||
}
|
||||
} else {
|
||||
@ -3085,7 +3085,8 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
|
||||
|
||||
void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
|
||||
// Used in sign-bit flipping with aligned address.
|
||||
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
|
||||
bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
|
||||
assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
|
||||
if (reachable(src)) {
|
||||
Assembler::pshufb(dst, as_Address(src));
|
||||
} else {
|
||||
|
||||
@ -126,25 +126,6 @@ class MacroAssembler: public Assembler {
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
static void pd_print_patched_instruction(address branch) {
|
||||
const char* s;
|
||||
unsigned char op = branch[0];
|
||||
if (op == 0xE8) {
|
||||
s = "call";
|
||||
} else if (op == 0xE9 || op == 0xEB) {
|
||||
s = "jmp";
|
||||
} else if ((op & 0xF0) == 0x70) {
|
||||
s = "jcc";
|
||||
} else if (op == 0x0F) {
|
||||
s = "jcc";
|
||||
} else {
|
||||
s = "????";
|
||||
}
|
||||
tty->print("%s (unresolved)", s);
|
||||
}
|
||||
#endif
|
||||
|
||||
// The following 4 methods return the offset of the appropriate move instruction
|
||||
|
||||
// Support for fast byte/short loading with zero extension (depending on particular CPU)
|
||||
|
||||
@ -2174,13 +2174,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
//
|
||||
address generate_aescrypt_encryptBlock() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
|
||||
Label L_doLast;
|
||||
address start = __ pc();
|
||||
|
||||
const Register from = rsi; // source array address
|
||||
const Register from = rdx; // source array address
|
||||
const Register to = rdx; // destination array address
|
||||
const Register key = rcx; // key array address
|
||||
const Register keylen = rax;
|
||||
@ -2189,47 +2189,74 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Address key_param (rbp, 8+8);
|
||||
|
||||
const XMMRegister xmm_result = xmm0;
|
||||
const XMMRegister xmm_temp = xmm1;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm2;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1;
|
||||
const XMMRegister xmm_temp1 = xmm2;
|
||||
const XMMRegister xmm_temp2 = xmm3;
|
||||
const XMMRegister xmm_temp3 = xmm4;
|
||||
const XMMRegister xmm_temp4 = xmm5;
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ push(rsi);
|
||||
__ movptr(from , from_param);
|
||||
__ movptr(to , to_param);
|
||||
__ movptr(key , key_param);
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ movptr(from, from_param);
|
||||
__ movptr(key, key_param);
|
||||
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
// keylen = # of 32-bit words, convert to 128-bit words
|
||||
__ shrl(keylen, 2);
|
||||
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
|
||||
__ movptr(to, to_param);
|
||||
|
||||
// For encryption, the java expanded key ordering is just what we need
|
||||
|
||||
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
|
||||
__ pxor(xmm_result, xmm_temp);
|
||||
for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
|
||||
aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
|
||||
}
|
||||
load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
|
||||
__ cmpl(keylen, 0);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
__ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
|
||||
__ subl(keylen, 2);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
__ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
|
||||
__ pxor(xmm_result, xmm_temp1);
|
||||
|
||||
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
__ aesenc(xmm_result, xmm_temp3);
|
||||
__ aesenc(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
__ aesenc(xmm_result, xmm_temp3);
|
||||
__ aesenc(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 44);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 52);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
|
||||
|
||||
__ BIND(L_doLast);
|
||||
__ aesenclast(xmm_result, xmm_temp);
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenclast(xmm_result, xmm_temp2);
|
||||
__ movdqu(Address(to, 0), xmm_result); // store the result
|
||||
__ xorptr(rax, rax); // return 0
|
||||
__ pop(rsi);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
@ -2245,13 +2272,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
//
|
||||
address generate_aescrypt_decryptBlock() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
|
||||
Label L_doLast;
|
||||
address start = __ pc();
|
||||
|
||||
const Register from = rsi; // source array address
|
||||
const Register from = rdx; // source array address
|
||||
const Register to = rdx; // destination array address
|
||||
const Register key = rcx; // key array address
|
||||
const Register keylen = rax;
|
||||
@ -2260,51 +2287,76 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Address key_param (rbp, 8+8);
|
||||
|
||||
const XMMRegister xmm_result = xmm0;
|
||||
const XMMRegister xmm_temp = xmm1;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm2;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1;
|
||||
const XMMRegister xmm_temp1 = xmm2;
|
||||
const XMMRegister xmm_temp2 = xmm3;
|
||||
const XMMRegister xmm_temp3 = xmm4;
|
||||
const XMMRegister xmm_temp4 = xmm5;
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ push(rsi);
|
||||
__ movptr(from , from_param);
|
||||
__ movptr(to , to_param);
|
||||
__ movptr(key , key_param);
|
||||
__ movptr(from, from_param);
|
||||
__ movptr(key, key_param);
|
||||
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
// keylen = # of 32-bit words, convert to 128-bit words
|
||||
__ shrl(keylen, 2);
|
||||
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_result, Address(from, 0));
|
||||
__ movptr(to, to_param);
|
||||
|
||||
// for decryption java expanded key ordering is rotated one position from what we want
|
||||
// so we start from 0x10 here and hit 0x00 last
|
||||
// we don't know if the key is aligned, hence not using load-execute form
|
||||
load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
|
||||
__ pxor (xmm_result, xmm_temp);
|
||||
for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
|
||||
aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
|
||||
}
|
||||
__ cmpl(keylen, 0);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
// only in 192 and 256 bit keys
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
|
||||
__ subl(keylen, 2);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
// only in 256 bit keys
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
|
||||
|
||||
__ pxor (xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
__ aesdec(xmm_result, xmm_temp3);
|
||||
__ aesdec(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
|
||||
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
__ aesdec(xmm_result, xmm_temp3);
|
||||
__ aesdec(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 44);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 52);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
|
||||
|
||||
__ BIND(L_doLast);
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
|
||||
// for decryption the aesdeclast operation is always on key+0x00
|
||||
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
|
||||
__ aesdeclast(xmm_result, xmm_temp);
|
||||
|
||||
__ aesdeclast(xmm_result, xmm_temp3);
|
||||
__ movdqu(Address(to, 0), xmm_result); // store the result
|
||||
|
||||
__ xorptr(rax, rax); // return 0
|
||||
__ pop(rsi);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
@ -2340,7 +2392,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
|
||||
address start = __ pc();
|
||||
@ -2393,7 +2445,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ jcc(Assembler::notEqual, L_key_192_256);
|
||||
|
||||
// 128 bit code follows here
|
||||
__ movptr(pos, 0);
|
||||
__ movl(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loopTop_128);
|
||||
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
|
||||
@ -2423,15 +2475,15 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
__ BIND(L_key_192_256);
|
||||
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
|
||||
__ BIND(L_key_192_256);
|
||||
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
|
||||
__ cmpl(rax, 52);
|
||||
__ jcc(Assembler::notEqual, L_key_256);
|
||||
|
||||
// 192-bit code follows here (could be changed to use more xmm registers)
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loopTop_192);
|
||||
__ movl(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loopTop_192);
|
||||
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
|
||||
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
|
||||
|
||||
@ -2452,11 +2504,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ jcc(Assembler::notEqual, L_loopTop_192);
|
||||
__ jmp(L_exit);
|
||||
|
||||
__ BIND(L_key_256);
|
||||
__ BIND(L_key_256);
|
||||
// 256-bit code follows here (could be changed to use more xmm registers)
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loopTop_256);
|
||||
__ movl(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loopTop_256);
|
||||
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
|
||||
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
|
||||
|
||||
@ -2495,7 +2547,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
//
|
||||
|
||||
address generate_cipherBlockChaining_decryptAESCrypt() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
|
||||
address start = __ pc();
|
||||
@ -2556,9 +2608,9 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
|
||||
// 128-bit code follows here, parallelized
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_singleBlock_loopTop_128);
|
||||
__ movl(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_singleBlock_loopTop_128);
|
||||
__ cmpptr(len_reg, 0); // any blocks left??
|
||||
__ jcc(Assembler::equal, L_exit);
|
||||
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
|
||||
@ -2597,7 +2649,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ jcc(Assembler::notEqual, L_key_256);
|
||||
|
||||
// 192-bit code follows here (could be optimized to use parallelism)
|
||||
__ movptr(pos, 0);
|
||||
__ movl(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_singleBlock_loopTop_192);
|
||||
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
|
||||
@ -2622,7 +2674,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ BIND(L_key_256);
|
||||
// 256-bit code follows here (could be optimized to use parallelism)
|
||||
__ movptr(pos, 0);
|
||||
__ movl(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_singleBlock_loopTop_256);
|
||||
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
|
||||
|
||||
@ -2953,21 +2953,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
// aesenc using specified key+offset
|
||||
// can optionally specify that the shuffle mask is already in an xmmregister
|
||||
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
|
||||
load_key(xmmtmp, key, offset, xmm_shuf_mask);
|
||||
__ aesenc(xmmdst, xmmtmp);
|
||||
}
|
||||
|
||||
// aesdec using specified key+offset
|
||||
// can optionally specify that the shuffle mask is already in an xmmregister
|
||||
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
|
||||
load_key(xmmtmp, key, offset, xmm_shuf_mask);
|
||||
__ aesdec(xmmdst, xmmtmp);
|
||||
}
|
||||
|
||||
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
@ -2976,7 +2961,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
//
|
||||
address generate_aescrypt_encryptBlock() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
|
||||
Label L_doLast;
|
||||
@ -2988,15 +2973,17 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Register keylen = rax;
|
||||
|
||||
const XMMRegister xmm_result = xmm0;
|
||||
const XMMRegister xmm_temp = xmm1;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm2;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1;
|
||||
// On win64 xmm6-xmm15 must be preserved so don't use them.
|
||||
const XMMRegister xmm_temp1 = xmm2;
|
||||
const XMMRegister xmm_temp2 = xmm3;
|
||||
const XMMRegister xmm_temp3 = xmm4;
|
||||
const XMMRegister xmm_temp4 = xmm5;
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
// keylen = # of 32-bit words, convert to 128-bit words
|
||||
__ shrl(keylen, 2);
|
||||
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
|
||||
@ -3004,25 +2991,53 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// For encryption, the java expanded key ordering is just what we need
|
||||
// we don't know if the key is aligned, hence not using load-execute form
|
||||
|
||||
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
|
||||
__ pxor(xmm_result, xmm_temp);
|
||||
for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
|
||||
aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
|
||||
}
|
||||
load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
|
||||
__ cmpl(keylen, 0);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
__ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
|
||||
__ subl(keylen, 2);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
__ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
|
||||
__ pxor(xmm_result, xmm_temp1);
|
||||
|
||||
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
__ aesenc(xmm_result, xmm_temp3);
|
||||
__ aesenc(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
__ aesenc(xmm_result, xmm_temp3);
|
||||
__ aesenc(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 44);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 52);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenc(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
|
||||
|
||||
__ BIND(L_doLast);
|
||||
__ aesenclast(xmm_result, xmm_temp);
|
||||
__ aesenc(xmm_result, xmm_temp1);
|
||||
__ aesenclast(xmm_result, xmm_temp2);
|
||||
__ movdqu(Address(to, 0), xmm_result); // store the result
|
||||
__ xorptr(rax, rax); // return 0
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -3040,7 +3055,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
//
|
||||
address generate_aescrypt_decryptBlock() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
|
||||
Label L_doLast;
|
||||
@ -3052,15 +3067,17 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Register keylen = rax;
|
||||
|
||||
const XMMRegister xmm_result = xmm0;
|
||||
const XMMRegister xmm_temp = xmm1;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm2;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1;
|
||||
// On win64 xmm6-xmm15 must be preserved so don't use them.
|
||||
const XMMRegister xmm_temp1 = xmm2;
|
||||
const XMMRegister xmm_temp2 = xmm3;
|
||||
const XMMRegister xmm_temp3 = xmm4;
|
||||
const XMMRegister xmm_temp4 = xmm5;
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
// keylen = # of 32-bit words, convert to 128-bit words
|
||||
__ shrl(keylen, 2);
|
||||
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_result, Address(from, 0));
|
||||
@ -3068,29 +3085,55 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// for decryption java expanded key ordering is rotated one position from what we want
|
||||
// so we start from 0x10 here and hit 0x00 last
|
||||
// we don't know if the key is aligned, hence not using load-execute form
|
||||
load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
|
||||
__ pxor (xmm_result, xmm_temp);
|
||||
for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
|
||||
aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
|
||||
}
|
||||
__ cmpl(keylen, 0);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
// only in 192 and 256 bit keys
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
|
||||
__ subl(keylen, 2);
|
||||
__ jcc(Assembler::equal, L_doLast);
|
||||
// only in 256 bit keys
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
|
||||
|
||||
__ pxor (xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
__ aesdec(xmm_result, xmm_temp3);
|
||||
__ aesdec(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
|
||||
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
__ aesdec(xmm_result, xmm_temp3);
|
||||
__ aesdec(xmm_result, xmm_temp4);
|
||||
|
||||
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 44);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
|
||||
|
||||
__ cmpl(keylen, 52);
|
||||
__ jccb(Assembler::equal, L_doLast);
|
||||
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
|
||||
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
|
||||
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
|
||||
|
||||
__ BIND(L_doLast);
|
||||
__ aesdec(xmm_result, xmm_temp1);
|
||||
__ aesdec(xmm_result, xmm_temp2);
|
||||
|
||||
// for decryption the aesdeclast operation is always on key+0x00
|
||||
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
|
||||
__ aesdeclast(xmm_result, xmm_temp);
|
||||
|
||||
__ aesdeclast(xmm_result, xmm_temp3);
|
||||
__ movdqu(Address(to, 0), xmm_result); // store the result
|
||||
|
||||
__ xorptr(rax, rax); // return 0
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
@ -3109,7 +3152,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
|
||||
address start = __ pc();
|
||||
@ -3133,16 +3176,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const XMMRegister xmm_temp = xmm1;
|
||||
// keys 0-10 preloaded into xmm2-xmm12
|
||||
const int XMM_REG_NUM_KEY_FIRST = 2;
|
||||
const int XMM_REG_NUM_KEY_LAST = 12;
|
||||
const int XMM_REG_NUM_KEY_LAST = 15;
|
||||
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
|
||||
const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
|
||||
const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
|
||||
const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
|
||||
const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
|
||||
const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
// on win64, fill len_reg from stack position
|
||||
__ movl(len_reg, len_mem);
|
||||
// save the xmm registers which must be preserved 6-12
|
||||
// save the xmm registers which must be preserved 6-15
|
||||
__ subptr(rsp, -rsp_after_call_off * wordSize);
|
||||
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
||||
__ movdqu(xmm_save(i), as_XMMRegister(i));
|
||||
@ -3151,12 +3197,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
// load up xmm regs 2 thru 12 with key 0x00 - 0xa0
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
// load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
|
||||
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
|
||||
offset += 0x10;
|
||||
}
|
||||
|
||||
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
|
||||
|
||||
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
|
||||
@ -3167,16 +3212,15 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// 128 bit code follows here
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
|
||||
__ BIND(L_loopTop_128);
|
||||
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
|
||||
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
|
||||
|
||||
__ pxor (xmm_result, xmm_key0); // do the aes rounds
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
|
||||
__ aesenc(xmm_result, as_XMMRegister(rnum));
|
||||
}
|
||||
__ aesenclast(xmm_result, xmm_key10);
|
||||
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
// no need to store r to memory until we exit
|
||||
__ addptr(pos, AESBlockSize);
|
||||
@ -3198,24 +3242,23 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ BIND(L_key_192_256);
|
||||
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
|
||||
load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
|
||||
load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
|
||||
__ cmpl(rax, 52);
|
||||
__ jcc(Assembler::notEqual, L_key_256);
|
||||
|
||||
// 192-bit code follows here (could be changed to use more xmm registers)
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
|
||||
__ BIND(L_loopTop_192);
|
||||
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
|
||||
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
|
||||
|
||||
__ pxor (xmm_result, xmm_key0); // do the aes rounds
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
|
||||
__ aesenc(xmm_result, as_XMMRegister(rnum));
|
||||
}
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
|
||||
load_key(xmm_temp, key, 0xc0);
|
||||
__ aesenclast(xmm_result, xmm_temp);
|
||||
|
||||
__ aesenclast(xmm_result, xmm_key12);
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
// no need to store r to memory until we exit
|
||||
__ addptr(pos, AESBlockSize);
|
||||
@ -3225,22 +3268,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ BIND(L_key_256);
|
||||
// 256-bit code follows here (could be changed to use more xmm registers)
|
||||
load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
|
||||
__ BIND(L_loopTop_256);
|
||||
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
|
||||
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
|
||||
|
||||
__ pxor (xmm_result, xmm_key0); // do the aes rounds
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
|
||||
__ aesenc(xmm_result, as_XMMRegister(rnum));
|
||||
}
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
|
||||
aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
|
||||
load_key(xmm_temp, key, 0xe0);
|
||||
__ aesenclast(xmm_result, xmm_temp);
|
||||
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
// no need to store r to memory until we exit
|
||||
__ addptr(pos, AESBlockSize);
|
||||
@ -3267,7 +3307,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
//
|
||||
|
||||
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
|
||||
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
|
||||
address start = __ pc();
|
||||
@ -3288,12 +3328,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
#endif
|
||||
const Register pos = rax;
|
||||
|
||||
// xmm register assignments for the loops below
|
||||
const XMMRegister xmm_result = xmm0;
|
||||
// 11 round keys preloaded into xmm5-xmm15 (key+0x10 .. key+0xa0, then key+0x00 last)
|
||||
const int XMM_REG_NUM_KEY_FIRST = 5;
|
||||
const int XMM_REG_NUM_KEY_LAST = 15;
|
||||
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
|
||||
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
|
||||
const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -3312,13 +3350,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
// load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00;
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
|
||||
offset += 0x10;
|
||||
}
|
||||
load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
|
||||
|
||||
const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block
|
||||
|
||||
// registers holding the four results in the parallelized loop
|
||||
const XMMRegister xmm_result0 = xmm0;
|
||||
const XMMRegister xmm_result1 = xmm2;
|
||||
@ -3376,8 +3415,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ jmp(L_multiBlock_loopTop_128);
|
||||
|
||||
// registers used in the non-parallelized loops
|
||||
// xmm register assignments for the loops below
|
||||
const XMMRegister xmm_result = xmm0;
|
||||
const XMMRegister xmm_prev_block_cipher_save = xmm2;
|
||||
const XMMRegister xmm_temp = xmm3;
|
||||
const XMMRegister xmm_key11 = xmm3;
|
||||
const XMMRegister xmm_key12 = xmm4;
|
||||
const XMMRegister xmm_temp = xmm4;
|
||||
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_singleBlock_loopTop_128);
|
||||
@ -3415,12 +3458,15 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ BIND(L_key_192_256);
|
||||
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
|
||||
load_key(xmm_key11, key, 0xb0);
|
||||
__ cmpl(rax, 52);
|
||||
__ jcc(Assembler::notEqual, L_key_256);
|
||||
|
||||
// 192-bit code follows here (could be optimized to use parallelism)
|
||||
load_key(xmm_key12, key, 0xc0); // 192-bit key goes up to c0
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
|
||||
__ BIND(L_singleBlock_loopTop_192);
|
||||
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
|
||||
__ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
|
||||
@ -3428,14 +3474,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
|
||||
__ aesdec(xmm_result, as_XMMRegister(rnum));
|
||||
}
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
|
||||
__ aesdec(xmm_result, xmm_key11);
|
||||
__ aesdec(xmm_result, xmm_key12);
|
||||
__ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0
|
||||
__ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
// no need to store r to memory until we exit
|
||||
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
|
||||
|
||||
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
|
||||
__ addptr(pos, AESBlockSize);
|
||||
__ subptr(len_reg, AESBlockSize);
|
||||
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
|
||||
@ -3445,23 +3490,26 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// 256-bit code follows here (could be optimized to use parallelism)
|
||||
__ movptr(pos, 0);
|
||||
__ align(OptoLoopAlignment);
|
||||
|
||||
__ BIND(L_singleBlock_loopTop_256);
|
||||
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
|
||||
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
|
||||
__ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
|
||||
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
|
||||
__ aesdec(xmm_result, as_XMMRegister(rnum));
|
||||
}
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
|
||||
aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
|
||||
__ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0
|
||||
__ aesdec(xmm_result, xmm_key11);
|
||||
load_key(xmm_temp, key, 0xc0);
|
||||
__ aesdec(xmm_result, xmm_temp);
|
||||
load_key(xmm_temp, key, 0xd0);
|
||||
__ aesdec(xmm_result, xmm_temp);
|
||||
load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0
|
||||
__ aesdec(xmm_result, xmm_temp);
|
||||
__ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0
|
||||
__ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
// no need to store r to memory until we exit
|
||||
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
|
||||
|
||||
__ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
|
||||
__ addptr(pos, AESBlockSize);
|
||||
__ subptr(len_reg, AESBlockSize);
|
||||
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
|
||||
|
||||
@ -489,8 +489,8 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
|
||||
// The AES intrinsic stubs require AES instruction support (of course)
|
||||
// but also require AVX and SSE3 modes for the instructions they use.
|
||||
if (UseAES && (UseAVX > 0) && (UseSSE > 2)) {
|
||||
// but also require SSE3 mode for the instructions they use.
|
||||
if (UseAES && (UseSSE > 2)) {
|
||||
if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
|
||||
UseAESIntrinsics = true;
|
||||
}
|
||||
|
||||
@ -56,15 +56,9 @@ void Assembler::pd_patch_instruction(address branch, address target) {
|
||||
ShouldNotCallThis();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
void Assembler::pd_print_patched_instruction(address branch) {
|
||||
ShouldNotCallThis();
|
||||
}
|
||||
#endif // PRODUCT
|
||||
|
||||
void MacroAssembler::align(int modulus) {
|
||||
while (offset() % modulus != 0)
|
||||
emit_byte(AbstractAssembler::code_fill_byte());
|
||||
emit_int8(AbstractAssembler::code_fill_byte());
|
||||
}
|
||||
|
||||
void MacroAssembler::bang_stack_with_offset(int offset) {
|
||||
@ -72,8 +66,7 @@ void MacroAssembler::bang_stack_with_offset(int offset) {
|
||||
}
|
||||
|
||||
void MacroAssembler::advance(int bytes) {
|
||||
_code_pos += bytes;
|
||||
sync();
|
||||
code_section()->set_end(code_section()->end() + bytes);
|
||||
}
|
||||
|
||||
RegisterOrConstant MacroAssembler::delayed_value_impl(
|
||||
|
||||
@ -37,9 +37,6 @@ class Assembler : public AbstractAssembler {
|
||||
|
||||
public:
|
||||
void pd_patch_instruction(address branch, address target);
|
||||
#ifndef PRODUCT
|
||||
static void pd_print_patched_instruction(address branch);
|
||||
#endif // PRODUCT
|
||||
};
|
||||
|
||||
class MacroAssembler : public Assembler {
|
||||
|
||||
@ -116,7 +116,7 @@ void MacroAssembler::get_thread(Register thread) {
|
||||
ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode ();
|
||||
if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) { // T1
|
||||
// Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset]
|
||||
emit_byte (segment);
|
||||
emit_int8 (segment);
|
||||
// ExternalAddress doesn't work because it can't take NULL
|
||||
AddressLiteral null(0, relocInfo::none);
|
||||
movptr (thread, null);
|
||||
@ -125,7 +125,7 @@ void MacroAssembler::get_thread(Register thread) {
|
||||
} else
|
||||
if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) { // T2
|
||||
// mov r, gs:[tlsOffset]
|
||||
emit_byte (segment);
|
||||
emit_int8 (segment);
|
||||
AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none);
|
||||
movptr (thread, tls_off);
|
||||
return ;
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
|
||||
|
||||
void MacroAssembler::int3() {
|
||||
emit_byte(0xCC);
|
||||
emit_int8((unsigned char)0xCC);
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
|
||||
@ -109,37 +109,6 @@ void AbstractAssembler::flush() {
|
||||
ICache::invalidate_range(addr_at(0), offset());
|
||||
}
|
||||
|
||||
|
||||
void AbstractAssembler::a_byte(int x) {
|
||||
emit_byte(x);
|
||||
}
|
||||
|
||||
|
||||
void AbstractAssembler::a_long(jint x) {
|
||||
emit_long(x);
|
||||
}
|
||||
|
||||
// Labels refer to positions in the (to be) generated code. There are bound
|
||||
// and unbound
|
||||
//
|
||||
// Bound labels refer to known positions in the already generated code.
|
||||
// offset() is the position the label refers to.
|
||||
//
|
||||
// Unbound labels refer to unknown positions in the code to be generated; it
|
||||
// may contain a list of unresolved displacements that refer to it
|
||||
#ifndef PRODUCT
|
||||
void AbstractAssembler::print(Label& L) {
|
||||
if (L.is_bound()) {
|
||||
tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect());
|
||||
} else if (L.is_unbound()) {
|
||||
L.print_instructions((MacroAssembler*)this);
|
||||
} else {
|
||||
tty->print_cr("label in inconsistent state (loc = %d)", L.loc());
|
||||
}
|
||||
}
|
||||
#endif // PRODUCT
|
||||
|
||||
|
||||
void AbstractAssembler::bind(Label& L) {
|
||||
if (L.is_bound()) {
|
||||
// Assembler can bind a label more than once to the same place.
|
||||
@ -342,28 +311,3 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
|
||||
#endif
|
||||
return offset < 0 || os::vm_page_size() <= offset;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
void Label::print_instructions(MacroAssembler* masm) const {
|
||||
CodeBuffer* cb = masm->code();
|
||||
for (int i = 0; i < _patch_index; ++i) {
|
||||
int branch_loc;
|
||||
if (i >= PatchCacheSize) {
|
||||
branch_loc = _patch_overflow->at(i - PatchCacheSize);
|
||||
} else {
|
||||
branch_loc = _patches[i];
|
||||
}
|
||||
int branch_pos = CodeBuffer::locator_pos(branch_loc);
|
||||
int branch_sect = CodeBuffer::locator_sect(branch_loc);
|
||||
address branch = cb->locator_address(branch_loc);
|
||||
tty->print_cr("unbound label");
|
||||
tty->print("@ %d|%d ", branch_pos, branch_sect);
|
||||
if (branch_sect == CodeBuffer::SECT_CONSTS) {
|
||||
tty->print_cr(PTR_FORMAT, *(address*)branch);
|
||||
continue;
|
||||
}
|
||||
masm->pd_print_patched_instruction(branch);
|
||||
tty->cr();
|
||||
}
|
||||
}
|
||||
#endif // ndef PRODUCT
|
||||
|
||||
@ -216,17 +216,6 @@ class AbstractAssembler : public ResourceObj {
|
||||
bool isByte(int x) const { return 0 <= x && x < 0x100; }
|
||||
bool isShiftCount(int x) const { return 0 <= x && x < 32; }
|
||||
|
||||
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
|
||||
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
|
||||
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
|
||||
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
|
||||
|
||||
void emit_float( jfloat x) { code_section()->emit_float( x); }
|
||||
void emit_double( jdouble x) { code_section()->emit_double( x); }
|
||||
void emit_address(address x) { code_section()->emit_address(x); }
|
||||
|
||||
void emit_byte(int x) { emit_int8 (x); } // deprecated
|
||||
void emit_word(int x) { emit_int16(x); } // deprecated
|
||||
void emit_long(jint x) { emit_int32(x); } // deprecated
|
||||
|
||||
// Instruction boundaries (required when emitting relocatable values).
|
||||
@ -277,9 +266,6 @@ class AbstractAssembler : public ResourceObj {
|
||||
};
|
||||
#endif
|
||||
|
||||
// Label functions
|
||||
void print(Label& L);
|
||||
|
||||
public:
|
||||
|
||||
// Creation
|
||||
@ -288,6 +274,15 @@ class AbstractAssembler : public ResourceObj {
|
||||
// ensure buf contains all code (call this before using/copying the code)
|
||||
void flush();
|
||||
|
||||
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
|
||||
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
|
||||
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
|
||||
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
|
||||
|
||||
void emit_float( jfloat x) { code_section()->emit_float( x); }
|
||||
void emit_double( jdouble x) { code_section()->emit_double( x); }
|
||||
void emit_address(address x) { code_section()->emit_address(x); }
|
||||
|
||||
// min and max values for signed immediate ranges
|
||||
static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1)) ; }
|
||||
static int max_simm(int nbits) { return (intptr_t(1) << (nbits - 1)) - 1; }
|
||||
@ -327,8 +322,6 @@ class AbstractAssembler : public ResourceObj {
|
||||
void clear_inst_mark() { code_section()->clear_mark(); }
|
||||
|
||||
// Constants in code
|
||||
void a_byte(int x);
|
||||
void a_long(jint x);
|
||||
void relocate(RelocationHolder const& rspec, int format = 0) {
|
||||
assert(!pd_check_instruction_mark()
|
||||
|| inst_mark() == NULL || inst_mark() == code_section()->end(),
|
||||
@ -441,15 +434,6 @@ class AbstractAssembler : public ResourceObj {
|
||||
*/
|
||||
void pd_patch_instruction(address branch, address target);
|
||||
|
||||
#ifndef PRODUCT
|
||||
/**
|
||||
* Platform-dependent method of printing an instruction that needs to be
|
||||
* patched.
|
||||
*
|
||||
* @param branch the instruction to be patched in the buffer.
|
||||
*/
|
||||
static void pd_print_patched_instruction(address branch);
|
||||
#endif // PRODUCT
|
||||
};
|
||||
|
||||
#ifdef TARGET_ARCH_x86
|
||||
|
||||
@ -3442,6 +3442,11 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
|
||||
preserves_state = true;
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_loadFence :
|
||||
case vmIntrinsics::_storeFence:
|
||||
case vmIntrinsics::_fullFence :
|
||||
break;
|
||||
|
||||
default : return false; // do not inline
|
||||
}
|
||||
// create intrinsic node
|
||||
|
||||
@ -2977,6 +2977,16 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
|
||||
do_CompareAndSwap(x, longType);
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_loadFence :
|
||||
if (os::is_MP()) __ membar_acquire();
|
||||
break;
|
||||
case vmIntrinsics::_storeFence:
|
||||
if (os::is_MP()) __ membar_release();
|
||||
break;
|
||||
case vmIntrinsics::_fullFence :
|
||||
if (os::is_MP()) __ membar();
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_Reference_get:
|
||||
do_Reference_get(x);
|
||||
break;
|
||||
|
||||
@ -366,10 +366,12 @@ bool ciField::will_link(ciInstanceKlass* accessing_klass,
|
||||
// ------------------------------------------------------------------
|
||||
// ciField::print
|
||||
void ciField::print() {
|
||||
tty->print("<ciField ");
|
||||
tty->print("<ciField name=");
|
||||
_holder->print_name();
|
||||
tty->print(".");
|
||||
_name->print_symbol();
|
||||
tty->print(" signature=");
|
||||
_signature->print_symbol();
|
||||
tty->print(" offset=%d type=", _offset);
|
||||
if (_type != NULL) _type->print_name();
|
||||
else tty->print("(reference)");
|
||||
|
||||
@ -169,16 +169,18 @@ void ClassLoaderData::add_dependency(Handle dependency, TRAPS) {
|
||||
ok = (objArrayOop)ok->obj_at(1);
|
||||
}
|
||||
|
||||
// Must handle over GC points
|
||||
assert (last != NULL, "dependencies should be initialized");
|
||||
objArrayHandle last_handle(THREAD, last);
|
||||
|
||||
// Create a new dependency node with fields for (class_loader or mirror, next)
|
||||
objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
|
||||
deps->obj_at_put(0, dependency());
|
||||
|
||||
// Must handle over more GC points
|
||||
// Must handle over GC points
|
||||
objArrayHandle new_dependency(THREAD, deps);
|
||||
|
||||
// Add the dependency under lock
|
||||
assert (last != NULL, "dependencies should be initialized");
|
||||
objArrayHandle last_handle(THREAD, last);
|
||||
locked_add_dependency(last_handle, new_dependency);
|
||||
}
|
||||
|
||||
|
||||
@ -756,6 +756,15 @@
|
||||
do_intrinsic(_unpark, sun_misc_Unsafe, unpark_name, unpark_signature, F_RN) \
|
||||
do_name( unpark_name, "unpark") \
|
||||
do_alias( unpark_signature, /*(LObject;)V*/ object_void_signature) \
|
||||
do_intrinsic(_loadFence, sun_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \
|
||||
do_name( loadFence_name, "loadFence") \
|
||||
do_alias( loadFence_signature, void_method_signature) \
|
||||
do_intrinsic(_storeFence, sun_misc_Unsafe, storeFence_name, storeFence_signature, F_RN) \
|
||||
do_name( storeFence_name, "storeFence") \
|
||||
do_alias( storeFence_signature, void_method_signature) \
|
||||
do_intrinsic(_fullFence, sun_misc_Unsafe, fullFence_name, fullFence_signature, F_RN) \
|
||||
do_name( fullFence_name, "fullFence") \
|
||||
do_alias( fullFence_signature, void_method_signature) \
|
||||
\
|
||||
/* unsafe memory references (there are a lot of them...) */ \
|
||||
do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \
|
||||
@ -897,12 +906,14 @@
|
||||
do_intrinsic(_getAndAddLong, sun_misc_Unsafe, getAndAddLong_name, getAndAddLong_signature, F_R) \
|
||||
do_name( getAndAddLong_name, "getAndAddLong") \
|
||||
do_signature(getAndAddLong_signature, "(Ljava/lang/Object;JJ)J" ) \
|
||||
do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSet_name, getAndSetInt_signature, F_R) \
|
||||
do_name( getAndSet_name, "getAndSet") \
|
||||
do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSetInt_name, getAndSetInt_signature, F_R) \
|
||||
do_name( getAndSetInt_name, "getAndSetInt") \
|
||||
do_alias( getAndSetInt_signature, /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature) \
|
||||
do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSet_name, getAndSetLong_signature, F_R) \
|
||||
do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSetLong_name, getAndSetLong_signature, F_R) \
|
||||
do_name( getAndSetLong_name, "getAndSetLong") \
|
||||
do_alias( getAndSetLong_signature, /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature) \
|
||||
do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSet_name, getAndSetObject_signature, F_R) \
|
||||
do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSetObject_name, getAndSetObject_signature, F_R)\
|
||||
do_name( getAndSetObject_name, "getAndSetObject") \
|
||||
do_signature(getAndSetObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
|
||||
\
|
||||
/* prefetch_signature is shared by all prefetch variants */ \
|
||||
|
||||
@ -538,6 +538,7 @@ void CompilerOracle::parse_from_line(char* line) {
|
||||
|
||||
if (match != NULL) {
|
||||
if (!_quiet) {
|
||||
ResourceMark rm;
|
||||
tty->print("CompilerOracle: %s ", command_names[command]);
|
||||
match->print();
|
||||
}
|
||||
|
||||
@ -189,6 +189,11 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
set_req(1, addx);
|
||||
set_req(2, a22);
|
||||
progress = this;
|
||||
PhaseIterGVN *igvn = phase->is_IterGVN();
|
||||
if (add2->outcnt() == 0 && igvn) {
|
||||
// add disconnected.
|
||||
igvn->_worklist.push(add2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -624,6 +629,11 @@ Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant?
|
||||
set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1))));
|
||||
set_req(Offset, add->in(2));
|
||||
PhaseIterGVN *igvn = phase->is_IterGVN();
|
||||
if (add->outcnt() == 0 && igvn) {
|
||||
// add disconnected.
|
||||
igvn->_worklist.push((Node*)add);
|
||||
}
|
||||
return this; // Made progress
|
||||
}
|
||||
}
|
||||
|
||||
@ -403,7 +403,7 @@ const char* InlineTree::check_can_parse(ciMethod* callee) {
|
||||
//------------------------------print_inlining---------------------------------
|
||||
// Really, the failure_msg can be a success message also.
|
||||
void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
|
||||
CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
|
||||
C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
|
||||
if (callee_method == NULL) tty->print(" callee not monotonic or profiled");
|
||||
if (Verbose && callee_method) {
|
||||
const InlineTree *top = this;
|
||||
|
||||
@ -274,6 +274,9 @@ class LateInlineCallGenerator : public DirectCallGenerator {
|
||||
virtual void do_late_inline();
|
||||
|
||||
virtual JVMState* generate(JVMState* jvms) {
|
||||
Compile *C = Compile::current();
|
||||
C->print_inlining_skip(this);
|
||||
|
||||
// Record that this call site should be revisited once the main
|
||||
// parse is finished.
|
||||
Compile::current()->add_late_inline(this);
|
||||
@ -284,7 +287,6 @@ class LateInlineCallGenerator : public DirectCallGenerator {
|
||||
// as is done for allocations and macro expansion.
|
||||
return DirectCallGenerator::generate(jvms);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -307,7 +309,9 @@ void LateInlineCallGenerator::do_late_inline() {
|
||||
|
||||
// Make sure the state is a MergeMem for parsing.
|
||||
if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
|
||||
map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
|
||||
Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
|
||||
C->initial_gvn()->set_type_bottom(mem);
|
||||
map->set_req(TypeFunc::Memory, mem);
|
||||
}
|
||||
|
||||
// Make enough space for the expression stack and transfer the incoming arguments
|
||||
@ -320,6 +324,8 @@ void LateInlineCallGenerator::do_late_inline() {
|
||||
}
|
||||
}
|
||||
|
||||
C->print_inlining_insert(this);
|
||||
|
||||
CompileLog* log = C->log();
|
||||
if (log != NULL) {
|
||||
log->head("late_inline method='%d'", log->identify(method()));
|
||||
@ -608,7 +614,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
|
||||
if (cg != NULL && cg->is_inline())
|
||||
return cg;
|
||||
} else {
|
||||
if (PrintInlining) CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
|
||||
if (PrintInlining) C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@ -147,9 +147,9 @@ class CallGenerator : public ResourceObj {
|
||||
CallGenerator* cg);
|
||||
virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
|
||||
|
||||
static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
|
||||
static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
|
||||
if (PrintInlining)
|
||||
CompileTask::print_inlining(callee, inline_level, bci, msg);
|
||||
C->print_inlining(callee, inline_level, bci, msg);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -751,7 +751,7 @@ void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj
|
||||
projs->fallthrough_ioproj = pn;
|
||||
for (DUIterator j = pn->outs(); pn->has_out(j); j++) {
|
||||
Node* e = pn->out(j);
|
||||
if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) {
|
||||
if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) {
|
||||
assert(projs->exobj == NULL, "only one");
|
||||
projs->exobj = e;
|
||||
}
|
||||
|
||||
@ -1566,6 +1566,10 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
Node* n = in(j); // Get the input
|
||||
if (rc == NULL || phase->type(rc) == Type::TOP) {
|
||||
if (n != top) { // Not already top?
|
||||
PhaseIterGVN *igvn = phase->is_IterGVN();
|
||||
if (can_reshape && igvn != NULL) {
|
||||
igvn->_worklist.push(r);
|
||||
}
|
||||
set_req(j, top); // Nuke it down
|
||||
progress = this; // Record progress
|
||||
}
|
||||
|
||||
@ -610,7 +610,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
|
||||
_trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
|
||||
_printer(IdealGraphPrinter::printer()),
|
||||
#endif
|
||||
_congraph(NULL) {
|
||||
_congraph(NULL),
|
||||
_print_inlining_list(NULL),
|
||||
_print_inlining(0) {
|
||||
C = this;
|
||||
|
||||
CompileWrapper cw(this);
|
||||
@ -666,6 +668,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
|
||||
PhaseGVN gvn(node_arena(), estimated_size);
|
||||
set_initial_gvn(&gvn);
|
||||
|
||||
if (PrintInlining) {
|
||||
_print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
|
||||
}
|
||||
{ // Scope for timing the parser
|
||||
TracePhase t3("parse", &_t_parser, true);
|
||||
|
||||
@ -754,6 +759,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
|
||||
}
|
||||
}
|
||||
assert(_late_inlines.length() == 0, "should have been processed");
|
||||
dump_inlining();
|
||||
|
||||
print_method("Before RemoveUseless", 3);
|
||||
|
||||
@ -899,7 +905,9 @@ Compile::Compile( ciEnv* ci_env,
|
||||
#endif
|
||||
_dead_node_list(comp_arena()),
|
||||
_dead_node_count(0),
|
||||
_congraph(NULL) {
|
||||
_congraph(NULL),
|
||||
_print_inlining_list(NULL),
|
||||
_print_inlining(0) {
|
||||
C = this;
|
||||
|
||||
#ifndef PRODUCT
|
||||
@ -3351,3 +3359,11 @@ void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n
|
||||
cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
|
||||
}
|
||||
}
|
||||
|
||||
void Compile::dump_inlining() {
|
||||
if (PrintInlining) {
|
||||
for (int i = 0; i < _print_inlining_list->length(); i++) {
|
||||
tty->print(_print_inlining_list->at(i).ss()->as_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "code/debugInfoRec.hpp"
|
||||
#include "code/exceptionHandlerTable.hpp"
|
||||
#include "compiler/compilerOracle.hpp"
|
||||
#include "compiler/compileBroker.hpp"
|
||||
#include "libadt/dict.hpp"
|
||||
#include "libadt/port.hpp"
|
||||
#include "libadt/vectset.hpp"
|
||||
@ -369,6 +370,61 @@ class Compile : public Phase {
|
||||
GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after
|
||||
// main parsing has finished.
|
||||
|
||||
// Inlining may not happen in parse order which would make
|
||||
// PrintInlining output confusing. Keep track of PrintInlining
|
||||
// pieces in order.
|
||||
class PrintInliningBuffer : public ResourceObj {
|
||||
private:
|
||||
CallGenerator* _cg;
|
||||
stringStream* _ss;
|
||||
|
||||
public:
|
||||
PrintInliningBuffer()
|
||||
: _cg(NULL) { _ss = new stringStream(); }
|
||||
|
||||
stringStream* ss() const { return _ss; }
|
||||
CallGenerator* cg() const { return _cg; }
|
||||
void set_cg(CallGenerator* cg) { _cg = cg; }
|
||||
};
|
||||
|
||||
GrowableArray<PrintInliningBuffer>* _print_inlining_list;
|
||||
int _print_inlining;
|
||||
|
||||
public:
|
||||
|
||||
// Stream of the buffer at index _print_inlining, i.e. the one that
// PrintInlining output is currently being appended to.
outputStream* print_inlining_stream() const {
  stringStream* current = _print_inlining_list->at(_print_inlining).ss();
  return current;
}
|
||||
|
||||
// Record cg against the current buffer, then insert a fresh buffer right
// after it and make that one current. No-op unless PrintInlining is set.
void print_inlining_skip(CallGenerator* cg) {
  if (PrintInlining) {
    // Update the stored element through its address. If at() hands back a
    // copy of the element, calling set_cg() on that result would only
    // change the temporary and the association would be lost.
    _print_inlining_list->adr_at(_print_inlining)->set_cg(cg);
    _print_inlining++;
    _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer());
  }
}
|
||||
|
||||
// Find the buffer that was recorded against cg, insert a fresh buffer right
// after it, make the new buffer current and clear the recorded generator.
// Reaching the end of the list without a match is treated as a VM error.
// No-op unless PrintInlining is set.
void print_inlining_insert(CallGenerator* cg) {
  if (PrintInlining) {
    for (int i = 0; i < _print_inlining_list->length(); i++) {
      if (_print_inlining_list->adr_at(i)->cg() == cg) {
        _print_inlining_list->insert_before(i+1, PrintInliningBuffer());
        _print_inlining = i+1;
        // Re-fetch the element's address after the insertion and clear the
        // generator in place; mutating the result of at() could act on a
        // copy and leave the stored element unchanged.
        _print_inlining_list->adr_at(i)->set_cg(NULL);
        return;
      }
    }
    ShouldNotReachHere();
  }
}
|
||||
|
||||
// Format one inlining message for method via CompileTask::print_inlining
// into a temporary stream and append the text to the current PrintInlining
// buffer.
void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
  stringStream ss;
  CompileTask::print_inlining(&ss, method, inline_level, bci, msg);
  // The formatted text is data, not a format string: pass it through "%s"
  // so that a '%' in it cannot be interpreted as a conversion specifier.
  print_inlining_stream()->print("%s", ss.as_string());
}
|
||||
|
||||
private:
|
||||
// Matching, CFG layout, allocation, code generation
|
||||
PhaseCFG* _cfg; // Results of CFG finding
|
||||
bool _select_24_bit_instr; // We selected an instruction with a 24-bit result
|
||||
@ -591,7 +647,7 @@ class Compile : public Phase {
|
||||
void reset_dead_node_list() { _dead_node_list.Reset();
|
||||
_dead_node_count = 0;
|
||||
}
|
||||
uint live_nodes() {
|
||||
uint live_nodes() const {
|
||||
int val = _unique - _dead_node_count;
|
||||
assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count));
|
||||
return (uint) val;
|
||||
@ -702,7 +758,7 @@ class Compile : public Phase {
|
||||
|
||||
void identify_useful_nodes(Unique_Node_List &useful);
|
||||
void update_dead_node_list(Unique_Node_List &useful);
|
||||
void remove_useless_nodes (Unique_Node_List &useful);
|
||||
void remove_useless_nodes (Unique_Node_List &useful);
|
||||
|
||||
WarmCallInfo* warm_calls() const { return _warm_calls; }
|
||||
void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
|
||||
@ -711,6 +767,8 @@ class Compile : public Phase {
|
||||
// Record this CallGenerator for inlining at the end of parsing.
|
||||
void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
|
||||
|
||||
void dump_inlining();
|
||||
|
||||
// Matching, CFG layout, allocation, code generation
|
||||
PhaseCFG* cfg() { return _cfg; }
|
||||
bool select_24_bit_instr() const { return _select_24_bit_instr; }
|
||||
|
||||
@ -40,19 +40,24 @@
|
||||
#include "prims/nativeLookup.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
|
||||
void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
|
||||
void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
|
||||
if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) {
|
||||
outputStream* out = tty;
|
||||
if (!PrintInlining) {
|
||||
if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
|
||||
method->print_short_name();
|
||||
tty->cr();
|
||||
}
|
||||
CompileTask::print_inlining(prof_method, depth, bci);
|
||||
} else {
|
||||
out = C->print_inlining_stream();
|
||||
}
|
||||
CompileTask::print_inline_indent(depth);
|
||||
tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
|
||||
prof_klass->name()->print_symbol();
|
||||
tty->cr();
|
||||
CompileTask::print_inline_indent(depth, out);
|
||||
out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
|
||||
stringStream ss;
|
||||
prof_klass->name()->print_symbol_on(&ss);
|
||||
out->print(ss.as_string());
|
||||
out->cr();
|
||||
}
|
||||
}
|
||||
|
||||
@ -233,13 +238,13 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
|
||||
}
|
||||
if (miss_cg != NULL) {
|
||||
if (next_hit_cg != NULL) {
|
||||
trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
|
||||
trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
|
||||
// We don't need to record dependency on a receiver here and below.
|
||||
// Whenever we inline, the dependency is added by Parse::Parse().
|
||||
miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
|
||||
}
|
||||
if (miss_cg != NULL) {
|
||||
trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
|
||||
trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
|
||||
CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
|
||||
if (cg != NULL) return cg;
|
||||
}
|
||||
|
||||
@ -1771,11 +1771,21 @@ void GraphKit::replace_call(CallNode* call, Node* result) {
|
||||
CallProjections callprojs;
|
||||
call->extract_projections(&callprojs, true);
|
||||
|
||||
// Replace all the old call edges with the edges from the inlining result
|
||||
C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control));
|
||||
C->gvn_replace_by(callprojs.fallthrough_memproj, final_state->in(TypeFunc::Memory));
|
||||
C->gvn_replace_by(callprojs.fallthrough_ioproj, final_state->in(TypeFunc::I_O));
|
||||
Node* init_mem = call->in(TypeFunc::Memory);
|
||||
Node* final_mem = final_state->in(TypeFunc::Memory);
|
||||
Node* final_ctl = final_state->in(TypeFunc::Control);
|
||||
Node* final_io = final_state->in(TypeFunc::I_O);
|
||||
|
||||
// Replace all the old call edges with the edges from the inlining result
|
||||
if (callprojs.fallthrough_catchproj != NULL) {
|
||||
C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl);
|
||||
}
|
||||
if (callprojs.fallthrough_memproj != NULL) {
|
||||
C->gvn_replace_by(callprojs.fallthrough_memproj, final_mem);
|
||||
}
|
||||
if (callprojs.fallthrough_ioproj != NULL) {
|
||||
C->gvn_replace_by(callprojs.fallthrough_ioproj, final_io);
|
||||
}
|
||||
|
||||
// Replace the result with the new result if it exists and is used
|
||||
if (callprojs.resproj != NULL && result != NULL) {
|
||||
@ -2980,7 +2990,7 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
|
||||
set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) );
|
||||
// create memory projection for i_o
|
||||
set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
|
||||
make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
|
||||
make_slow_call_ex(allocx, env()->Throwable_klass(), true);
|
||||
|
||||
// create a memory projection as for the normal control path
|
||||
Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory));
|
||||
|
||||
@ -282,6 +282,7 @@ class LibraryCallKit : public GraphKit {
|
||||
typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
|
||||
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind);
|
||||
bool inline_unsafe_ordered_store(BasicType type);
|
||||
bool inline_unsafe_fence(vmIntrinsics::ID id);
|
||||
bool inline_fp_conversions(vmIntrinsics::ID id);
|
||||
bool inline_number_methods(vmIntrinsics::ID id);
|
||||
bool inline_reference_get();
|
||||
@ -334,6 +335,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
|
||||
case vmIntrinsics::_getAndSetInt:
|
||||
case vmIntrinsics::_getAndSetLong:
|
||||
case vmIntrinsics::_getAndSetObject:
|
||||
case vmIntrinsics::_loadFence:
|
||||
case vmIntrinsics::_storeFence:
|
||||
case vmIntrinsics::_fullFence:
|
||||
break; // InlineNatives does not control String.compareTo
|
||||
case vmIntrinsics::_Reference_get:
|
||||
break; // InlineNatives does not control Reference.get
|
||||
@ -536,7 +540,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
|
||||
// Try to inline the intrinsic.
|
||||
if (kit.try_to_inline()) {
|
||||
if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
|
||||
CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
|
||||
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
|
||||
}
|
||||
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
|
||||
if (C->log()) {
|
||||
@ -555,7 +559,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
|
||||
if (jvms->has_method()) {
|
||||
// Not a root compile.
|
||||
const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
|
||||
CompileTask::print_inlining(callee, jvms->depth() - 1, bci, msg);
|
||||
C->print_inlining(callee, jvms->depth() - 1, bci, msg);
|
||||
} else {
|
||||
// Root compile
|
||||
tty->print("Did not generate intrinsic %s%s at bci:%d in",
|
||||
@ -585,7 +589,7 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
|
||||
Node* slow_ctl = kit.try_to_predicate();
|
||||
if (!kit.failing()) {
|
||||
if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
|
||||
CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
|
||||
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
|
||||
}
|
||||
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
|
||||
if (C->log()) {
|
||||
@ -602,12 +606,12 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
|
||||
if (jvms->has_method()) {
|
||||
// Not a root compile.
|
||||
const char* msg = "failed to generate predicate for intrinsic";
|
||||
CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
|
||||
C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
|
||||
} else {
|
||||
// Root compile
|
||||
tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
|
||||
vmIntrinsics::name_at(intrinsic_id()),
|
||||
(is_virtual() ? " (virtual)" : ""), bci);
|
||||
C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
|
||||
vmIntrinsics::name_at(intrinsic_id()),
|
||||
(is_virtual() ? " (virtual)" : ""), bci);
|
||||
}
|
||||
}
|
||||
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
|
||||
@ -732,6 +736,10 @@ bool LibraryCallKit::try_to_inline() {
|
||||
case vmIntrinsics::_getAndSetLong: return inline_unsafe_load_store(T_LONG, LS_xchg);
|
||||
case vmIntrinsics::_getAndSetObject: return inline_unsafe_load_store(T_OBJECT, LS_xchg);
|
||||
|
||||
case vmIntrinsics::_loadFence:
|
||||
case vmIntrinsics::_storeFence:
|
||||
case vmIntrinsics::_fullFence: return inline_unsafe_fence(intrinsic_id());
|
||||
|
||||
case vmIntrinsics::_currentThread: return inline_native_currentThread();
|
||||
case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted();
|
||||
|
||||
@ -2840,6 +2848,26 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Intrinsic for the loadFence / storeFence / fullFence ids.
// A MemBarCPUOrder is always emitted first so that earlier loads and stores
// are not moved down; it is followed by the acquire, release or volatile
// barrier matching the id. Any other id is reported via
// fatal_unexpected_iid() and the function returns false.
bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
  insert_mem_bar(Op_MemBarCPUOrder);

  int barrier_opcode;
  switch (id) {
    case vmIntrinsics::_loadFence:  barrier_opcode = Op_MemBarAcquire;  break;
    case vmIntrinsics::_storeFence: barrier_opcode = Op_MemBarRelease;  break;
    case vmIntrinsics::_fullFence:  barrier_opcode = Op_MemBarVolatile; break;
    default:
      fatal_unexpected_iid(id);
      return false;
  }

  insert_mem_bar(barrier_opcode);
  return true;
}
|
||||
|
||||
//----------------------------inline_unsafe_allocate---------------------------
|
||||
// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
|
||||
bool LibraryCallKit::inline_unsafe_allocate() {
|
||||
@ -2952,14 +2980,23 @@ bool LibraryCallKit::inline_native_isInterrupted() {
|
||||
|
||||
// We only go to the fast case code if we pass two guards.
|
||||
// Paths which do not pass are accumulated in the slow_region.
|
||||
|
||||
enum {
|
||||
no_int_result_path = 1, // t == Thread.current() && !TLS._osthread._interrupted
|
||||
no_clear_result_path = 2, // t == Thread.current() && TLS._osthread._interrupted && !clear_int
|
||||
slow_result_path = 3, // slow path: t.isInterrupted(clear_int)
|
||||
PATH_LIMIT
|
||||
};
|
||||
|
||||
// Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
|
||||
// out of the function.
|
||||
insert_mem_bar(Op_MemBarCPUOrder);
|
||||
|
||||
RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
|
||||
PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
|
||||
|
||||
RegionNode* slow_region = new (C) RegionNode(1);
|
||||
record_for_igvn(slow_region);
|
||||
RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow
|
||||
PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
|
||||
enum { no_int_result_path = 1,
|
||||
no_clear_result_path = 2,
|
||||
slow_result_path = 3
|
||||
};
|
||||
|
||||
// (a) Receiving thread must be the current thread.
|
||||
Node* rec_thr = argument(0);
|
||||
@ -2968,14 +3005,13 @@ bool LibraryCallKit::inline_native_isInterrupted() {
|
||||
Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
|
||||
Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
|
||||
|
||||
bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
|
||||
if (!known_current_thread)
|
||||
generate_slow_guard(bol_thr, slow_region);
|
||||
generate_slow_guard(bol_thr, slow_region);
|
||||
|
||||
// (b) Interrupt bit on TLS must be false.
|
||||
Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
|
||||
Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
|
||||
p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
|
||||
|
||||
// Set the control input on the field _interrupted read to prevent it floating up.
|
||||
Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
|
||||
Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
|
||||
@ -3020,22 +3056,20 @@ bool LibraryCallKit::inline_native_isInterrupted() {
|
||||
Node* slow_val = set_results_for_java_call(slow_call);
|
||||
// this->control() comes from set_results_for_java_call
|
||||
|
||||
// If we know that the result of the slow call will be true, tell the optimizer!
|
||||
if (known_current_thread) slow_val = intcon(1);
|
||||
|
||||
Node* fast_io = slow_call->in(TypeFunc::I_O);
|
||||
Node* fast_mem = slow_call->in(TypeFunc::Memory);
|
||||
|
||||
// These two phis are pre-filled with copies of the fast IO and Memory
|
||||
Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO);
|
||||
Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
|
||||
PhiNode* result_mem = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
|
||||
PhiNode* result_io = PhiNode::make(result_rgn, fast_io, Type::ABIO);
|
||||
|
||||
result_rgn->init_req(slow_result_path, control());
|
||||
io_phi ->init_req(slow_result_path, i_o());
|
||||
mem_phi ->init_req(slow_result_path, reset_memory());
|
||||
result_io ->init_req(slow_result_path, i_o());
|
||||
result_mem->init_req(slow_result_path, reset_memory());
|
||||
result_val->init_req(slow_result_path, slow_val);
|
||||
|
||||
set_all_memory( _gvn.transform(mem_phi) );
|
||||
set_i_o( _gvn.transform(io_phi) );
|
||||
set_all_memory(_gvn.transform(result_mem));
|
||||
set_i_o( _gvn.transform(result_io));
|
||||
}
|
||||
|
||||
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
||||
@ -3319,7 +3353,7 @@ bool LibraryCallKit::inline_native_subtype_check() {
|
||||
Node* arg = args[which_arg];
|
||||
arg = null_check(arg);
|
||||
if (stopped()) break;
|
||||
args[which_arg] = _gvn.transform(arg);
|
||||
args[which_arg] = arg;
|
||||
|
||||
Node* p = basic_plus_adr(arg, class_klass_offset);
|
||||
Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);
|
||||
|
||||
@ -509,6 +509,7 @@ void Parse::do_multianewarray() {
|
||||
makecon(TypeKlassPtr::make(array_klass)),
|
||||
dims);
|
||||
}
|
||||
make_slow_call_ex(c, env()->Throwable_klass(), false);
|
||||
|
||||
Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms));
|
||||
|
||||
|
||||
@ -989,7 +989,7 @@ JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* t
|
||||
// since we're notifying the VM on every catch.
|
||||
// Force deoptimization and the rest of the lookup
|
||||
// will be fine.
|
||||
deoptimize_caller_frame(thread, true);
|
||||
deoptimize_caller_frame(thread);
|
||||
}
|
||||
|
||||
// Check the stack guard pages. If enabled, look for handler in this frame;
|
||||
@ -1143,19 +1143,24 @@ const TypeFunc *OptoRuntime::rethrow_Type() {
|
||||
|
||||
|
||||
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
|
||||
// Deoptimize frame
|
||||
if (doit) {
|
||||
// Called from within the owner thread, so no need for safepoint
|
||||
RegisterMap reg_map(thread);
|
||||
frame stub_frame = thread->last_frame();
|
||||
assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
|
||||
frame caller_frame = stub_frame.sender(&reg_map);
|
||||
|
||||
// Deoptimize the caller frame.
|
||||
Deoptimization::deoptimize_frame(thread, caller_frame.id());
|
||||
// Deoptimize the caller before continuing, as the compiled
|
||||
// exception handler table may not be valid.
|
||||
if (!StressCompiledExceptionHandlers && doit) {
|
||||
deoptimize_caller_frame(thread);
|
||||
}
|
||||
}
|
||||
|
||||
// Deoptimize the frame that called the stub/runtime frame currently on top
// of thread's stack.
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
  // Called from within the owner thread, so no need for safepoint
  RegisterMap reg_map(thread);
  frame stub_frame = thread->last_frame();
  assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
  // Walk one frame up, passing the address of the register map declared above.
  frame caller_frame = stub_frame.sender(&reg_map);

  // Deoptimize the caller frame.
  Deoptimization::deoptimize_frame(thread, caller_frame.id());
}
|
||||
|
||||
|
||||
bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) {
|
||||
// Called from within the owner thread, so no need for safepoint
|
||||
|
||||
@ -174,6 +174,7 @@ private:
|
||||
static address handle_exception_C (JavaThread* thread);
|
||||
static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
|
||||
static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc );
|
||||
static void deoptimize_caller_frame (JavaThread *thread);
|
||||
static void deoptimize_caller_frame (JavaThread *thread, bool doit);
|
||||
static bool is_deoptimized_caller_frame (JavaThread *thread);
|
||||
|
||||
|
||||
@ -744,7 +744,9 @@ bool StringConcat::validate_control_flow() {
|
||||
ctrl_path.push(cn);
|
||||
ctrl_path.push(cn->proj_out(0));
|
||||
ctrl_path.push(cn->proj_out(0)->unique_out());
|
||||
ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
|
||||
if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) {
|
||||
ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
|
||||
}
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
@ -762,6 +764,12 @@ bool StringConcat::validate_control_flow() {
|
||||
} else if (ptr->is_IfTrue()) {
|
||||
IfNode* iff = ptr->in(0)->as_If();
|
||||
BoolNode* b = iff->in(1)->isa_Bool();
|
||||
|
||||
if (b == NULL) {
|
||||
fail = true;
|
||||
break;
|
||||
}
|
||||
|
||||
Node* cmp = b->in(1);
|
||||
Node* v1 = cmp->in(1);
|
||||
Node* v2 = cmp->in(2);
|
||||
@ -1408,71 +1416,76 @@ void PhaseStringOpts::replace_string_concat(StringConcat* sc) {
|
||||
Deoptimization::Action_make_not_entrant);
|
||||
}
|
||||
|
||||
// length now contains the number of characters needed for the
|
||||
// char[] so create a new AllocateArray for the char[]
|
||||
Node* char_array = NULL;
|
||||
{
|
||||
PreserveReexecuteState preexecs(&kit);
|
||||
// The original jvms is for an allocation of either a String or
|
||||
// StringBuffer so no stack adjustment is necessary for proper
|
||||
// reexecution. If we deoptimize in the slow path the bytecode
|
||||
// will be reexecuted and the char[] allocation will be thrown away.
|
||||
kit.jvms()->set_should_reexecute(true);
|
||||
char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
|
||||
length, 1);
|
||||
}
|
||||
Node* result;
|
||||
if (!kit.stopped()) {
|
||||
|
||||
// Mark the allocation so that zeroing is skipped since the code
|
||||
// below will overwrite the entire array
|
||||
AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
|
||||
char_alloc->maybe_set_complete(_gvn);
|
||||
|
||||
// Now copy the string representations into the final char[]
|
||||
Node* start = __ intcon(0);
|
||||
for (int argi = 0; argi < sc->num_arguments(); argi++) {
|
||||
Node* arg = sc->argument(argi);
|
||||
switch (sc->mode(argi)) {
|
||||
case StringConcat::IntMode: {
|
||||
Node* end = __ AddI(start, string_sizes->in(argi));
|
||||
// getChars works backwards so pass the ending point as well as the start
|
||||
int_getChars(kit, arg, char_array, start, end);
|
||||
start = end;
|
||||
break;
|
||||
}
|
||||
case StringConcat::StringNullCheckMode:
|
||||
case StringConcat::StringMode: {
|
||||
start = copy_string(kit, arg, char_array, start);
|
||||
break;
|
||||
}
|
||||
case StringConcat::CharMode: {
|
||||
__ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
|
||||
arg, T_CHAR, char_adr_idx);
|
||||
start = __ AddI(start, __ intcon(1));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
// length now contains the number of characters needed for the
|
||||
// char[] so create a new AllocateArray for the char[]
|
||||
Node* char_array = NULL;
|
||||
{
|
||||
PreserveReexecuteState preexecs(&kit);
|
||||
// The original jvms is for an allocation of either a String or
|
||||
// StringBuffer so no stack adjustment is necessary for proper
|
||||
// reexecution. If we deoptimize in the slow path the bytecode
|
||||
// will be reexecuted and the char[] allocation will be thrown away.
|
||||
kit.jvms()->set_should_reexecute(true);
|
||||
char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
|
||||
length, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// If we're not reusing an existing String allocation then allocate one here.
|
||||
Node* result = sc->string_alloc();
|
||||
if (result == NULL) {
|
||||
PreserveReexecuteState preexecs(&kit);
|
||||
// The original jvms is for an allocation of either a String or
|
||||
// StringBuffer so no stack adjustment is necessary for proper
|
||||
// reexecution.
|
||||
kit.jvms()->set_should_reexecute(true);
|
||||
result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
|
||||
}
|
||||
// Mark the allocation so that zeroing is skipped since the code
|
||||
// below will overwrite the entire array
|
||||
AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
|
||||
char_alloc->maybe_set_complete(_gvn);
|
||||
|
||||
// Initialize the string
|
||||
if (java_lang_String::has_offset_field()) {
|
||||
kit.store_String_offset(kit.control(), result, __ intcon(0));
|
||||
kit.store_String_length(kit.control(), result, length);
|
||||
}
|
||||
kit.store_String_value(kit.control(), result, char_array);
|
||||
// Now copy the string representations into the final char[]
|
||||
Node* start = __ intcon(0);
|
||||
for (int argi = 0; argi < sc->num_arguments(); argi++) {
|
||||
Node* arg = sc->argument(argi);
|
||||
switch (sc->mode(argi)) {
|
||||
case StringConcat::IntMode: {
|
||||
Node* end = __ AddI(start, string_sizes->in(argi));
|
||||
// getChars works backwards so pass the ending point as well as the start
|
||||
int_getChars(kit, arg, char_array, start, end);
|
||||
start = end;
|
||||
break;
|
||||
}
|
||||
case StringConcat::StringNullCheckMode:
|
||||
case StringConcat::StringMode: {
|
||||
start = copy_string(kit, arg, char_array, start);
|
||||
break;
|
||||
}
|
||||
case StringConcat::CharMode: {
|
||||
__ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
|
||||
arg, T_CHAR, char_adr_idx);
|
||||
start = __ AddI(start, __ intcon(1));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
// If we're not reusing an existing String allocation then allocate one here.
|
||||
result = sc->string_alloc();
|
||||
if (result == NULL) {
|
||||
PreserveReexecuteState preexecs(&kit);
|
||||
// The original jvms is for an allocation of either a String or
|
||||
// StringBuffer so no stack adjustment is necessary for proper
|
||||
// reexecution.
|
||||
kit.jvms()->set_should_reexecute(true);
|
||||
result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
|
||||
}
|
||||
|
||||
// Initialize the string
|
||||
if (java_lang_String::has_offset_field()) {
|
||||
kit.store_String_offset(kit.control(), result, __ intcon(0));
|
||||
kit.store_String_length(kit.control(), result, length);
|
||||
}
|
||||
kit.store_String_value(kit.control(), result, char_array);
|
||||
} else {
|
||||
result = C->top();
|
||||
}
|
||||
// hook up the outgoing control and result
|
||||
kit.replace_call(sc->end(), result);
|
||||
|
||||
|
||||
@ -1168,8 +1168,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject
|
||||
// Walk all nmethods depending on this call site.
|
||||
MutexLocker mu(Compile_lock, thread);
|
||||
Universe::flush_dependents_on(call_site, target);
|
||||
java_lang_invoke_CallSite::set_target(call_site(), target());
|
||||
}
|
||||
java_lang_invoke_CallSite::set_target(call_site(), target());
|
||||
}
|
||||
JVM_END
|
||||
|
||||
@ -1180,8 +1180,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobjec
|
||||
// Walk all nmethods depending on this call site.
|
||||
MutexLocker mu(Compile_lock, thread);
|
||||
Universe::flush_dependents_on(call_site, target);
|
||||
java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
|
||||
}
|
||||
java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
|
||||
}
|
||||
JVM_END
|
||||
|
||||
|
||||
@ -468,6 +468,21 @@ UNSAFE_ENTRY(void, Unsafe_SetOrderedLong(JNIEnv *env, jobject unsafe, jobject ob
|
||||
#endif
|
||||
UNSAFE_END
|
||||
|
||||
// Native entry Unsafe_LoadFence (listed under the name "loadFence" in the
// methods[] registration table): its only action besides the UnsafeWrapper
// bookkeeping call is OrderAccess::acquire().
UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe))
|
||||
UnsafeWrapper("Unsafe_LoadFence");
|
||||
OrderAccess::acquire();
|
||||
UNSAFE_END
|
||||
|
||||
// Native entry Unsafe_StoreFence (listed under the name "storeFence" in the
// methods[] registration table): its only action besides the UnsafeWrapper
// bookkeeping call is OrderAccess::release().
UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe))
|
||||
UnsafeWrapper("Unsafe_StoreFence");
|
||||
OrderAccess::release();
|
||||
UNSAFE_END
|
||||
|
||||
// Native entry Unsafe_FullFence (listed under the name "fullFence" in the
// methods[] registration table): its only action besides the UnsafeWrapper
// bookkeeping call is OrderAccess::fence().
UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe))
|
||||
UnsafeWrapper("Unsafe_FullFence");
|
||||
OrderAccess::fence();
|
||||
UNSAFE_END
|
||||
|
||||
////// Data in the C heap.
|
||||
|
||||
// Note: These do not throw NullPointerException for bad pointers.
|
||||
@ -1550,6 +1565,9 @@ static JNINativeMethod methods[] = {
|
||||
{CC"putOrderedObject", CC"("OBJ"J"OBJ")V", FN_PTR(Unsafe_SetOrderedObject)},
|
||||
{CC"putOrderedInt", CC"("OBJ"JI)V", FN_PTR(Unsafe_SetOrderedInt)},
|
||||
{CC"putOrderedLong", CC"("OBJ"JJ)V", FN_PTR(Unsafe_SetOrderedLong)},
|
||||
{CC"loadFence", CC"()V", FN_PTR(Unsafe_LoadFence)},
|
||||
{CC"storeFence", CC"()V", FN_PTR(Unsafe_StoreFence)},
|
||||
{CC"fullFence", CC"()V", FN_PTR(Unsafe_FullFence)},
|
||||
{CC"park", CC"(ZJ)V", FN_PTR(Unsafe_Park)},
|
||||
{CC"unpark", CC"("OBJ")V", FN_PTR(Unsafe_Unpark)}
|
||||
|
||||
|
||||
@ -922,6 +922,9 @@ class CommandLineFlags {
|
||||
develop(bool, PrintExceptionHandlers, false, \
|
||||
"Print exception handler tables for all nmethods when generated") \
|
||||
\
|
||||
develop(bool, StressCompiledExceptionHandlers, false, \
|
||||
"Exercise compiled exception handlers") \
|
||||
\
|
||||
develop(bool, InterceptOSException, false, \
|
||||
"Starts debugger when an implicit OS (e.g., NULL) " \
|
||||
"exception happens") \
|
||||
|
||||
@ -2190,7 +2190,7 @@ void JavaThread::send_thread_stop(oop java_throwable) {
|
||||
// BiasedLocking needs an updated RegisterMap for the revoke monitors pass
|
||||
RegisterMap reg_map(this, UseBiasedLocking);
|
||||
frame compiled_frame = f.sender(&reg_map);
|
||||
if (compiled_frame.can_be_deoptimized()) {
|
||||
if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) {
|
||||
Deoptimization::deoptimize(this, compiled_frame, &reg_map);
|
||||
}
|
||||
}
|
||||
|
||||
@ -54,7 +54,6 @@ abstract public class TestAESBase {
|
||||
String paddingStr = "PKCS5Padding";
|
||||
AlgorithmParameters algParams;
|
||||
SecretKey key;
|
||||
int ivLen;
|
||||
|
||||
static int numThreads = 0;
|
||||
int threadId;
|
||||
@ -68,7 +67,7 @@ abstract public class TestAESBase {
|
||||
|
||||
public void prepare() {
|
||||
try {
|
||||
System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);
|
||||
System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput);
|
||||
|
||||
int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
|
||||
byte keyBytes[] = new byte[keyLenBytes];
|
||||
@ -90,10 +89,14 @@ abstract public class TestAESBase {
|
||||
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
|
||||
dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
|
||||
|
||||
ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
|
||||
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
|
||||
|
||||
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
|
||||
if (mode.equals("CBC")) {
|
||||
int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
|
||||
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
|
||||
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
|
||||
} else {
|
||||
algParams = cipher.getParameters();
|
||||
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
|
||||
}
|
||||
algParams = cipher.getParameters();
|
||||
dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
|
||||
if (threadId == 0) {
|
||||
|
||||
@ -27,7 +27,8 @@
|
||||
* @bug 7184394
|
||||
* @summary add intrinsics to use AES instructions
|
||||
*
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain
|
||||
*
|
||||
* @author Tom Deneau
|
||||
*/
|
||||
|
||||
94
hotspot/test/compiler/8004741/Test8004741.java
Normal file
94
hotspot/test/compiler/8004741/Test8004741.java
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test Test8004741.java
|
||||
* @bug 8004741
|
||||
* @summary Missing compiled exception handle table entry for multidimensional array allocation
|
||||
* @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741
|
||||
*
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Regression test driver: a worker thread repeatedly allocates a
 * two-dimensional int array while the main thread calls Thread.stop() on it.
 * The test passes when the resulting Error is observed by one of the catch
 * blocks that set {@link #passed}; otherwise main exits with status 97.
 */
public class Test8004741 extends Thread {
|
||||
|
||||
// Allocates and returns new int[a][b].
// An Error raised during the allocation is reported, recorded in `passed`,
// and rethrown; an Exception is reported and rethrown without touching `passed`.
static int[][] test(int a, int b) throws Exception {
|
||||
int[][] ar = null;
|
||||
try {
|
||||
ar = new int[a][b];
|
||||
} catch (Error e) {
|
||||
System.out.println("test got Error");
|
||||
passed = true;
|
||||
throw(e);
|
||||
} catch (Exception e) {
|
||||
System.out.println("test got Exception");
|
||||
throw(e);
|
||||
}
|
||||
return ar;
|
||||
}
|
||||
|
||||
// Set to true by the catch blocks in test() (any Error) and run() (ThreadDeath);
// reset and then read by main() after the worker thread has been joined.
static boolean passed = false;
|
||||
|
||||
// Worker body: calls test(2,20000) in an endless loop until something is thrown.
// ThreadDeath marks the test as passed; any other Error or Exception is only
// printed and leaves `passed` as it was.
public void run() {
|
||||
System.out.println("test started");
|
||||
try {
|
||||
while(true) {
|
||||
test(2,20000);
|
||||
}
|
||||
} catch (ThreadDeath e) {
|
||||
System.out.println("test got ThreadDeath");
|
||||
passed = true;
|
||||
} catch (Error e) {
|
||||
e.printStackTrace();
|
||||
System.out.println("test got Error");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
System.out.println("test got Exception");
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point: runs test() 11000 times on the main thread, then starts a worker,
// stops it after one second, waits for it to finish, and reports PASSED/FAILED.
public static void main(String[] args) throws Exception {
|
||||
// NOTE(review): presumably a warm-up so that test() is JIT-compiled before
// the worker starts -- confirm against the compile threshold in use.
for (int n = 0; n < 11000; n++) {
|
||||
test(2, 20);
|
||||
}
|
||||
|
||||
// First test exception catch
|
||||
Test8004741 t = new Test8004741();
|
||||
|
||||
// Clear any value left over from the loop above before the worker runs.
passed = false;
|
||||
t.start();
|
||||
Thread.sleep(1000);
|
||||
t.stop();
|
||||
|
||||
Thread.sleep(5000);
|
||||
t.join();
|
||||
if (passed) {
|
||||
System.out.println("PASSED");
|
||||
} else {
|
||||
System.out.println("FAILED");
|
||||
System.exit(97);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
50
hotspot/test/compiler/8005033/Test8005033.java
Normal file
50
hotspot/test/compiler/8005033/Test8005033.java
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright 2012 SAP AG. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8005033
|
||||
* @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow.
|
||||
* @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033
|
||||
* @author Richard Reingruber richard DOT reingruber AT sap DOT com
|
||||
*/
|
||||
|
||||
/**
 * Checks that Integer.bitCount(MINUS_ONE + 1) returns 0; main throws
 * InternalError when the computed value differs.
 */
public class Test8005033 {
|
||||
// NOTE(review): presumably left non-final so the addition in testBitCount()
// is not constant-folded at compile time -- confirm.
public static int MINUS_ONE = -1;
|
||||
|
||||
// Prints a start banner, calls Integer.bitCount once up front, then compares
// testBitCount() with the expected value 0 and throws InternalError on mismatch.
public static void main(String[] args) {
|
||||
System.out.println("EXECUTING test.");
|
||||
Integer.bitCount(1); // load class
|
||||
int expectedBitCount = 0;
|
||||
int calculatedBitCount = testBitCount();
|
||||
if (expectedBitCount != calculatedBitCount) {
|
||||
throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount);
|
||||
}
|
||||
System.out.println("SUCCESSFULLY passed test.");
|
||||
}
|
||||
|
||||
// testBitCount will be compiled using the Integer.bitCount() intrinsic if possible
|
||||
// Returns the bit count of MINUS_ONE + 1 (i.e. of 0 while MINUS_ONE is -1).
private static int testBitCount() {
|
||||
return Integer.bitCount(MINUS_ONE+1); // -1 + 1 => int overflow
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user