8258134: assert(size == calc_size) failed: incorrect size calculation on x86_32 with AVX512 machines

Reviewed-by: kvn, thartmann
This commit is contained in:
Jie Fu 2020-12-18 12:50:19 +00:00
parent 38593a4f2a
commit 45a150b8dc
4 changed files with 21 additions and 113 deletions

View File

@ -374,7 +374,7 @@ OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
}
// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
extern int vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
#undef __
@ -435,13 +435,13 @@ private:
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
_spill_offset -= reg_data._size;
vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
vec_spill_helper(__ code(), false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}
void xmm_register_restore(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
vec_spill_helper(__ code(), true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
_spill_offset += reg_data._size;
}

View File

@ -2129,18 +2129,14 @@ static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_
}
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) {
// In 64-bit VM size calculation is very complex. Emitting instructions
// into scratch buffer is used to get size in 64-bit VM.
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
assert(ireg == Op_VecS || // 32bit vector
(src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
"no non-adjacent vector moves" );
if (cbuf) {
C2_MacroAssembler _masm(cbuf);
int offset = __ offset();
switch (ireg) {
case Op_VecS: // copy whole register
case Op_VecD:
@ -2172,14 +2168,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
default:
ShouldNotReachHere();
}
int size = __ offset() - offset;
#ifdef ASSERT
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
assert(!do_size || size == 4, "incorrect size calculattion");
#endif
return size;
#ifndef PRODUCT
} else if (!do_size) {
} else {
switch (ireg) {
case Op_VecS:
case Op_VecD:
@ -2195,18 +2185,12 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
}
#endif
}
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
return (UseAVX > 2) ? 6 : 4;
}
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st) {
// In 64-bit VM size calculation is very complex. Emitting instructions
// into scratch buffer is used to get size in 64-bit VM.
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
if (cbuf) {
C2_MacroAssembler _masm(cbuf);
int offset = __ offset();
if (is_load) {
switch (ireg) {
case Op_VecS:
@ -2284,15 +2268,8 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
ShouldNotReachHere();
}
}
int size = __ offset() - offset;
#ifdef ASSERT
int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
return size;
#ifndef PRODUCT
} else if (!do_size) {
} else {
if (is_load) {
switch (ireg) {
case Op_VecS:
@ -2332,47 +2309,6 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
}
#endif
}
bool is_single_byte = false;
int vec_len = 0;
if ((UseAVX > 2) && (stack_offset != 0)) {
int tuple_type = Assembler::EVEX_FVM;
int input_size = Assembler::EVEX_32bit;
switch (ireg) {
case Op_VecS:
tuple_type = Assembler::EVEX_T1S;
break;
case Op_VecD:
tuple_type = Assembler::EVEX_T1S;
input_size = Assembler::EVEX_64bit;
break;
case Op_VecX:
break;
case Op_VecY:
vec_len = 1;
break;
case Op_VecZ:
vec_len = 2;
break;
}
is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
}
int offset_size = 0;
int size = 5;
if (UseAVX > 2 ) {
if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encoding
} else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
} else {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encodding
}
} else {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
}
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return size+offset_size;
}
static inline jlong replicate8_imm(int con, int width) {

View File

@ -945,41 +945,16 @@ static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
}
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
int dst_offset, uint ireg, outputStream* st) {
int calc_size = 0;
int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
switch (ireg) {
case Op_VecS:
calc_size = 3+src_offset_size + 3+dst_offset_size;
break;
case Op_VecD: {
calc_size = 3+src_offset_size + 3+dst_offset_size;
int tmp_src_offset = src_offset + 4;
int tmp_dst_offset = dst_offset + 4;
src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
calc_size += 3+src_offset_size + 3+dst_offset_size;
break;
}
case Op_VecX:
case Op_VecY:
case Op_VecZ:
calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
break;
default:
ShouldNotReachHere();
}
if (cbuf) {
MacroAssembler _masm(cbuf);
int offset = __ offset();
switch (ireg) {
case Op_VecS:
__ pushl(Address(rsp, src_offset));
@ -1012,11 +987,8 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
default:
ShouldNotReachHere();
}
int size = __ offset() - offset;
assert(size == calc_size, "incorrect size calculation");
return size;
#ifndef PRODUCT
} else if (!do_size) {
} else {
switch (ireg) {
case Op_VecS:
st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
@ -1056,7 +1028,6 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
}
#endif
}
return calc_size;
}
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
@ -1088,18 +1059,19 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
// mem -> mem
int src_offset = ra_->reg2offset(src_first);
int dst_offset = ra_->reg2offset(dst_first);
return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
int stack_offset = ra_->reg2offset(src_first);
return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}
return 0;
}
// --------------------------------------

View File

@ -1033,10 +1033,10 @@ static enum RC rc_class(OptoReg::Name reg)
}
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
@ -1151,13 +1151,13 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
int dst_offset = ra_->reg2offset(dst_first);
vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
int stack_offset = ra_->reg2offset(src_first);
vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}