mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-14 18:03:44 +00:00
8353192: C2: Clean up x86 backend after 32-bit x86 removal
Reviewed-by: kvn, epeter
This commit is contained in:
parent
9ee5590328
commit
250eb743c1
@ -43,22 +43,8 @@ void C2SafepointPollStub::emit(C2_MacroAssembler& masm) {
|
||||
|
||||
__ bind(entry());
|
||||
InternalAddress safepoint_pc(masm.pc() - masm.offset() + _safepoint_offset);
|
||||
#ifdef _LP64
|
||||
__ lea(rscratch1, safepoint_pc);
|
||||
__ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
|
||||
#else
|
||||
const Register tmp1 = rcx;
|
||||
const Register tmp2 = rdx;
|
||||
__ push(tmp1);
|
||||
__ push(tmp2);
|
||||
|
||||
__ lea(tmp1, safepoint_pc);
|
||||
__ get_thread(tmp2);
|
||||
__ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1);
|
||||
|
||||
__ pop(tmp2);
|
||||
__ pop(tmp1);
|
||||
#endif
|
||||
__ jump(callback_addr);
|
||||
}
|
||||
|
||||
|
||||
@ -107,16 +107,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
|
||||
movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
// If method sets FPU control word do it now
|
||||
if (fp_mode_24b) {
|
||||
fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
|
||||
}
|
||||
if (UseSSE >= 2 && VerifyFPU) {
|
||||
verify_FPU(0, "FPU stack must be clean on entry");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ASSERT
|
||||
if (VerifyStackAtCalls) {
|
||||
Label L;
|
||||
@ -133,7 +123,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
|
||||
|
||||
if (!is_stub) {
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
#ifdef _LP64
|
||||
// We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
|
||||
Label dummy_slow_path;
|
||||
Label dummy_continuation;
|
||||
@ -147,10 +136,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
|
||||
continuation = &stub->continuation();
|
||||
}
|
||||
bs->nmethod_entry_barrier(this, slow_path, continuation);
|
||||
#else
|
||||
// Don't bother with out-of-line nmethod entry barrier stub for x86_32.
|
||||
bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -299,7 +284,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
|
||||
// Locked by current thread if difference with current SP is less than one page.
|
||||
subptr(tmpReg, rsp);
|
||||
// Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
|
||||
andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
|
||||
andptr(tmpReg, (int32_t) (7 - (int)os::vm_page_size()) );
|
||||
movptr(Address(boxReg, 0), tmpReg);
|
||||
}
|
||||
jmp(DONE_LABEL);
|
||||
@ -307,10 +292,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
|
||||
bind(IsInflated);
|
||||
// The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
|
||||
|
||||
#ifndef _LP64
|
||||
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
|
||||
orl(boxReg, 1); // set ICC.ZF=0 to indicate failure
|
||||
#else
|
||||
// Unconditionally set box->_displaced_header = markWord::unused_mark().
|
||||
// Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
|
||||
movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
|
||||
@ -329,7 +310,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
|
||||
jccb(Assembler::notEqual, NO_COUNT); // If not recursive, ZF = 0 at this point (fail)
|
||||
incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
|
||||
xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
|
||||
#endif // _LP64
|
||||
bind(DONE_LABEL);
|
||||
|
||||
// ZFlag == 1 count in fast path
|
||||
@ -338,10 +318,8 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
|
||||
|
||||
bind(COUNT);
|
||||
if (LockingMode == LM_LEGACY) {
|
||||
#ifdef _LP64
|
||||
// Count monitors in fast path
|
||||
increment(Address(thread, JavaThread::held_monitor_count_offset()));
|
||||
#endif
|
||||
}
|
||||
xorl(tmpReg, tmpReg); // Set ZF == 1
|
||||
|
||||
@ -404,11 +382,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
|
||||
|
||||
// It's inflated.
|
||||
|
||||
#ifndef _LP64
|
||||
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
|
||||
orl(boxReg, 1); // set ICC.ZF=0 to indicate failure
|
||||
jmpb(DONE_LABEL);
|
||||
#else
|
||||
// Despite our balanced locking property we still check that m->_owner == Self
|
||||
// as java routines or native JNI code called by this thread might
|
||||
// have released the lock.
|
||||
@ -462,7 +435,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
|
||||
bind (LSuccess);
|
||||
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
|
||||
jmpb (DONE_LABEL);
|
||||
#endif // _LP64
|
||||
|
||||
if (LockingMode == LM_LEGACY) {
|
||||
bind (Stacked);
|
||||
@ -482,9 +454,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
|
||||
|
||||
if (LockingMode == LM_LEGACY) {
|
||||
// Count monitors in fast path
|
||||
#ifdef _LP64
|
||||
decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
|
||||
#endif
|
||||
}
|
||||
|
||||
xorl(tmpReg, tmpReg); // Set ZF == 1
|
||||
@ -563,11 +533,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
|
||||
{ // Handle inflated monitor.
|
||||
bind(inflated);
|
||||
|
||||
#ifndef _LP64
|
||||
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
|
||||
orl(box, 1); // set ICC.ZF=0 to indicate failure
|
||||
jmpb(slow_path);
|
||||
#else
|
||||
const Register monitor = t;
|
||||
|
||||
if (!UseObjectMonitorTable) {
|
||||
@ -633,7 +598,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
|
||||
increment(recursions_address);
|
||||
|
||||
bind(monitor_locked);
|
||||
#endif // _LP64
|
||||
}
|
||||
|
||||
bind(locked);
|
||||
@ -746,11 +710,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
|
||||
|
||||
bind(inflated);
|
||||
|
||||
#ifndef _LP64
|
||||
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
|
||||
orl(t, 1); // set ICC.ZF=0 to indicate failure
|
||||
jmpb(slow_path);
|
||||
#else
|
||||
if (!UseObjectMonitorTable) {
|
||||
assert(mark == monitor, "should be the same here");
|
||||
} else {
|
||||
@ -800,7 +759,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
|
||||
// Recursive unlock.
|
||||
bind(recursive);
|
||||
decrement(recursions_address);
|
||||
#endif // _LP64
|
||||
}
|
||||
|
||||
bind(unlocked);
|
||||
@ -1522,7 +1480,6 @@ void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt,
|
||||
XMMRegister dst, Register base,
|
||||
Register idx_base,
|
||||
@ -1561,7 +1518,6 @@ void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // _LP64
|
||||
|
||||
void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst,
|
||||
Register base, Register idx_base,
|
||||
@ -1633,7 +1589,7 @@ void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst,
|
||||
if (mask == noreg) {
|
||||
vgather8b_offset(elem_ty, temp_dst, base, idx_base, offset, rtmp, vlen_enc);
|
||||
} else {
|
||||
LP64_ONLY(vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc));
|
||||
vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc);
|
||||
}
|
||||
// TEMP_PERM_VEC(temp_dst) = PERMUTE TMP_VEC_64(temp_dst) PERM_INDEX(xtmp1)
|
||||
vpermd(temp_dst, xtmp1, temp_dst, vlen_enc == Assembler::AVX_512bit ? vlen_enc : Assembler::AVX_256bit);
|
||||
@ -2037,7 +1993,6 @@ void C2_MacroAssembler::reduceI(int opcode, int vlen,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::reduceL(int opcode, int vlen,
|
||||
Register dst, Register src1, XMMRegister src2,
|
||||
XMMRegister vtmp1, XMMRegister vtmp2) {
|
||||
@ -2049,7 +2004,6 @@ void C2_MacroAssembler::reduceL(int opcode, int vlen,
|
||||
default: assert(false, "wrong vector length");
|
||||
}
|
||||
}
|
||||
#endif // _LP64
|
||||
|
||||
void C2_MacroAssembler::reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
|
||||
switch (vlen) {
|
||||
@ -2299,7 +2253,6 @@ void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRe
|
||||
reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
|
||||
pshufd(vtmp2, src2, 0xE);
|
||||
reduce_operation_128(T_LONG, opcode, vtmp2, src2);
|
||||
@ -2325,7 +2278,6 @@ void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
|
||||
bzhiq(temp, temp, len);
|
||||
kmovql(dst, temp);
|
||||
}
|
||||
#endif // _LP64
|
||||
|
||||
void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
|
||||
reduce_operation_128(T_FLOAT, opcode, dst, src);
|
||||
@ -2741,7 +2693,6 @@ void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister sr
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc) {
|
||||
assert(UseAVX >= 2, "required");
|
||||
bool is_bw = ((elem_bt == T_BYTE) || (elem_bt == T_SHORT));
|
||||
@ -2770,7 +2721,6 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc) {
|
||||
switch (to_elem_bt) {
|
||||
@ -3698,7 +3648,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
XMMRegister vec1, int ae, KRegister mask) {
|
||||
ShortBranchVerifier sbv(this);
|
||||
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only AVX3
|
||||
int stride, stride2, adr_stride, adr_stride1, adr_stride2;
|
||||
int stride2x2 = 0x40;
|
||||
Address::ScaleFactor scale = Address::no_scale;
|
||||
@ -3768,7 +3718,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_AVX2;
|
||||
Label COMPARE_TAIL_LONG;
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only _LP64 && AVX3
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only AVX3
|
||||
|
||||
int pcmpmask = 0x19;
|
||||
if (ae == StrIntrinsicNode::LL) {
|
||||
@ -3838,7 +3788,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
// In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP);
|
||||
|
||||
#ifdef _LP64
|
||||
if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
|
||||
cmpl(cnt2, stride2x2);
|
||||
jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
|
||||
@ -3862,8 +3811,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
vpxor(vec1, vec1);
|
||||
jmpb(COMPARE_WIDE_TAIL);
|
||||
}//if (VM_Version::supports_avx512vlbw())
|
||||
#endif // _LP64
|
||||
|
||||
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
|
||||
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
|
||||
@ -4032,7 +3979,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
}
|
||||
jmpb(DONE_LABEL);
|
||||
|
||||
#ifdef _LP64
|
||||
if (VM_Version::supports_avx512vlbw()) {
|
||||
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
|
||||
@ -4058,7 +4004,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
subl(result, cnt1);
|
||||
jmpb(POP_LABEL);
|
||||
}//if (VM_Version::supports_avx512vlbw())
|
||||
#endif // _LP64
|
||||
|
||||
// Discard the stored length difference
|
||||
bind(POP_LABEL);
|
||||
@ -4133,7 +4078,6 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len,
|
||||
|
||||
// check the tail for absence of negatives
|
||||
// ~(~0 << len) applied up to two times (for 32-bit scenario)
|
||||
#ifdef _LP64
|
||||
{
|
||||
Register tmp3_aliased = len;
|
||||
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
|
||||
@ -4141,33 +4085,7 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len,
|
||||
notq(tmp3_aliased);
|
||||
kmovql(mask2, tmp3_aliased);
|
||||
}
|
||||
#else
|
||||
Label k_init;
|
||||
jmp(k_init);
|
||||
|
||||
// We could not read 64-bits from a general purpose register thus we move
|
||||
// data required to compose 64 1's to the instruction stream
|
||||
// We emit 64 byte wide series of elements from 0..63 which later on would
|
||||
// be used as a compare targets with tail count contained in tmp1 register.
|
||||
// Result would be a k register having tmp1 consecutive 1 bits
|
||||
// counting from least significant bit.
|
||||
address tmp = pc();
|
||||
emit_int64(0x0706050403020100);
|
||||
emit_int64(0x0F0E0D0C0B0A0908);
|
||||
emit_int64(0x1716151413121110);
|
||||
emit_int64(0x1F1E1D1C1B1A1918);
|
||||
emit_int64(0x2726252423222120);
|
||||
emit_int64(0x2F2E2D2C2B2A2928);
|
||||
emit_int64(0x3736353433323130);
|
||||
emit_int64(0x3F3E3D3C3B3A3938);
|
||||
|
||||
bind(k_init);
|
||||
lea(len, InternalAddress(tmp));
|
||||
// create mask to test for negative byte inside a vector
|
||||
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
|
||||
evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);
|
||||
|
||||
#endif
|
||||
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
|
||||
ktestq(mask1, mask2);
|
||||
jcc(Assembler::zero, DONE);
|
||||
@ -4414,7 +4332,6 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
|
||||
lea(ary2, Address(ary2, limit, Address::times_1));
|
||||
negptr(limit);
|
||||
|
||||
#ifdef _LP64
|
||||
if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3;
|
||||
|
||||
@ -4451,7 +4368,7 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
|
||||
|
||||
}//if (VM_Version::supports_avx512vlbw())
|
||||
#endif //_LP64
|
||||
|
||||
bind(COMPARE_WIDE_VECTORS);
|
||||
vmovdqu(vec1, Address(ary1, limit, scaleFactor));
|
||||
if (expand_ary2) {
|
||||
@ -4618,8 +4535,6 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
|
||||
static void convertF2I_slowpath(C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister, address>& stub) {
|
||||
#define __ masm.
|
||||
Register dst = stub.data<0>();
|
||||
@ -4666,8 +4581,6 @@ void C2_MacroAssembler::convertF2I(BasicType dst_bt, BasicType src_bt, Register
|
||||
bind(stub->continuation());
|
||||
}
|
||||
|
||||
#endif // _LP64
|
||||
|
||||
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
|
||||
XMMRegister src1, int imm8, bool merge, int vlen_enc) {
|
||||
switch(ideal_opc) {
|
||||
@ -5327,7 +5240,6 @@ void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister ds
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src,
|
||||
AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
|
||||
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2) {
|
||||
@ -5379,7 +5291,6 @@ void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src,
|
||||
|
||||
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
|
||||
}
|
||||
#endif // _LP64
|
||||
|
||||
void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
|
||||
BasicType from_elem_bt, BasicType to_elem_bt) {
|
||||
@ -5510,7 +5421,6 @@ void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XM
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
|
||||
Register rtmp2, XMMRegister xtmp, int mask_len,
|
||||
int vec_enc) {
|
||||
@ -5768,7 +5678,6 @@ void C2_MacroAssembler::vector_compress_expand(int opcode, XMMRegister dst, XMMR
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||
KRegister ktmp1, int vec_enc) {
|
||||
@ -5833,10 +5742,8 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
|
||||
|
||||
void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc) {
|
||||
int lane_size = type2aelembytes(bt);
|
||||
bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
|
||||
if ((is_LP64 || lane_size < 8) &&
|
||||
((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) ||
|
||||
(is_subword_type(bt) && VM_Version::supports_avx512vlbw()))) {
|
||||
if ((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) ||
|
||||
(is_subword_type(bt) && VM_Version::supports_avx512vlbw())) {
|
||||
movptr(rtmp, imm32);
|
||||
switch(lane_size) {
|
||||
case 1 : evpbroadcastb(dst, rtmp, vec_enc); break;
|
||||
@ -5848,7 +5755,7 @@ void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Reg
|
||||
}
|
||||
} else {
|
||||
movptr(rtmp, imm32);
|
||||
LP64_ONLY(movq(dst, rtmp)) NOT_LP64(movdl(dst, rtmp));
|
||||
movq(dst, rtmp);
|
||||
switch(lane_size) {
|
||||
case 1 : vpbroadcastb(dst, dst, vec_enc); break;
|
||||
case 2 : vpbroadcastw(dst, dst, vec_enc); break;
|
||||
@ -5983,14 +5890,6 @@ void C2_MacroAssembler::vector_popcount_integral_evex(BasicType bt, XMMRegister
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
void C2_MacroAssembler::vector_maskall_operation32(KRegister dst, Register src, KRegister tmp, int mask_len) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
kmovdl(tmp, src);
|
||||
kunpckdql(dst, tmp, tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Bit reversal algorithm first reverses the bits of each byte followed by
|
||||
// a byte level reversal for multi-byte primitive types (short/int/long).
|
||||
// Algorithm performs a lookup table access to get reverse bit sequence
|
||||
@ -6450,7 +6349,6 @@ void C2_MacroAssembler::udivmodI(Register rax, Register divisor, Register rdx, R
|
||||
bind(done);
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::reverseI(Register dst, Register src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp) {
|
||||
if(VM_Version::supports_gfni()) {
|
||||
@ -6614,7 +6512,6 @@ void C2_MacroAssembler::udivmodL(Register rax, Register divisor, Register rdx, R
|
||||
subq(rdx, tmp); // remainder
|
||||
bind(done);
|
||||
}
|
||||
#endif
|
||||
|
||||
void C2_MacroAssembler::rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp,
|
||||
|
||||
@ -130,9 +130,7 @@ public:
|
||||
|
||||
// Convert B2X
|
||||
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
|
||||
#ifdef _LP64
|
||||
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
|
||||
#endif
|
||||
|
||||
// blend
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
|
||||
@ -152,10 +150,8 @@ public:
|
||||
|
||||
// dst = src1 reduce(op, src2) using vtmp as temps
|
||||
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#ifdef _LP64
|
||||
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void genmask(KRegister dst, Register len, Register temp);
|
||||
#endif // _LP64
|
||||
|
||||
// dst = reduce(op, src2) using vtmp as temps
|
||||
void reduce_fp(int opcode, int vlen,
|
||||
@ -202,11 +198,9 @@ public:
|
||||
void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
|
||||
// Long Reduction
|
||||
#ifdef _LP64
|
||||
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#endif // _LP64
|
||||
|
||||
// Float Reduction
|
||||
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
|
||||
@ -237,7 +231,6 @@ public:
|
||||
void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
|
||||
public:
|
||||
#ifdef _LP64
|
||||
void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);
|
||||
|
||||
void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
|
||||
@ -246,14 +239,9 @@ public:
|
||||
Register tmp, int masklen, BasicType bt, int vec_enc);
|
||||
void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
|
||||
Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
|
||||
#endif
|
||||
|
||||
void vector_maskall_operation(KRegister dst, Register src, int mask_len);
|
||||
|
||||
#ifndef _LP64
|
||||
void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len);
|
||||
#endif
|
||||
|
||||
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
||||
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
|
||||
|
||||
@ -313,9 +301,7 @@ public:
|
||||
void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
|
||||
void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
|
||||
|
||||
#ifdef _LP64
|
||||
void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
|
||||
#endif
|
||||
|
||||
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
|
||||
XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
@ -390,7 +376,6 @@ public:
|
||||
|
||||
void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);
|
||||
|
||||
#ifdef _LP64
|
||||
void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
|
||||
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
|
||||
|
||||
@ -403,13 +388,11 @@ public:
|
||||
void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
|
||||
Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
|
||||
BasicType bt, int vec_enc);
|
||||
#endif // _LP64
|
||||
|
||||
void udivI(Register rax, Register divisor, Register rdx);
|
||||
void umodI(Register rax, Register divisor, Register rdx);
|
||||
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
||||
|
||||
#ifdef _LP64
|
||||
void reverseI(Register dst, Register src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp);
|
||||
void reverseL(Register dst, Register src, XMMRegister xtmp1,
|
||||
@ -417,7 +400,6 @@ public:
|
||||
void udivL(Register rax, Register divisor, Register rdx);
|
||||
void umodL(Register rax, Register divisor, Register rdx);
|
||||
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
||||
#endif
|
||||
|
||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
|
||||
bool merge, BasicType bt, int vlen_enc);
|
||||
@ -511,10 +493,9 @@ public:
|
||||
Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
||||
Register midx, Register length, int vector_len, int vlen_enc);
|
||||
|
||||
#ifdef _LP64
|
||||
void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
|
||||
Register offset, Register mask, Register midx, Register rtmp, int vlen_enc);
|
||||
#endif
|
||||
|
||||
void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
|
||||
Register offset, Register rtmp, int vlen_enc);
|
||||
|
||||
|
||||
@ -59,53 +59,34 @@
|
||||
static constexpr bool isSimpleConstant64(jlong value) {
|
||||
// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
|
||||
//return value == (int) value; // Cf. storeImmL and immL32.
|
||||
|
||||
// Probably always true, even if a temp register is required.
|
||||
#ifdef _LP64
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// No additional cost for CMOVL.
|
||||
static constexpr int long_cmove_cost() { return 0; }
|
||||
#else
|
||||
// Needs 2 CMOV's for longs.
|
||||
static constexpr int long_cmove_cost() { return 1; }
|
||||
#endif
|
||||
|
||||
#ifdef _LP64
|
||||
// No CMOVF/CMOVD with SSE2
|
||||
static int float_cmove_cost() { return ConditionalMoveLimit; }
|
||||
#else
|
||||
// No CMOVF/CMOVD with SSE/SSE2
|
||||
static int float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
|
||||
#endif
|
||||
|
||||
static bool narrow_oop_use_complex_address() {
|
||||
NOT_LP64(ShouldNotCallThis();)
|
||||
assert(UseCompressedOops, "only for compressed oops code");
|
||||
return (LogMinObjAlignmentInBytes <= 3);
|
||||
}
|
||||
|
||||
static bool narrow_klass_use_complex_address() {
|
||||
NOT_LP64(ShouldNotCallThis();)
|
||||
assert(UseCompressedClassPointers, "only for compressed klass code");
|
||||
return (CompressedKlassPointers::shift() <= 3);
|
||||
}
|
||||
|
||||
// Prefer ConN+DecodeN over ConP.
|
||||
static bool const_oop_prefer_decode() {
|
||||
NOT_LP64(ShouldNotCallThis();)
|
||||
// Prefer ConN+DecodeN over ConP.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prefer ConP over ConNKlass+DecodeNKlass.
|
||||
static bool const_klass_prefer_decode() {
|
||||
NOT_LP64(ShouldNotCallThis();)
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -123,24 +104,12 @@
|
||||
|
||||
// Are floats converted to double when stored to stack during deoptimization?
|
||||
// On x64 it is stored without conversion so we can use normal access.
|
||||
// On x32 it is stored with conversion only when FPU is used for floats.
|
||||
#ifdef _LP64
|
||||
static constexpr bool float_in_double() {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
static bool float_in_double() {
|
||||
return (UseSSE == 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Do ints take an entire long register or just half?
|
||||
#ifdef _LP64
|
||||
static const bool int_in_long = true;
|
||||
#else
|
||||
static const bool int_in_long = false;
|
||||
#endif
|
||||
|
||||
|
||||
// Does the CPU support vector variable shift instructions?
|
||||
static bool supports_vector_variable_shifts(void) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -751,12 +751,11 @@ bool ArchDesc::check_usage() {
|
||||
callback.do_form_by_name("sRegL");
|
||||
|
||||
// special generic vector operands only used in Matcher::pd_specialize_generic_vector_operand
|
||||
// x86_32 combines x86.ad and x86_32.ad; the vec*/legVec* operands cannot be cleaned from IA32
|
||||
#if defined(AARCH64)
|
||||
callback.do_form_by_name("vecA");
|
||||
callback.do_form_by_name("vecD");
|
||||
callback.do_form_by_name("vecX");
|
||||
#elif defined(IA32) || defined(AMD64)
|
||||
#elif defined(AMD64)
|
||||
callback.do_form_by_name("vecS");
|
||||
callback.do_form_by_name("vecD");
|
||||
callback.do_form_by_name("vecX");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user