8353192: C2: Clean up x86 backend after 32-bit x86 removal

Reviewed-by: kvn, epeter
This commit is contained in:
Aleksey Shipilev 2025-04-09 08:22:48 +00:00
parent 9ee5590328
commit 250eb743c1
6 changed files with 73 additions and 492 deletions

View File

@ -43,22 +43,8 @@ void C2SafepointPollStub::emit(C2_MacroAssembler& masm) {
__ bind(entry());
InternalAddress safepoint_pc(masm.pc() - masm.offset() + _safepoint_offset);
#ifdef _LP64
__ lea(rscratch1, safepoint_pc);
__ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
#else
const Register tmp1 = rcx;
const Register tmp2 = rdx;
__ push(tmp1);
__ push(tmp2);
__ lea(tmp1, safepoint_pc);
__ get_thread(tmp2);
__ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1);
__ pop(tmp2);
__ pop(tmp1);
#endif
__ jump(callback_addr);
}

View File

@ -107,16 +107,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
}
#ifndef _LP64
// If method sets FPU control word do it now
if (fp_mode_24b) {
fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
}
if (UseSSE >= 2 && VerifyFPU) {
verify_FPU(0, "FPU stack must be clean on entry");
}
#endif
#ifdef ASSERT
if (VerifyStackAtCalls) {
Label L;
@ -133,7 +123,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
if (!is_stub) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
#ifdef _LP64
// We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
Label dummy_slow_path;
Label dummy_continuation;
@ -147,10 +136,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
continuation = &stub->continuation();
}
bs->nmethod_entry_barrier(this, slow_path, continuation);
#else
// Don't bother with out-of-line nmethod entry barrier stub for x86_32.
bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */);
#endif
}
}
@ -299,7 +284,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
// Locked by current thread if difference with current SP is less than one page.
subptr(tmpReg, rsp);
// Next instruction set ZFlag == 1 (Success) if difference is less then one page.
andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
andptr(tmpReg, (int32_t) (7 - (int)os::vm_page_size()) );
movptr(Address(boxReg, 0), tmpReg);
}
jmp(DONE_LABEL);
@ -307,10 +292,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
bind(IsInflated);
// The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
#ifndef _LP64
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
orl(boxReg, 1); // set ICC.ZF=0 to indicate failure
#else
// Unconditionally set box->_displaced_header = markWord::unused_mark().
// Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
@ -329,7 +310,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
jccb(Assembler::notEqual, NO_COUNT); // If not recursive, ZF = 0 at this point (fail)
incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
#endif // _LP64
bind(DONE_LABEL);
// ZFlag == 1 count in fast path
@ -338,10 +318,8 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp
bind(COUNT);
if (LockingMode == LM_LEGACY) {
#ifdef _LP64
// Count monitors in fast path
increment(Address(thread, JavaThread::held_monitor_count_offset()));
#endif
}
xorl(tmpReg, tmpReg); // Set ZF == 1
@ -404,11 +382,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
// It's inflated.
#ifndef _LP64
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
orl(boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb(DONE_LABEL);
#else
// Despite our balanced locking property we still check that m->_owner == Self
// as java routines or native JNI code called by this thread might
// have released the lock.
@ -462,7 +435,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
bind (LSuccess);
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
#endif // _LP64
if (LockingMode == LM_LEGACY) {
bind (Stacked);
@ -482,9 +454,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
if (LockingMode == LM_LEGACY) {
// Count monitors in fast path
#ifdef _LP64
decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
#endif
}
xorl(tmpReg, tmpReg); // Set ZF == 1
@ -563,11 +533,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
{ // Handle inflated monitor.
bind(inflated);
#ifndef _LP64
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
orl(box, 1); // set ICC.ZF=0 to indicate failure
jmpb(slow_path);
#else
const Register monitor = t;
if (!UseObjectMonitorTable) {
@ -633,7 +598,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
increment(recursions_address);
bind(monitor_locked);
#endif // _LP64
}
bind(locked);
@ -746,11 +710,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
bind(inflated);
#ifndef _LP64
// Just take slow path to avoid dealing with 64 bit atomic instructions here.
orl(t, 1); // set ICC.ZF=0 to indicate failure
jmpb(slow_path);
#else
if (!UseObjectMonitorTable) {
assert(mark == monitor, "should be the same here");
} else {
@ -800,7 +759,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
// Recursive unlock.
bind(recursive);
decrement(recursions_address);
#endif // _LP64
}
bind(unlocked);
@ -1522,7 +1480,6 @@ void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src,
}
}
#ifdef _LP64
void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt,
XMMRegister dst, Register base,
Register idx_base,
@ -1561,7 +1518,6 @@ void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt,
}
}
}
#endif // _LP64
void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst,
Register base, Register idx_base,
@ -1633,7 +1589,7 @@ void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst,
if (mask == noreg) {
vgather8b_offset(elem_ty, temp_dst, base, idx_base, offset, rtmp, vlen_enc);
} else {
LP64_ONLY(vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc));
vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc);
}
// TEMP_PERM_VEC(temp_dst) = PERMUTE TMP_VEC_64(temp_dst) PERM_INDEX(xtmp1)
vpermd(temp_dst, xtmp1, temp_dst, vlen_enc == Assembler::AVX_512bit ? vlen_enc : Assembler::AVX_256bit);
@ -2037,7 +1993,6 @@ void C2_MacroAssembler::reduceI(int opcode, int vlen,
}
}
#ifdef _LP64
void C2_MacroAssembler::reduceL(int opcode, int vlen,
Register dst, Register src1, XMMRegister src2,
XMMRegister vtmp1, XMMRegister vtmp2) {
@ -2049,7 +2004,6 @@ void C2_MacroAssembler::reduceL(int opcode, int vlen,
default: assert(false, "wrong vector length");
}
}
#endif // _LP64
void C2_MacroAssembler::reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
switch (vlen) {
@ -2299,7 +2253,6 @@ void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRe
reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
}
#ifdef _LP64
void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
pshufd(vtmp2, src2, 0xE);
reduce_operation_128(T_LONG, opcode, vtmp2, src2);
@ -2325,7 +2278,6 @@ void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
bzhiq(temp, temp, len);
kmovql(dst, temp);
}
#endif // _LP64
void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
reduce_operation_128(T_FLOAT, opcode, dst, src);
@ -2741,7 +2693,6 @@ void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister sr
}
}
#ifdef _LP64
void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc) {
assert(UseAVX >= 2, "required");
bool is_bw = ((elem_bt == T_BYTE) || (elem_bt == T_SHORT));
@ -2770,7 +2721,6 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register
}
}
}
#endif
void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc) {
switch (to_elem_bt) {
@ -3698,7 +3648,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
XMMRegister vec1, int ae, KRegister mask) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only AVX3
int stride, stride2, adr_stride, adr_stride1, adr_stride2;
int stride2x2 = 0x40;
Address::ScaleFactor scale = Address::no_scale;
@ -3768,7 +3718,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
Label COMPARE_WIDE_VECTORS_LOOP_AVX2;
Label COMPARE_TAIL_LONG;
Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only _LP64 && AVX3
Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only AVX3
int pcmpmask = 0x19;
if (ae == StrIntrinsicNode::LL) {
@ -3838,7 +3788,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
bind(COMPARE_WIDE_VECTORS_LOOP);
#ifdef _LP64
if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
cmpl(cnt2, stride2x2);
jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
@ -3862,8 +3811,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
vpxor(vec1, vec1);
jmpb(COMPARE_WIDE_TAIL);
}//if (VM_Version::supports_avx512vlbw())
#endif // _LP64
bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
@ -4032,7 +3979,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
}
jmpb(DONE_LABEL);
#ifdef _LP64
if (VM_Version::supports_avx512vlbw()) {
bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
@ -4058,7 +4004,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
subl(result, cnt1);
jmpb(POP_LABEL);
}//if (VM_Version::supports_avx512vlbw())
#endif // _LP64
// Discard the stored length difference
bind(POP_LABEL);
@ -4133,7 +4078,6 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len,
// check the tail for absence of negatives
// ~(~0 << len) applied up to two times (for 32-bit scenario)
#ifdef _LP64
{
Register tmp3_aliased = len;
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
@ -4141,33 +4085,7 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len,
notq(tmp3_aliased);
kmovql(mask2, tmp3_aliased);
}
#else
Label k_init;
jmp(k_init);
// We could not read 64-bits from a general purpose register thus we move
// data required to compose 64 1's to the instruction stream
// We emit 64 byte wide series of elements from 0..63 which later on would
// be used as a compare targets with tail count contained in tmp1 register.
// Result would be a k register having tmp1 consecutive number or 1
// counting from least significant bit.
address tmp = pc();
emit_int64(0x0706050403020100);
emit_int64(0x0F0E0D0C0B0A0908);
emit_int64(0x1716151413121110);
emit_int64(0x1F1E1D1C1B1A1918);
emit_int64(0x2726252423222120);
emit_int64(0x2F2E2D2C2B2A2928);
emit_int64(0x3736353433323130);
emit_int64(0x3F3E3D3C3B3A3938);
bind(k_init);
lea(len, InternalAddress(tmp));
// create mask to test for negative byte inside a vector
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);
#endif
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(mask1, mask2);
jcc(Assembler::zero, DONE);
@ -4414,7 +4332,6 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
lea(ary2, Address(ary2, limit, Address::times_1));
negptr(limit);
#ifdef _LP64
if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3;
@ -4451,7 +4368,7 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
}//if (VM_Version::supports_avx512vlbw())
#endif //_LP64
bind(COMPARE_WIDE_VECTORS);
vmovdqu(vec1, Address(ary1, limit, scaleFactor));
if (expand_ary2) {
@ -4618,8 +4535,6 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
}
}
#ifdef _LP64
static void convertF2I_slowpath(C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister, address>& stub) {
#define __ masm.
Register dst = stub.data<0>();
@ -4666,8 +4581,6 @@ void C2_MacroAssembler::convertF2I(BasicType dst_bt, BasicType src_bt, Register
bind(stub->continuation());
}
#endif // _LP64
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, int imm8, bool merge, int vlen_enc) {
switch(ideal_opc) {
@ -5327,7 +5240,6 @@ void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister ds
}
}
#ifdef _LP64
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src,
AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2) {
@ -5379,7 +5291,6 @@ void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src,
ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/);
}
#endif // _LP64
void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt) {
@ -5510,7 +5421,6 @@ void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XM
}
}
#ifdef _LP64
void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
Register rtmp2, XMMRegister xtmp, int mask_len,
int vec_enc) {
@ -5768,7 +5678,6 @@ void C2_MacroAssembler::vector_compress_expand(int opcode, XMMRegister dst, XMMR
}
}
}
#endif
void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
KRegister ktmp1, int vec_enc) {
@ -5833,10 +5742,8 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc) {
int lane_size = type2aelembytes(bt);
bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
if ((is_LP64 || lane_size < 8) &&
((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) ||
(is_subword_type(bt) && VM_Version::supports_avx512vlbw()))) {
if ((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) ||
(is_subword_type(bt) && VM_Version::supports_avx512vlbw())) {
movptr(rtmp, imm32);
switch(lane_size) {
case 1 : evpbroadcastb(dst, rtmp, vec_enc); break;
@ -5848,7 +5755,7 @@ void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Reg
}
} else {
movptr(rtmp, imm32);
LP64_ONLY(movq(dst, rtmp)) NOT_LP64(movdl(dst, rtmp));
movq(dst, rtmp);
switch(lane_size) {
case 1 : vpbroadcastb(dst, dst, vec_enc); break;
case 2 : vpbroadcastw(dst, dst, vec_enc); break;
@ -5983,14 +5890,6 @@ void C2_MacroAssembler::vector_popcount_integral_evex(BasicType bt, XMMRegister
}
}
#ifndef _LP64
void C2_MacroAssembler::vector_maskall_operation32(KRegister dst, Register src, KRegister tmp, int mask_len) {
assert(VM_Version::supports_avx512bw(), "");
kmovdl(tmp, src);
kunpckdql(dst, tmp, tmp);
}
#endif
// Bit reversal algorithm first reverses the bits of each byte followed by
// a byte level reversal for multi-byte primitive types (short/int/long).
// Algorithm performs a lookup table access to get reverse bit sequence
@ -6450,7 +6349,6 @@ void C2_MacroAssembler::udivmodI(Register rax, Register divisor, Register rdx, R
bind(done);
}
#ifdef _LP64
void C2_MacroAssembler::reverseI(Register dst, Register src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp) {
if(VM_Version::supports_gfni()) {
@ -6614,7 +6512,6 @@ void C2_MacroAssembler::udivmodL(Register rax, Register divisor, Register rdx, R
subq(rdx, tmp); // remainder
bind(done);
}
#endif
void C2_MacroAssembler::rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp,

View File

@ -130,9 +130,7 @@ public:
// Covert B2X
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
#ifdef _LP64
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
#endif
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
@ -152,10 +150,8 @@ public:
// dst = src1 reduce(op, src2) using vtmp as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64
// dst = reduce(op, src2) using vtmp as temps
void reduce_fp(int opcode, int vlen,
@ -202,11 +198,9 @@ public:
void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Long Reduction
#ifdef _LP64
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
// Float Reduction
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
@ -237,7 +231,6 @@ public:
void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
public:
#ifdef _LP64
void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);
void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
@ -246,14 +239,9 @@ public:
Register tmp, int masklen, BasicType bt, int vec_enc);
void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
#endif
void vector_maskall_operation(KRegister dst, Register src, int mask_len);
#ifndef _LP64
void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len);
#endif
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
@ -313,9 +301,7 @@ public:
void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
#ifdef _LP64
void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
#endif
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
@ -390,7 +376,6 @@ public:
void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);
#ifdef _LP64
void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
@ -403,13 +388,11 @@ public:
void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
BasicType bt, int vec_enc);
#endif // _LP64
void udivI(Register rax, Register divisor, Register rdx);
void umodI(Register rax, Register divisor, Register rdx);
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
#ifdef _LP64
void reverseI(Register dst, Register src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp);
void reverseL(Register dst, Register src, XMMRegister xtmp1,
@ -417,7 +400,6 @@ public:
void udivL(Register rax, Register divisor, Register rdx);
void umodL(Register rax, Register divisor, Register rdx);
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
#endif
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
bool merge, BasicType bt, int vlen_enc);
@ -511,10 +493,9 @@ public:
Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
Register midx, Register length, int vector_len, int vlen_enc);
#ifdef _LP64
void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
Register offset, Register mask, Register midx, Register rtmp, int vlen_enc);
#endif
void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
Register offset, Register rtmp, int vlen_enc);

View File

@ -59,53 +59,34 @@
static constexpr bool isSimpleConstant64(jlong value) {
// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
//return value == (int) value; // Cf. storeImmL and immL32.
// Probably always true, even if a temp register is required.
#ifdef _LP64
return true;
#else
return false;
#endif
}
#ifdef _LP64
// No additional cost for CMOVL.
static constexpr int long_cmove_cost() { return 0; }
#else
// Needs 2 CMOV's for longs.
static constexpr int long_cmove_cost() { return 1; }
#endif
#ifdef _LP64
// No CMOVF/CMOVD with SSE2
static int float_cmove_cost() { return ConditionalMoveLimit; }
#else
// No CMOVF/CMOVD with SSE/SSE2
static int float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
#endif
static bool narrow_oop_use_complex_address() {
NOT_LP64(ShouldNotCallThis();)
assert(UseCompressedOops, "only for compressed oops code");
return (LogMinObjAlignmentInBytes <= 3);
}
static bool narrow_klass_use_complex_address() {
NOT_LP64(ShouldNotCallThis();)
assert(UseCompressedClassPointers, "only for compressed klass code");
return (CompressedKlassPointers::shift() <= 3);
}
// Prefer ConN+DecodeN over ConP.
static bool const_oop_prefer_decode() {
NOT_LP64(ShouldNotCallThis();)
// Prefer ConN+DecodeN over ConP.
return true;
}
// Prefer ConP over ConNKlass+DecodeNKlass.
static bool const_klass_prefer_decode() {
NOT_LP64(ShouldNotCallThis();)
return false;
}
@ -123,24 +104,12 @@
// Are floats converted to double when stored to stack during deoptimization?
// On x64 it is stored without conversion so we can use normal access.
// On x32 it is stored with conversion only when FPU is used for floats.
#ifdef _LP64
static constexpr bool float_in_double() {
return false;
}
#else
static bool float_in_double() {
return (UseSSE == 0);
}
#endif
// Do ints take an entire long register or just half?
#ifdef _LP64
static const bool int_in_long = true;
#else
static const bool int_in_long = false;
#endif
// Does the CPU supports vector variable shift instructions?
static bool supports_vector_variable_shifts(void) {

File diff suppressed because it is too large Load Diff

View File

@ -751,12 +751,11 @@ bool ArchDesc::check_usage() {
callback.do_form_by_name("sRegL");
// special generic vector operands only used in Matcher::pd_specialize_generic_vector_operand
// NOTE(review): x86_32 (IA32) has been removed, so only AMD64 is handled below — the old comment about combining x86.ad and x86_32.ad is stale and should be dropped
#if defined(AARCH64)
callback.do_form_by_name("vecA");
callback.do_form_by_name("vecD");
callback.do_form_by_name("vecX");
#elif defined(IA32) || defined(AMD64)
#elif defined(AMD64)
callback.do_form_by_name("vecS");
callback.do_form_by_name("vecD");
callback.do_form_by_name("vecX");