mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8354348: Enable Extended EVEX to REX2/REX demotion for commutative operations with same dst and src2
Reviewed-by: jbhateja, epeter, sviswanathan
This commit is contained in:
parent
075ebb4ee5
commit
c41add8d3e
@ -1398,11 +1398,7 @@ void Assembler::addl(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source.
// Forwards to emit_eevex_or_demote() with opcode byte 0x01 and operand size
// EVEX_32bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::eaddl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x01, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::addl(Register dst, int32_t imm32) {
|
||||
@ -1432,11 +1428,7 @@ void Assembler::addl(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_32bit and emit_arith bytes 0x03 / 0xC0. is_commutative = true allows the
// helper to swap src1 and src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eaddl(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit,
                                        0x03, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::addr_nop_4() {
|
||||
@ -1657,17 +1649,18 @@ void Assembler::eandl(Register dst, Register src1, Address src2, bool no_flags)
|
||||
emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x23, no_flags);
|
||||
}
|
||||
|
||||
// Three-operand (NDD) form with a memory first source.
// Opens an InstructionMark, then forwards to emit_eevex_or_demote() with opcode
// byte 0x21 and operand size EVEX_32bit; the operation is flagged commutative.
void Assembler::eandl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2,
                       VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */,
                       EVEX_32bit, 0x21, no_flags,
                       false /* is_map1 */,
                       true /* is_commutative */);
}
|
||||
|
||||
// Two-operand register form.
// Emits whatever prefix prefix_and_encode() produces for the two register
// encodings (its return value is deliberately discarded), then hands the
// opcode bytes 0x23 / 0xC0 and both registers to emit_arith().
void Assembler::andl(Register dst, Register src) {
  const int dst_enc = dst->encoding();
  const int src_enc = src->encoding();
  (void) prefix_and_encode(dst_enc, src_enc);
  emit_arith(0x23, 0xC0, dst, src);
}
|
||||
|
||||
// Three-operand (NDD) register form.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_32bit and emit_arith bytes 0x23 / 0xC0. is_commutative = true allows the
// helper to swap src1 and src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eandl(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit,
                                        0x23, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::andnl(Register dst, Register src1, Register src2) {
|
||||
@ -2519,7 +2512,7 @@ void Assembler::imull(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form.
// Forwards the three register encodings to the encoding-based
// emit_eevex_or_demote() with opcode byte 0xAF, operand size EVEX_32bit, and
// is_map1 / swap set. is_commutative = true additionally lets that helper
// demote when dst matches src2 rather than src1.
// The helper is invoked exactly once so the instruction is emitted once.
void Assembler::eimull(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE,
                       VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAF, no_flags,
                       true /* is_map1 */, true /* swap */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::imull(Register dst, Address src, int32_t value) {
|
||||
@ -4419,11 +4412,7 @@ void Assembler::enotl(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form using the VEX_SIMD_66 prefix selector.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_16bit and emit_arith bytes 0x0B / 0xC0. is_commutative = true allows the
// helper to swap src1 and src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eorw(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_16bit,
                                        0x0B, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::orl(Address dst, int32_t imm32) {
|
||||
@ -4467,11 +4456,7 @@ void Assembler::orl(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_32bit and emit_arith bytes 0x0B / 0xC0. is_commutative = true allows the
// helper to swap src1 and src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eorl(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit,
                                        0x0B, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::orl(Address dst, Register src) {
|
||||
@ -4483,11 +4468,7 @@ void Assembler::orl(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source.
// Forwards to emit_eevex_or_demote() with opcode byte 0x09 and operand size
// EVEX_32bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::eorl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x09, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::orb(Address dst, int imm8) {
|
||||
@ -4517,11 +4498,7 @@ void Assembler::orb(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source, 8-bit operand size.
// Forwards to emit_eevex_or_demote() with opcode byte 0x08 and operand size
// EVEX_8bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::eorb(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x08, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
|
||||
@ -7323,11 +7300,7 @@ void Assembler::xorl(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_32bit and emit_arith bytes 0x33 / 0xC0. is_commutative = true allows the
// helper to swap src1 and src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::exorl(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit,
                                        0x33, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::xorl(Address dst, Register src) {
|
||||
@ -7339,11 +7312,7 @@ void Assembler::xorl(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source.
// Forwards to emit_eevex_or_demote() with opcode byte 0x31 and operand size
// EVEX_32bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::exorl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x31, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::xorb(Register dst, Address src) {
|
||||
@ -7367,11 +7336,7 @@ void Assembler::xorb(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source, 8-bit operand size.
// Forwards to emit_eevex_or_demote() with opcode byte 0x30 and operand size
// EVEX_8bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::exorb(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x30, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::xorw(Register dst, Address src) {
|
||||
@ -12955,6 +12920,31 @@ void Assembler::eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimd
|
||||
vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ true, no_flags);
|
||||
}
|
||||
|
||||
// Emits prefix + opcode + operand for a three-operand instruction whose first
// source is a memory operand (dst, [src1], src2).
//
//   pre, opc     - forwarded to eevex_prefix_ndd() on the non-demoted path.
//   size         - one of the EVEX_*bit size constants; selects vex_w / the
//                  64-bit prefix and the 0x66 prefix for EVEX_16bit.
//   opcode_byte  - opcode of the NDD form; the demoted path uses opcode_byte + 2.
//   no_flags     - forwarded to is_demotable() and eevex_prefix_ndd().
//   is_map1      - forwarded to get_prefixq() / prefix() on the demoted path.
//   is_commutative - the demoted path is only considered when this is true.
//
// Demotion is taken when the operation is commutative and
// is_demotable(no_flags, dst, src2) holds; otherwise the EVEX NDD prefix is
// emitted. In both cases the operand is emitted as (src2, src1).
void Assembler::emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
                                     int size, int opcode_byte, bool no_flags, bool is_map1, bool is_commutative) {
  const bool is_64bit = (size == EVEX_64bit);
  const bool demote = is_commutative && is_demotable(no_flags, dst->encoding(), src2->encoding());

  if (!demote) {
    InstructionAttr attributes(AVX_128bit, /* vex_w */ is_64bit, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
    attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
    eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), pre, opc, &attributes, no_flags);
    emit_int8(opcode_byte);
  } else {
    // The NDD opcode and the opcode of the equivalent demotable variant differ
    // by 2 (callers in this file pass 0x01/0x09/0x21/0x31/0x08/0x30).
    const int demoted_opcode = opcode_byte + 2;
    if (is_64bit) {
      emit_prefix_and_int8(get_prefixq(src1, dst, is_map1), demoted_opcode);
    } else {
      // 32-bit, 16-bit and 8-bit sizes share this path; only 16-bit needs 0x66.
      if (size == EVEX_16bit) {
        emit_int8(0x66);
      }
      // NOTE(review): the third argument is false for every size, including
      // EVEX_8bit (eorb/exorb callers). If it is a byte-instruction flag,
      // confirm that false is intended for the 8-bit case.
      prefix(src1, dst, false, is_map1);
      emit_int8(demoted_opcode);
    }
  }

  emit_operand(src2, src1, 0);
}
|
||||
|
||||
void Assembler::emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int opcode_byte, bool no_flags, bool is_map1) {
|
||||
if (is_demotable(no_flags, dst->encoding(), src1->encoding())) {
|
||||
@ -13055,18 +13045,20 @@ void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8
|
||||
}
|
||||
|
||||
void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int opcode_byte, bool no_flags, bool is_map1, bool swap) {
|
||||
int size, int opcode_byte, bool no_flags, bool is_map1, bool swap, bool is_commutative) {
|
||||
int encode;
|
||||
bool is_prefixq = (size == EVEX_64bit) ? true : false;
|
||||
if (is_demotable(no_flags, dst_enc, nds_enc)) {
|
||||
bool first_operand_demotable = is_demotable(no_flags, dst_enc, nds_enc);
|
||||
bool second_operand_demotable = is_commutative && is_demotable(no_flags, dst_enc, src_enc);
|
||||
if (first_operand_demotable || second_operand_demotable) {
|
||||
if (size == EVEX_16bit) {
|
||||
emit_int8(0x66);
|
||||
}
|
||||
|
||||
int src = first_operand_demotable ? src_enc : nds_enc;
|
||||
if (swap) {
|
||||
encode = is_prefixq ? prefixq_and_encode(dst_enc, src_enc, is_map1) : prefix_and_encode(dst_enc, src_enc, is_map1);
|
||||
encode = is_prefixq ? prefixq_and_encode(dst_enc, src, is_map1) : prefix_and_encode(dst_enc, src, is_map1);
|
||||
} else {
|
||||
encode = is_prefixq ? prefixq_and_encode(src_enc, dst_enc, is_map1) : prefix_and_encode(src_enc, dst_enc, is_map1);
|
||||
encode = is_prefixq ? prefixq_and_encode(src, dst_enc, is_map1) : prefix_and_encode(src, dst_enc, is_map1);
|
||||
}
|
||||
emit_opcode_prefix_and_encoding((unsigned char)opcode_byte, 0xC0, encode);
|
||||
} else {
|
||||
@ -13114,6 +13106,26 @@ int Assembler::eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc,
|
||||
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);
|
||||
}
|
||||
|
||||
// Emits a three-register arithmetic instruction, choosing the source order so
// that demotion is possible for commutative operations.
//
//   pre, opc   - forwarded to emit_eevex_prefix_or_demote_ndd().
//   size       - EVEX_64bit selects vex_w and use_prefixq; other sizes clear both.
//   op1, op2   - forwarded to emit_arith().
//   is_commutative - when true and (dst, src1) is not demotable but (dst, src2)
//                    is, the two sources are exchanged before emission.
void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
                                                      int size, int op1, int op2, bool no_flags, bool is_commutative) {
  const bool src1_demotable = is_demotable(no_flags, dst->encoding(), src1->encoding());
  const bool exchange_sources = !src1_demotable && is_commutative &&
                                is_demotable(no_flags, dst->encoding(), src2->encoding());
  Register first  = exchange_sources ? src2 : src1;
  Register second = exchange_sources ? src1 : src2;

  const bool is_64bit = (size == EVEX_64bit);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ is_64bit, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // NDD shares its encoding bits with the NDS bits of a regular EVEX
  // instruction, so DST travels as the second argument of the leaf routine.
  (void)emit_eevex_prefix_or_demote_ndd(first->encoding(), dst->encoding(), second->encoding(), pre, opc,
                                        &attributes, no_flags, /* use_prefixq */ is_64bit);
  emit_arith(op1, op2, first, second);
}
|
||||
|
||||
void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int op1, int op2, bool no_flags) {
|
||||
int dst_enc = dst->encoding();
|
||||
@ -13124,7 +13136,6 @@ void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds
|
||||
} else {
|
||||
bool vex_w = (size == EVEX_64bit) ? true : false;
|
||||
InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
//attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix_and_encode(0, dst_enc, nds_enc, pre, opc, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
|
||||
|
||||
@ -14623,11 +14634,7 @@ void Assembler::addq(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source, 64-bit operand size.
// Forwards to emit_eevex_or_demote() with opcode byte 0x01 and operand size
// EVEX_64bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::eaddq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x01, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::addq(Register dst, int32_t imm32) {
|
||||
@ -14656,11 +14663,7 @@ void Assembler::addq(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form, 64-bit operand size.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_64bit (the helper derives vex_w and use_prefixq from it) and emit_arith
// bytes 0x03 / 0xC0. is_commutative = true allows the helper to swap src1 and
// src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eaddq(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit,
                                        0x03, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::adcxq(Register dst, Register src) {
|
||||
@ -14753,11 +14756,7 @@ void Assembler::andq(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form, 64-bit operand size.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_64bit (the helper derives vex_w and use_prefixq from it) and emit_arith
// bytes 0x23 / 0xC0. is_commutative = true allows the helper to swap src1 and
// src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eandq(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit,
                                        0x23, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::andq(Address dst, Register src) {
|
||||
@ -14768,11 +14767,7 @@ void Assembler::andq(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source, 64-bit operand size.
// Forwards to emit_eevex_or_demote() with opcode byte 0x21 and operand size
// EVEX_64bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::eandq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x21, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::andnq(Register dst, Register src1, Register src2) {
|
||||
@ -15118,7 +15113,7 @@ void Assembler::eimulq(Register dst, Register src, bool no_flags) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form, 64-bit operand size.
// Forwards the three register encodings to the encoding-based
// emit_eevex_or_demote() with opcode byte 0xAF, operand size EVEX_64bit, and
// is_map1 / swap set. is_commutative = true additionally lets that helper
// demote when dst matches src2 rather than src1.
// The helper is invoked exactly once so the instruction is emitted once.
void Assembler::eimulq(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE,
                       VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0xAF, no_flags,
                       true /* is_map1 */, true /* swap */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::imulq(Register src) {
|
||||
@ -15580,11 +15575,7 @@ void Assembler::orq(Address dst, Register src) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source, 64-bit operand size.
// Forwards to emit_eevex_or_demote() with opcode byte 0x09 and operand size
// EVEX_64bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::eorq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x09, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::orq(Register dst, int32_t imm32) {
|
||||
@ -15624,13 +15615,8 @@ void Assembler::orq(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form, 64-bit operand size.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_64bit (the helper derives vex_w and use_prefixq from it) and emit_arith
// bytes 0x0B / 0xC0. is_commutative = true allows the helper to swap src1 and
// src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::eorq(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit,
                                        0x0B, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::popcntq(Register dst, Address src) {
|
||||
assert(VM_Version::supports_popcnt(), "must support");
|
||||
InstructionMark im(this);
|
||||
@ -16372,11 +16358,7 @@ void Assembler::xorq(Register dst, Register src) {
|
||||
}
|
||||
|
||||
// Three-operand (NDD) register form, 64-bit operand size.
// Forwards to emit_eevex_prefix_or_demote_arith_ndd() with operand size
// EVEX_64bit (the helper derives vex_w and use_prefixq from it) and emit_arith
// bytes 0x33 / 0xC0. is_commutative = true allows the helper to swap src1 and
// src2 when only the (dst, src2) pair is demotable.
// Prefix and arithmetic bytes are emitted exactly once, by the helper.
void Assembler::exorq(Register dst, Register src1, Register src2, bool no_flags) {
  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit,
                                        0x33, 0xC0, no_flags, true /* is_commutative */);
}
|
||||
|
||||
void Assembler::xorq(Register dst, Address src) {
|
||||
@ -16430,11 +16412,7 @@ void Assembler::esetzucc(Condition cc, Register dst) {
|
||||
|
||||
// Three-operand (NDD) form with a memory first source, 64-bit operand size.
// Forwards to emit_eevex_or_demote() with opcode byte 0x31 and operand size
// EVEX_64bit. is_commutative is passed as true, which lets that helper take its
// demoted path when is_demotable(no_flags, dst, src2) holds.
// The instruction bytes are emitted exactly once, by the helper.
void Assembler::exorq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x31, no_flags,
                       false /* is_map1 */, true /* is_commutative */);
}
|
||||
|
||||
void InstructionAttr::set_address_attributes(int tuple_type, int input_size_in_bits) {
|
||||
|
||||
@ -807,14 +807,20 @@ private:
|
||||
int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc,
|
||||
InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);
|
||||
|
||||
void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int op1, int op2, bool no_flags = false, bool is_commutative = false);
|
||||
|
||||
void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int op1, int op2, bool no_flags);
|
||||
|
||||
void emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int opcode_byte, bool no_flags = false, bool is_map1 = false);
|
||||
|
||||
void emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int opcode_byte, bool no_flags = false, bool is_map1 = false, bool is_commutative = false);
|
||||
|
||||
// Encoding-based variant (register encodings instead of Register objects).
// The trailing is_commutative flag defaults to false, so call sites that pass
// only up to `swap` keep their existing behavior; eimull/eimulq pass true.
void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
                          int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false, bool is_commutative = false);
|
||||
|
||||
void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc,
|
||||
int size, int opcode_byte, bool no_flags, bool is_map1 = false);
|
||||
@ -1149,6 +1155,7 @@ private:
|
||||
void eandl(Register dst, Register src, int32_t imm32, bool no_flags);
|
||||
void andl(Register dst, Address src);
|
||||
void eandl(Register dst, Register src1, Address src2, bool no_flags);
|
||||
void eandl(Register dst, Address src1, Register src2, bool no_flags);
|
||||
void andl(Register dst, Register src);
|
||||
void eandl(Register dst, Register src1, Register src2, bool no_flags);
|
||||
void andl(Address dst, Register src);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -92,6 +92,8 @@ registers_mapping = {
|
||||
'r31': {64: 'r31', 32: 'r31d', 16: 'r31w', 8: 'r31b'},
|
||||
}
|
||||
|
||||
# Assembly mnemonics the generator treats as commutative when producing demotion test cases.
# 'imul' is deliberately first: RegMemRegNddInstruction.astr() excludes it with commutative_instrs[1:].
commutative_instrs = ['imul', 'add', 'and', 'xor', 'or']
|
||||
|
||||
class Operand(object):
|
||||
def generate(self):
|
||||
return self
|
||||
@ -400,6 +402,15 @@ class RegMemRegNddInstruction(NFInstruction):
|
||||
self.mem = Address().generate(mem_base, mem_idx, width)
|
||||
self.reg2 = Register().generate(reg2, width)
|
||||
self.generate_operands(self.reg1, self.mem, self.reg2)
|
||||
self.demote = True
|
||||
|
||||
def astr(self):
    """Return the assembly text expected for this instruction.

    The two-operand spelling ``<aname> <op0>, <op1>`` is produced only when
    ``self.demote`` is set, the mnemonic is a commutative one other than
    'imul', the first and third operands have the same ``cstr()``, and
    ``self.no_flag`` is false. In every other case the parent class's
    ``astr()`` result is returned unchanged.
    """
    if not self.demote:
        return super().astr()

    cstrs = [operand.cstr() for operand in self.operands]
    # imul does not support RegMemReg
    foldable = self._aname in commutative_instrs[1:] and cstrs[0] == cstrs[2] and (not self.no_flag)
    if not foldable:
        return super().astr()

    leading_operands = self.operands[:2]
    return f'{self._aname} ' + ', '.join([operand.astr() for operand in leading_operands])
|
||||
|
||||
class RegRegImmNddInstruction(NFInstruction):
|
||||
def __init__(self, name, aname, width, no_flag, reg1, reg2, imm):
|
||||
@ -448,6 +459,9 @@ class RegRegRegNddInstruction(NFInstruction):
|
||||
ops = [op.cstr() for op in self.operands]
|
||||
if ops[0] == ops[1] and (not self.no_flag):
|
||||
return hdr + f'{self._aname} ' + ', '.join([op.astr() for op in self.operands[1:]])
|
||||
if self._aname in commutative_instrs and ops[0] == ops[2] and (not self.no_flag):
|
||||
return hdr + f'{self._aname} ' + ', '.join([op.astr() for op in self.operands[:2]])
|
||||
|
||||
return hdr + super().astr()
|
||||
|
||||
class RegRegRegImmNddInstruction(NFInstruction):
|
||||
@ -574,6 +588,18 @@ def generate(RegOp, ops, print_lp64_flag=True, full_set=False):
|
||||
lp64_flag = handle_lp64_flag(lp64_flag, print_lp64_flag, test_reg1, test_reg2, test_reg3)
|
||||
instr = RegOp(*op, reg1=test_reg1, reg2=test_reg2, reg3=test_reg3)
|
||||
print_instruction(instr, lp64_flag, print_lp64_flag)
|
||||
|
||||
demote = True if TEST_DEMOTION else False
|
||||
commute = True if op[1] in commutative_instrs else False
|
||||
if RegOp in [RegRegRegNddInstruction] and demote and commute :
|
||||
for i in range(len(test_regs) if full_set else 1):
|
||||
test_reg1 = test_regs[i] if full_set else random.choice(test_regs)
|
||||
test_reg2 = test_regs[(i + 2) % len(test_regs)] if full_set else random.choice(test_regs)
|
||||
test_reg3 = test_reg1
|
||||
|
||||
lp64_flag = handle_lp64_flag(lp64_flag, print_lp64_flag, test_reg1, test_reg2, test_reg3)
|
||||
instr = RegOp(*op, reg1=test_reg1, reg2=test_reg2, reg3=test_reg3)
|
||||
print_instruction(instr, lp64_flag, print_lp64_flag)
|
||||
|
||||
elif RegOp in [MemRegInstruction, RegMemInstruction, MoveRegMemInstruction, CmpxchgInstruction, CondRegMemInstruction, RegMemNddInstruction]:
|
||||
if full_set:
|
||||
@ -699,7 +725,7 @@ def generate(RegOp, ops, print_lp64_flag=True, full_set=False):
|
||||
print_instruction(instr, lp64_flag, print_lp64_flag)
|
||||
|
||||
elif RegOp in [RegMemRegNddInstruction, RegRegMemNddInstruction, CondRegRegMemInstruction]:
|
||||
demote_options = [False] if TEST_DEMOTION and RegOp not in [RegMemRegNddInstruction] else [False, True]
|
||||
demote_options = [False, True]
|
||||
for demote in demote_options:
|
||||
for i in range(len(test_regs) if full_set else 1):
|
||||
test_reg1 = test_regs[i] if full_set else random.choice(test_regs)
|
||||
@ -1023,6 +1049,8 @@ instruction_set = {
|
||||
RegMemRegNddInstruction: [
|
||||
('eaddl', 'add', 32, False),
|
||||
('eaddl', 'add', 32, True),
|
||||
('eandl', 'and', 32, False),
|
||||
('eandl', 'and', 32, True),
|
||||
('eorl', 'or', 32, False),
|
||||
('eorl', 'or', 32, True),
|
||||
('eorb', 'or', 8, False),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user