8354348: Enable Extended EVEX to REX2/REX demotion for commutative operations with same dst and src2

Reviewed-by: jbhateja, epeter, sviswanathan
2026-03-14 18:03:44 +00:00 · 2025-09-16 18:13:34 +00:00 · 2025-09-16 18:13:34 +00:00 · c41add8d3e
commit c41add8d3e
parent 075ebb4ee5
4 changed files with 2916 additions and 2567 deletions
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@ -1398,11 +1398,7 @@ void Assembler::addl(Address dst, Register src) {

 void Assembler::eaddl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x01);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x01, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::addl(Register dst, int32_t imm32) {
@ -1432,11 +1428,7 @@ void Assembler::addl(Register dst, Register src) {
 }

 void Assembler::eaddl(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void)emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_arith(0x03, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x03, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::addr_nop_4() {
@ -1657,17 +1649,18 @@ void Assembler::eandl(Register dst, Register src1, Address src2, bool no_flags)
  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x23, no_flags);
 }

+void Assembler::eandl(Register dst, Address src1, Register src2, bool no_flags) {
+  InstructionMark im(this);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x21, no_flags, false /* is_map1 */, true /* is_commutative */);
+}
+
 void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
 }

 void Assembler::eandl(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_arith(0x23, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x23, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::andnl(Register dst, Register src1, Register src2) {
@ -2519,7 +2512,7 @@ void Assembler::imull(Register dst, Register src) {
 }

 void Assembler::eimull(Register dst, Register src1, Register src2, bool no_flags) {
-  emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAF, no_flags, true /* is_map1 */,  true /* swap */);
+  emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAF, no_flags, true /* is_map1 */,  true /* swap */, true /* is_commutative */);
 }

 void Assembler::imull(Register dst, Address src, int32_t value) {
@ -4419,11 +4412,7 @@ void Assembler::enotl(Register dst, Register src) {
 }

 void Assembler::eorw(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_arith(0x0B, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_16bit, 0x0B, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::orl(Address dst, int32_t imm32) {
@ -4467,11 +4456,7 @@ void Assembler::orl(Register dst, Register src) {
 }

 void Assembler::eorl(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_arith(0x0B, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x0B, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::orl(Address dst, Register src) {
@ -4483,11 +4468,7 @@ void Assembler::orl(Address dst, Register src) {

 void Assembler::eorl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x09);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x09, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::orb(Address dst, int imm8) {
@ -4517,11 +4498,7 @@ void Assembler::orb(Address dst, Register src) {

 void Assembler::eorb(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x08);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x08, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
@ -7323,11 +7300,7 @@ void Assembler::xorl(Register dst, Register src) {
 }

 void Assembler::exorl(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_arith(0x33, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x33, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::xorl(Address dst, Register src) {
@ -7339,11 +7312,7 @@ void Assembler::xorl(Address dst, Register src) {

 void Assembler::exorl(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x31);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x31, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::xorb(Register dst, Address src) {
@ -7367,11 +7336,7 @@ void Assembler::xorb(Address dst, Register src) {

 void Assembler::exorb(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x30);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x30, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::xorw(Register dst, Address src) {
@ -12955,6 +12920,31 @@ void Assembler::eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimd
  vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ true, no_flags);
 }

+void Assembler::emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+                                     int size, int opcode_byte, bool no_flags, bool is_map1, bool is_commutative) {
+  if (is_commutative && is_demotable(no_flags, dst->encoding(), src2->encoding())) {
+    // Opcode byte adjustment due to mismatch between NDD and equivalent demotable variant
+    opcode_byte += 2;
+    if (size == EVEX_64bit) {
+      emit_prefix_and_int8(get_prefixq(src1, dst, is_map1), opcode_byte);
+    } else {
+      // For 32-bit, 16-bit and 8-bit
+      if (size == EVEX_16bit) {
+        emit_int8(0x66);
+      }
+      prefix(src1, dst, false, is_map1);
+      emit_int8(opcode_byte);
+    }
+  } else {
+    bool vex_w = (size == EVEX_64bit) ? true : false;
+    InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+    attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
+    eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), pre, opc, &attributes, no_flags);
+    emit_int8(opcode_byte);
+  }
+  emit_operand(src2, src1, 0);
+}
+
 void Assembler::emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
                                     int size, int opcode_byte, bool no_flags, bool is_map1) {
  if (is_demotable(no_flags, dst->encoding(), src1->encoding())) {
@ -13055,18 +13045,20 @@ void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8
 }

 void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
-                                     int size, int opcode_byte, bool no_flags, bool is_map1, bool swap) {
+                                     int size, int opcode_byte, bool no_flags, bool is_map1, bool swap, bool is_commutative) {
  int encode;
  bool is_prefixq = (size == EVEX_64bit) ? true : false;
-  if (is_demotable(no_flags, dst_enc, nds_enc)) {
+  bool first_operand_demotable = is_demotable(no_flags, dst_enc, nds_enc);
+  bool second_operand_demotable = is_commutative && is_demotable(no_flags, dst_enc, src_enc);
+  if (first_operand_demotable || second_operand_demotable) {
    if (size == EVEX_16bit) {
      emit_int8(0x66);
    }
-
+    int src = first_operand_demotable ? src_enc : nds_enc;
    if (swap) {
-      encode = is_prefixq ? prefixq_and_encode(dst_enc, src_enc, is_map1) : prefix_and_encode(dst_enc, src_enc, is_map1);
+      encode = is_prefixq ? prefixq_and_encode(dst_enc, src, is_map1) : prefix_and_encode(dst_enc, src, is_map1);
    } else {
-      encode = is_prefixq ? prefixq_and_encode(src_enc, dst_enc, is_map1) : prefix_and_encode(src_enc, dst_enc, is_map1);
+      encode = is_prefixq ? prefixq_and_encode(src, dst_enc, is_map1) : prefix_and_encode(src, dst_enc, is_map1);
    }
    emit_opcode_prefix_and_encoding((unsigned char)opcode_byte, 0xC0, encode);
  } else {
@ -13114,6 +13106,26 @@ int Assembler::eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc,
  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);
 }

+void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+                                                      int size, int op1, int op2, bool no_flags, bool is_commutative) {
+  bool demotable = is_demotable(no_flags, dst->encoding(), src1->encoding());
+  if (!demotable && is_commutative) {
+    if (is_demotable(no_flags, dst->encoding(), src2->encoding())) {
+      // swap src1 and src2
+      Register tmp = src1;
+      src1 = src2;
+      src2 = tmp;
+    }
+  }
+  bool vex_w = (size == EVEX_64bit) ? true : false;
+  bool use_prefixq = vex_w;
+  InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
+  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
+  (void)emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), pre, opc, &attributes, no_flags, use_prefixq);
+  emit_arith(op1, op2, src1, src2);
+}
+
 void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
                                                      int size, int op1, int op2, bool no_flags) {
  int dst_enc = dst->encoding();
@ -13124,7 +13136,6 @@ void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds
  } else {
    bool vex_w = (size == EVEX_64bit) ? true : false;
    InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-    //attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
    attributes.set_is_evex_instruction();
    vex_prefix_and_encode(0, dst_enc, nds_enc, pre, opc, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);

@ -14623,11 +14634,7 @@ void Assembler::addq(Address dst, Register src) {

 void Assembler::eaddq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x01);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x01, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::addq(Register dst, int32_t imm32) {
@ -14656,11 +14663,7 @@ void Assembler::addq(Register dst, Register src) {
 }

 void Assembler::eaddq(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
-  emit_arith(0x03, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x03, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::adcxq(Register dst, Register src) {
@ -14753,11 +14756,7 @@ void Assembler::andq(Register dst, Register src) {
 }

 void Assembler::eandq(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
-  emit_arith(0x23, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x23, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::andq(Address dst, Register src) {
@ -14768,11 +14767,7 @@ void Assembler::andq(Address dst, Register src) {

 void Assembler::eandq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x21);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x21, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::andnq(Register dst, Register src1, Register src2) {
@ -15118,7 +15113,7 @@ void Assembler::eimulq(Register dst, Register src, bool no_flags) {
 }

 void Assembler::eimulq(Register dst, Register src1, Register src2, bool no_flags) {
-  emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0xAF, no_flags, true /* is_map1 */,  true /* swap */);
+  emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0xAF, no_flags, true /* is_map1 */,  true /* swap */, true /* is_commutative */);
 }

 void Assembler::imulq(Register src) {
@ -15580,11 +15575,7 @@ void Assembler::orq(Address dst, Register src) {

 void Assembler::eorq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x09);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x09, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void Assembler::orq(Register dst, int32_t imm32) {
@ -15624,13 +15615,8 @@ void Assembler::orq(Register dst, Register src) {
 }

 void Assembler::eorq(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
-  emit_arith(0x0B, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x0B, 0xC0, no_flags, true /* is_commutative */);
 }
-
 void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
@ -16372,11 +16358,7 @@ void Assembler::xorq(Register dst, Register src) {
 }

 void Assembler::exorq(Register dst, Register src1, Register src2, bool no_flags) {
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
-  // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
-  (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
-  emit_arith(0x33, 0xC0, src1, src2);
+  emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x33, 0xC0, no_flags, true /* is_commutative */);
 }

 void Assembler::xorq(Register dst, Address src) {
@ -16430,11 +16412,7 @@ void Assembler::esetzucc(Condition cc, Register dst) {

 void Assembler::exorq(Register dst, Address src1, Register src2, bool no_flags) {
  InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
-  eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
-  emit_int8(0x31);
-  emit_operand(src2, src1, 0);
+  emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x31, no_flags, false /* is_map1 */, true /* is_commutative */);
 }

 void InstructionAttr::set_address_attributes(int tuple_type, int input_size_in_bits) {
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@ -807,14 +807,20 @@ private:
  int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc,
                                      InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);

+  void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+                                             int size, int op1, int op2, bool no_flags = false, bool is_commutative = false);
+
  void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
                                             int size, int op1, int op2, bool no_flags);

  void emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
                            int size, int opcode_byte, bool no_flags = false, bool is_map1 = false);

+  void emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+                            int size, int opcode_byte, bool no_flags = false, bool is_map1 = false, bool is_commutative = false);
+
  void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
-                            int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false);
+                            int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false, bool is_commutative = false);

  void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc,
                            int size, int opcode_byte, bool no_flags, bool is_map1 = false);
@ -1149,6 +1155,7 @@ private:
  void eandl(Register dst, Register src, int32_t imm32, bool no_flags);
  void andl(Register dst, Address src);
  void eandl(Register dst, Register src1, Address src2, bool no_flags);
+  void eandl(Register dst, Address src1, Register src2, bool no_flags);
  void andl(Register dst, Register src);
  void eandl(Register dst, Register src1, Register src2, bool no_flags);
  void andl(Address dst, Register src);
--- a/test/hotspot/gtest/x86/asmtest.out.h
+++ b/test/hotspot/gtest/x86/asmtest.out.h
--- a/test/hotspot/gtest/x86/x86-asmtest.py
+++ b/test/hotspot/gtest/x86/x86-asmtest.py
@ -92,6 +92,8 @@ registers_mapping = {
    'r31': {64: 'r31', 32: 'r31d', 16: 'r31w', 8: 'r31b'},
 }

+commutative_instrs = ['imul', 'add', 'and', 'xor', 'or']
+
 class Operand(object):
    def generate(self):
        return self
@ -400,6 +402,15 @@ class RegMemRegNddInstruction(NFInstruction):
        self.mem = Address().generate(mem_base, mem_idx, width)
        self.reg2 = Register().generate(reg2, width)
        self.generate_operands(self.reg1, self.mem, self.reg2)
+        self.demote = True
+    
+    def astr(self):
+        if self.demote:
+            ops = [op.cstr() for op in self.operands]
+            # imul does not support RegMemReg
+            if self._aname in commutative_instrs[1:] and ops[0] == ops[2] and (not self.no_flag):
+                return  f'{self._aname} ' + ', '.join([op.astr() for op in self.operands[:2]])
+        return super().astr()

 class RegRegImmNddInstruction(NFInstruction):
    def __init__(self, name, aname, width, no_flag, reg1, reg2, imm):
@ -448,6 +459,9 @@ class RegRegRegNddInstruction(NFInstruction):
            ops = [op.cstr() for op in self.operands]
            if ops[0] == ops[1] and (not self.no_flag):
                return  hdr + f'{self._aname} ' + ', '.join([op.astr() for op in self.operands[1:]])
+            if self._aname in commutative_instrs and ops[0] == ops[2] and (not self.no_flag):
+                return  hdr + f'{self._aname} ' + ', '.join([op.astr() for op in self.operands[:2]])
+            
        return hdr + super().astr()

 class RegRegRegImmNddInstruction(NFInstruction):
@ -574,6 +588,18 @@ def generate(RegOp, ops, print_lp64_flag=True, full_set=False):
                    lp64_flag = handle_lp64_flag(lp64_flag, print_lp64_flag, test_reg1, test_reg2, test_reg3)
                    instr = RegOp(*op, reg1=test_reg1, reg2=test_reg2, reg3=test_reg3)
                    print_instruction(instr, lp64_flag, print_lp64_flag)
+            
+            demote = True if TEST_DEMOTION else False
+            commute = True if op[1] in commutative_instrs else False
+            if RegOp in [RegRegRegNddInstruction] and demote and commute :
+                for i in range(len(test_regs) if full_set else 1):
+                    test_reg1 = test_regs[i] if full_set else random.choice(test_regs)
+                    test_reg2 = test_regs[(i + 2) % len(test_regs)] if full_set else random.choice(test_regs)
+                    test_reg3 = test_reg1
+                    
+                    lp64_flag = handle_lp64_flag(lp64_flag, print_lp64_flag, test_reg1, test_reg2, test_reg3)
+                    instr = RegOp(*op, reg1=test_reg1, reg2=test_reg2, reg3=test_reg3)
+                    print_instruction(instr, lp64_flag, print_lp64_flag)

        elif RegOp in [MemRegInstruction, RegMemInstruction, MoveRegMemInstruction, CmpxchgInstruction, CondRegMemInstruction, RegMemNddInstruction]:
            if full_set:
@ -699,7 +725,7 @@ def generate(RegOp, ops, print_lp64_flag=True, full_set=False):
                print_instruction(instr, lp64_flag, print_lp64_flag)

        elif RegOp in [RegMemRegNddInstruction, RegRegMemNddInstruction, CondRegRegMemInstruction]:
-            demote_options = [False] if TEST_DEMOTION and RegOp not in [RegMemRegNddInstruction] else [False, True]
+            demote_options = [False, True]
            for demote in demote_options:
                for i in range(len(test_regs) if full_set else 1):
                    test_reg1 = test_regs[i] if full_set else random.choice(test_regs)
@ -1023,6 +1049,8 @@ instruction_set = {
    RegMemRegNddInstruction: [
        ('eaddl', 'add', 32, False),
        ('eaddl', 'add', 32, True),
+        ('eandl', 'and', 32, False),
+        ('eandl', 'and', 32, True),
        ('eorl', 'or', 32, False),
        ('eorl', 'or', 32, True),
        ('eorb', 'or', 8, False),