8349582: APX NDD code generation for OpenJDK

Reviewed-by: epeter, jbhateja, sviswanathan
This commit is contained in:
Srinivas Vamsi Parasa 2025-03-24 16:44:26 +00:00
parent 7d1fe0e03f
commit c87e1be052
5 changed files with 3575 additions and 1609 deletions

View File

@ -332,6 +332,21 @@ void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
}
}
// Emits the opcode byte, the register byte (op2 combined with encode(dst)) and
// the immediate of an arithmetic-with-immediate instruction. Only op1 == 0x81
// is accepted. No prefix is emitted here.
//
// When imm32 fits in 8 bits the short form is used: op1 with the sign bit
// (0x02) set, followed by a single immediate byte. This code-cache friendly
// encoding saves 3 bytes per instruction, which offsets the EVEX encoding
// penalty. Otherwise the full 4-byte immediate is emitted.
void Assembler::emit_arith_ndd(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(op1 == 0x81, "Unexpected opcode");

  const bool fits_in_byte = is8bit(imm32);
  const int reg_byte = op2 | encode(dst);

  if (!fits_in_byte) {
    // Long form: opcode, register byte, 32-bit immediate.
    emit_int16(op1, reg_byte);
    emit_int32(imm32);
    return;
  }

  // Short form: sign bit set in the opcode, 8-bit immediate.
  emit_int24(op1 | 0x02, reg_byte, imm32 & 0xFF);
}
// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
assert(isByte(op1) && isByte(op2), "wrong opcode");
@ -1461,7 +1476,7 @@ void Assembler::addl(Register dst, int32_t imm32) {
// APX NDD add-immediate, vex_w == false variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xC0).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::eaddl(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xC0, src, imm32);
}
void Assembler::addl(Register dst, Address src) {
@ -1695,7 +1710,7 @@ void Assembler::andl(Register dst, int32_t imm32) {
// APX NDD and-immediate, vex_w == false variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xE0).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::eandl(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xE0, src, imm32);
}
void Assembler::andl(Address dst, Register src) {
@ -4532,7 +4547,7 @@ void Assembler::orl(Register dst, int32_t imm32) {
// APX NDD or-immediate, vex_w == false variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xC8).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::eorl(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // Return value intentionally discarded, as in the sibling NDD emitters.
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xC8, src, imm32);
}
void Assembler::orl(Register dst, Address src) {
@ -7171,7 +7186,7 @@ void Assembler::subl(Register dst, int32_t imm32) {
// APX NDD subtract-immediate, vex_w == false variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xE8).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::esubl(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xE8, src, imm32);
}
// Force generation of a 4 byte immediate value even if it fits into 8bit
@ -7512,7 +7527,7 @@ void Assembler::xorl(Register dst, int32_t imm32) {
// APX NDD xor-immediate, vex_w == false variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xF0).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::exorl(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // Return value intentionally discarded, as in the sibling NDD emitters.
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xF0, src, imm32);
}
void Assembler::xorl(Register dst, Address src) {
@ -15158,7 +15173,7 @@ void Assembler::addq(Register dst, int32_t imm32) {
// APX NDD add-immediate, vex_w == true variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xC0).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::eaddq(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xC0, src, imm32);
}
void Assembler::addq(Register dst, Address src) {
@ -15255,7 +15270,7 @@ void Assembler::andq(Register dst, int32_t imm32) {
// APX NDD and-immediate, vex_w == true variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xE0).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::eandq(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // Return value intentionally discarded, as in the sibling NDD emitters.
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xE0, src, imm32);
}
void Assembler::andq(Register dst, Address src) {
@ -16142,7 +16157,7 @@ void Assembler::orq(Register dst, int32_t imm32) {
// APX NDD or-immediate, vex_w == true variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xC8).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::eorq(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // Return value intentionally discarded, as in the sibling NDD emitters.
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xC8, src, imm32);
}
void Assembler::orq_imm32(Register dst, int32_t imm32) {
@ -16830,7 +16845,7 @@ void Assembler::subq(Register dst, int32_t imm32) {
// APX NDD subtract-immediate, vex_w == true variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xE8).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::esubq(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xE8, src, imm32);
}
// Force generation of a 4 byte immediate value even if it fits into 8bit
@ -16961,7 +16976,7 @@ void Assembler::xorq(Register dst, int32_t imm32) {
// APX NDD xor-immediate, vex_w == true variant.
// The EVEX prefix helper is given dst's and src's encodings; src is then
// encoded into the register byte by emit_arith_ndd (opcode 0x81, base 0xF0).
// no_flags is forwarded unchanged to the prefix helper.
// Exactly one opcode/immediate sequence may follow the prefix, so only
// emit_arith_ndd is called here (never emit_arith as well).
void Assembler::exorq(Register dst, Register src, int32_t imm32, bool no_flags) {
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // Return value intentionally discarded, as in the sibling NDD emitters.
  (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
  emit_arith_ndd(0x81, 0xF0, src, imm32);
}
void Assembler::xorq(Address dst, int32_t imm32) {

View File

@ -823,6 +823,7 @@ private:
void emit_arith_b(int op1, int op2, Register dst, int imm8);
void emit_arith(int op1, int op2, Register dst, int32_t imm32);
// Immediate-arithmetic emitter used by the APX NDD forms: requires op1 == 0x81
// and emits a 1-byte immediate (with the opcode's sign bit set) when imm32
// fits in 8 bits, otherwise the full 4-byte immediate.
void emit_arith_ndd(int op1, int op2, Register dst, int32_t imm32);
// Force generation of a 4 byte immediate value even if it fits into 8bit
void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
void emit_arith(int op1, int op2, Register dst, Register src);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -57,7 +57,7 @@ shift_rot_ops = {'sarl', 'sarq', 'sall', 'salq', 'shll', 'shlq', 'shrl', 'shrq',
registers_mapping = {
# skip rax, rsi, rdi, rsp, rbp as they have special encodings
# 'rax': {64: 'rax', 32: 'eax', 16: 'ax', 8: 'al'},
'rax': {64: 'rax', 32: 'eax', 16: 'ax', 8: 'al'},
'rcx': {64: 'rcx', 32: 'ecx', 16: 'cx', 8: 'cl'},
'rdx': {64: 'rdx', 32: 'edx', 16: 'dx', 8: 'dl'},
'rbx': {64: 'rbx', 32: 'ebx', 16: 'bx', 8: 'bl'},
@ -415,7 +415,7 @@ class RegRegRegImmNddInstruction(NFInstruction):
self.imm = Immediate().generate(imm)
self.generate_operands(self.reg1, self.reg2, self.reg3, self.imm)
# Pool of registers for randomly chosen test operands. 'rax' is deliberately
# left out of the pool; generate() adds dedicated cases that use rax as the
# destination for some instruction classes.
test_regs = [key for key in registers_mapping.keys() if key != 'rax']

# Sample immediates: powers of two, one every 4 bits below 2**32 and one
# every 2 bits below 2**16.
immediates32 = [2 ** i for i in range(0, 32, 4)]
immediates16 = [2 ** i for i in range(0, 16, 2)]
@ -624,6 +624,14 @@ def generate(RegOp, ops, print_lp64_flag=True, full_set=False):
instr = RegOp(*op, reg1=test_reg1, reg2=test_reg2, imm=imm)
print_instruction(instr, lp64_flag, print_lp64_flag)
# additional tests with rax as destination
if RegOp in [RegRegImmNddInstruction]:
test_reg1 = 'rax'
test_reg2 = random.choice(test_regs)
lp64_flag = handle_lp64_flag(lp64_flag, print_lp64_flag, test_reg1, test_reg2)
instr = RegOp(*op, reg1=test_reg1, reg2=test_reg2, imm=imm)
print_instruction(instr, lp64_flag, print_lp64_flag)
elif RegOp in [RegMemImmInstruction, RegMemImmNddInstruction]:
if full_set:
imm_list = get_immediate_list(op_name, width)
@ -1466,4 +1474,4 @@ if __name__ == "__main__":
print("// END Generated code -- do not edit")
for f in ["x86ops.s", "x86ops.o", "x86ops.bin"]:
os.remove(f)
os.remove(f)