8259822: [PPC64] Support the prefixed instruction format added in POWER10

Reviewed-by: cashford, mdoerr
2026-05-12 22:50:08 +00:00 · 2021-04-09 09:07:37 +00:00 · 2021-04-09 09:07:37 +00:00 · f7a6c63ad3
commit f7a6c63ad3
parent a45733f840
3 changed files with 244 additions and 4 deletions
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp
@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2020 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2021 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -221,6 +221,12 @@ class Assembler : public AbstractAssembler {
    SPR_0_4_SHIFT  = 16u, // SPR_0_4 field in bits 16 -- 20
    RS_SHIFT       = 21u, // RS field in bits 21 -- 25
    OPCODE_SHIFT   = 26u, // opcode in bits 26 -- 31
+
+    // Shift counts in prefix word
+    PRE_TYPE_SHIFT = 24u, // Prefix type in bits 24 -- 25
+    PRE_ST1_SHIFT  = 23u, // ST1 field in bits 23 -- 23
+    PRE_R_SHIFT    = 20u, // R-bit in bits 20 -- 20
+    PRE_ST4_SHIFT  = 20u, // ST4 field in bits 23 -- 20
  };

  enum opcdxos_masks {
@ -797,6 +803,28 @@ class Assembler : public AbstractAssembler {

  };

+  enum opcdeos_mask {
+    // Mask for prefix primary opcode field
+    PREFIX_OPCODE_MASK        = (63u << OPCODE_SHIFT),
+    // Mask for prefix opcode and type fields
+    PREFIX_OPCODE_TYPE_MASK   = (63u << OPCODE_SHIFT) | (3u << PRE_TYPE_SHIFT),
+    // Masks for type 00/10 and type 01/11, including opcode, type, and st fieds
+    PREFIX_OPCODE_TYPEx0_MASK = PREFIX_OPCODE_TYPE_MASK | ( 1u << PRE_ST1_SHIFT),
+    PREFIX_OPCODE_TYPEx1_MASK = PREFIX_OPCODE_TYPE_MASK | (15u << PRE_ST4_SHIFT),
+
+    // Masks for each instructions
+    PADDI_PREFIX_OPCODE_MASK  = PREFIX_OPCODE_TYPEx0_MASK,
+    PADDI_SUFFIX_OPCODE_MASK  = ADDI_OPCODE_MASK,
+  };
+
+  enum opcdeos {
+    PREFIX_PRIMARY_OPCODE = (1u << OPCODE_SHIFT),
+
+    // Prefixed addi/li
+    PADDI_PREFIX_OPCODE   = PREFIX_PRIMARY_OPCODE | (2u << PRE_TYPE_SHIFT),
+    PADDI_SUFFIX_OPCODE   = ADDI_OPCODE,
+  };
+
  // Trap instructions TO bits
  enum trap_to_bits {
    // single bits
@ -1082,6 +1110,20 @@ class Assembler : public AbstractAssembler {
  static int inv_bo_field(int x)  { return inv_opp_u_field(x, 10,  6); }
  static int inv_bi_field(int x)  { return inv_opp_u_field(x, 15, 11); }

+  // For extended opcodes (prefixed instructions) introduced with Power 10
+  static long inv_r_eo(   int x)  { return  inv_opp_u_field(x, 11, 11); }
+  static long inv_type(   int x)  { return  inv_opp_u_field(x,  7,  6); }
+  static long inv_st_x0(  int x)  { return  inv_opp_u_field(x,  8,  8); }
+  static long inv_st_x1(  int x)  { return  inv_opp_u_field(x, 11,  8); }
+
+  //  - 8LS:D/MLS:D Formats
+  static long inv_d0_eo( long x)  { return  inv_opp_u_field(x, 31, 14); }
+
+  //  - 8RR:XX4/8RR:D Formats
+  static long inv_imm0_eo(int x)  { return  inv_opp_u_field(x, 31, 16); }
+  static long inv_uimm_eo(int x)  { return  inv_opp_u_field(x, 31, 29); }
+  static long inv_imm_eo( int x)  { return  inv_opp_u_field(x, 31, 24); }
+
  #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
  #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))

@ -1203,6 +1245,24 @@ class Assembler : public AbstractAssembler {
  static int vcmp_rc(   int        x)  { return  opp_u_field(x,             21, 21); } // for vcmp* instructions
  static int xxsplt_uim(int        x)  { return  opp_u_field(x,             15, 14); } // for xxsplt* instructions

+  // For extended opcodes (prefixed instructions) introduced with Power 10
+  static long r_eo(     int        x)  { return  opp_u_field(x,             11, 11); }
+  static long type(     int        x)  { return  opp_u_field(x,              7,  6); }
+  static long st_x0(    int        x)  { return  opp_u_field(x,              8,  8); }
+  static long st_x1(    int        x)  { return  opp_u_field(x,             11,  8); }
+
+  //  - 8LS:D/MLS:D Formats
+  static long d0_eo(    long       x)  { return  opp_u_field((x >> 16) & 0x3FFFF, 31, 14); }
+  static long d1_eo(    long       x)  { return  opp_u_field(x & 0xFFFF,    31, 16); }
+  static long s0_eo(    long       x)  { return  d0_eo(x); }
+  static long s1_eo(    long       x)  { return  d1_eo(x); }
+
+  //  - 8RR:XX4/8RR:D Formats
+  static long imm0_eo(  int        x)  { return  opp_u_field(x >> 16,       31, 16); }
+  static long imm1_eo(  int        x)  { return  opp_u_field(x & 0xFFFF,    31, 16); }
+  static long uimm_eo(  int        x)  { return  opp_u_field(x,             31, 29); }
+  static long imm_eo(   int        x)  { return  opp_u_field(x,             31, 24); }
+
  //static int xo1(     int        x)  { return  opp_u_field(x,             29, 21); }// is contained in our opcodes
  //static int xo2(     int        x)  { return  opp_u_field(x,             30, 21); }// is contained in our opcodes
  //static int xo3(     int        x)  { return  opp_u_field(x,             30, 22); }// is contained in our opcodes
@ -1302,9 +1362,15 @@ class Assembler : public AbstractAssembler {
  // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
  inline void addi( Register d, Register a, int si16);
  inline void addis(Register d, Register a, int si16);
+
+  // Prefixed add immediate, introduced by POWER10
+  inline void paddi(Register d, Register a, long si34, bool r);
+  inline void pli(  Register d, long si34);
+
 private:
  inline void addi_r0ok( Register d, Register a, int si16);
  inline void addis_r0ok(Register d, Register a, int si16);
+  inline void paddi_r0ok(Register d, Register a, long si34, bool r);
 public:
  inline void addic_( Register d, Register a, int si16);
  inline void subfic( Register d, Register a, int si16);
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
@ -131,6 +131,17 @@ inline void Assembler::divw_(  Register d, Register a, Register b) { emit_int32(
 inline void Assembler::divwu(  Register d, Register a, Register b) { emit_int32(DIVWU_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::divwu_( Register d, Register a, Register b) { emit_int32(DIVWU_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }

+// Prefixed instructions, introduced by POWER10
+inline void Assembler::paddi(Register d, Register a, long si34, bool r = false) {
+  assert(a != R0 || r, "r0 not allowed, unless R is set (CIA relative)");
+  paddi_r0ok( d, a, si34, r);
+}
+
+inline void Assembler::paddi_r0ok(Register d, Register a, long si34, bool r = false) {
+  emit_int32(PADDI_PREFIX_OPCODE | r_eo(r) | d0_eo(si34));
+  emit_int32(PADDI_SUFFIX_OPCODE | rt(d)   | ra(a)   | d1_eo(si34));
+}
+
 // Fixed-Point Arithmetic Instructions with Overflow detection
 inline void Assembler::addo(    Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
 inline void Assembler::addo_(   Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
@ -169,6 +180,9 @@ inline void Assembler::lis(  Register d, int si16)             { Assembler::addi
 inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
 inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }

+// Prefixed instructions, introduced by POWER10
+inline void Assembler::pli(Register d, long si34) { Assembler::paddi_r0ok( d, R0, si34, false); }
+
 // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
 inline void Assembler::cmpi(  ConditionRegister f, int l, Register a, int si16)   { emit_int32( CMPI_OPCODE  | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
 inline void Assembler::cmp(   ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE   | bf(f) | l10(l) | ra(a) | rb(b)); }
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@ -1155,7 +1155,41 @@ static int cc_to_biint(int cc, int flags_reg) {
 // is the number of bytes (not instructions) which will be inserted before
 // the instruction. The padding must match the size of a NOP instruction.

-// Currently not used on this platform.
+// Add nop if a prefixed (two-word) instruction is going to cross a 64-byte boundary.
+// (See Section 1.6 of Power ISA Version 3.1)
+static int compute_prefix_padding(int current_offset) {
+  assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
+         "Code buffer must be aligned to a multiple of 64 bytes");
+  if (is_aligned(current_offset + BytesPerInstWord, 64)) {
+    return BytesPerInstWord;
+  }
+  return 0;
+}
+
+int loadConI32Node::compute_padding(int current_offset) const {
+  return compute_prefix_padding(current_offset);
+}
+
+int loadConL34Node::compute_padding(int current_offset) const {
+  return compute_prefix_padding(current_offset);
+}
+
+int addI_reg_imm32Node::compute_padding(int current_offset) const {
+  return compute_prefix_padding(current_offset);
+}
+
+int addL_reg_imm34Node::compute_padding(int current_offset) const {
+  return compute_prefix_padding(current_offset);
+}
+
+int addP_reg_imm34Node::compute_padding(int current_offset) const {
+  return compute_prefix_padding(current_offset);
+}
+
+int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
+  return compute_prefix_padding(current_offset);
+}
+

 //=============================================================================

@ -1893,7 +1927,7 @@ uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {

 #ifndef PRODUCT
 void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
-  st->print("NOP \t// %d nops to pad for loops.", _count);
+  st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
 }
 #endif

@ -4016,6 +4050,15 @@ operand immIhi16() %{
  interface(CONST_INTER);
 %}

+// Integer Immediate: 32-bit immediate for prefixed addi and load/store.
+operand immI32() %{
+  predicate(PowerArchitecturePPC64 >= 10);
+  op_cost(0);
+  match(ConI);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 operand immInegpow2() %{
  predicate(is_power_of_2(-(juint)(n->get_int())));
  match(ConI);
@ -4257,6 +4300,15 @@ operand immL32() %{
  interface(CONST_INTER);
 %}

+// Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
+operand immL34() %{
+  predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long Immediate: 64-bit, where highest 16 bits are not 0x0000.
 operand immLhighest16() %{
  predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
@ -5791,6 +5843,23 @@ instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  ins_pipe(pipe_class_default);
 %}

+instruct loadConI32(iRegIdst dst, immI32 src) %{
+  match(Set dst src);
+  // This macro is valid only in Power 10 and up, but adding the following predicate here
+  // caused a build error, so we comment it out for now.
+  // predicate(PowerArchitecturePPC64 >= 10);
+  ins_cost(DEFAULT_COST+1);
+
+  format %{ "PLI     $dst, $src" %}
+  size(8);
+  ins_encode %{
+    assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
+    __ pli($dst$$Register, $src$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+  ins_alignment(2);
+%}
+
 instruct loadConI_Ex(iRegIdst dst, immI src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);
@ -5862,6 +5931,24 @@ instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
  %}
 %}

+// Load 34-bit long constant using prefixed addi. No constant pool entries required.
+instruct loadConL34(iRegLdst dst, immL34 src) %{
+  match(Set dst src);
+  // This macro is valid only in Power 10 and up, but adding the following predicate here
+  // caused a build error, so we comment it out for now.
+  // predicate(PowerArchitecturePPC64 >= 10);
+  ins_cost(DEFAULT_COST+1);
+
+  format %{ "PLI     $dst, $src \t// long" %}
+  size(8);
+  ins_encode %{
+    assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
+    __ pli($dst$$Register, $src$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+  ins_alignment(2);
+%}
+
 // Load long constant 0x????000000000000.
 instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
  match(Set dst src);
@ -8474,6 +8561,21 @@ instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
  ins_pipe(pipe_class_default);
 %}

+// Immediate Addition using prefixed addi
+instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
+  match(Set dst (AddI src1 src2));
+  predicate(PowerArchitecturePPC64 >= 10);
+  ins_cost(DEFAULT_COST+1);
+  format %{ "PADDI   $dst, $src1, $src2" %}
+  size(8);
+  ins_encode %{
+    assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
+    __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+  ins_alignment(2);
+%}
+
 // Long Addition
 instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (AddL src1 src2));
@ -8548,6 +8650,23 @@ instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
  ins_pipe(pipe_class_default);
 %}

+// Long Immediate Addition using prefixed addi
+// No constant pool entries required.
+instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
+  match(Set dst (AddL src1 src2));
+  predicate(PowerArchitecturePPC64 >= 10);
+  ins_cost(DEFAULT_COST+1);
+
+  format %{ "PADDI   $dst, $src1, $src2" %}
+  size(8);
+  ins_encode %{
+    assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
+    __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+  ins_alignment(2);
+%}
+
 // Pointer Register Addition
 instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
  match(Set dst (AddP src1 src2));
@ -8585,6 +8704,23 @@ instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
  ins_pipe(pipe_class_default);
 %}

+// Pointer Immediate Addition using prefixed addi
+// No constant pool entries required.
+instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
+  match(Set dst (AddP src1 src2));
+  predicate(PowerArchitecturePPC64 >= 10);
+  ins_cost(DEFAULT_COST+1);
+
+  format %{ "PADDI    $dst, $src1, $src2" %}
+  size(8);
+  ins_encode %{
+    assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
+    __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+  ins_alignment(2);
+%}
+
 //---------------------
 // Subtraction Instructions

@ -11887,6 +12023,7 @@ instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, fla

 instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
  match(Set dst (Whitespace src1));
+  predicate(PowerArchitecturePPC64 <= 9);
  effect(TEMP src2, TEMP crx);
  ins_cost(4 * DEFAULT_COST);

@ -11906,6 +12043,29 @@ instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, fl
  ins_pipe(pipe_class_default);
 %}

+// Power 10 version, using prefixed addi to load 32-bit constant
+instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
+  match(Set dst (Whitespace src1));
+  predicate(PowerArchitecturePPC64 >= 10);
+  effect(TEMP src2, TEMP crx);
+  ins_cost(3 * DEFAULT_COST);
+
+  format %{ "PLI     $src2, 0x201C0D09\n\t"
+            "CMPRB   $crx, 1, $src1, $src2\n\t"
+            "SETB    $dst, $crx" %}
+  size(16);
+  ins_encode %{
+    // 0x09 to 0x0D, 0x1C to 0x20
+    assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
+    __ pli($src2$$Register, 0x201C0D09);
+    // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
+    __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
+    __ setb($dst$$Register, $crx$$CondRegister);
+  %}
+  ins_pipe(pipe_class_default);
+  ins_alignment(2);
+%}
+
 //----------Branches---------------------------------------------------------
 // Jump