8188139: PPC64: Superword Level Parallelization with VSX

Reviewed-by: kvn, gromero
This commit is contained in:
Michihiro Horie 2017-10-03 17:37:15 -07:00
parent e6765bf9bf
commit 7b17b2d2a3
10 changed files with 750 additions and 16 deletions

View File

@ -517,6 +517,9 @@ class Assembler : public AbstractAssembler {
XXPERMDI_OPCODE= (60u << OPCODE_SHIFT | 10u << 3),
XXMRGHW_OPCODE = (60u << OPCODE_SHIFT | 18u << 3),
XXMRGLW_OPCODE = (60u << OPCODE_SHIFT | 50u << 3),
XXSPLTW_OPCODE = (60u << OPCODE_SHIFT | 164u << 2),
XXLXOR_OPCODE = (60u << OPCODE_SHIFT | 154u << 3),
XXLEQV_OPCODE = (60u << OPCODE_SHIFT | 186u << 3),
// Vector Permute and Formatting
VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
@ -1125,6 +1128,7 @@ class Assembler : public AbstractAssembler {
static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions
//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
@ -2155,6 +2159,11 @@ class Assembler : public AbstractAssembler {
inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
inline void xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void mtvsrd( VectorSRegister d, Register a);
inline void mtvsrwz( VectorSRegister d, Register a);
inline void xxspltw( VectorSRegister d, VectorSRegister b, int ui2);
inline void xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b);
// VSX Extended Mnemonics
inline void xxspltd( VectorSRegister d, VectorSRegister a, int x);

View File

@ -759,15 +759,20 @@ inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit
inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1) | 1u); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2) | 1u); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1) | 1u); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2) | 1u); }
// VSX emitters added for superword support. Each builds one 32-bit instruction word
// by OR-ing the opcode constant with the encoded register/immediate fields and
// passes it to emit_int32().
// Every emitter below also ORs constant low bits into the word: `1u` (bit 0) for
// mtvsrd/mtvsrwz, bits 1..0 for xxspltw, bits 2..0 for xxlxor/xxleqv.
// NOTE(review): in the Power ISA XX1/XX2/XX3 forms these low bits look like the
// TX/BX/AX register-extension bits, which would pin the operands to the upper
// half of the VSR file -- confirm against how vsrt()/vsra()/vsrb() encode registers.
inline void Assembler::mtvsrd( VectorSRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d) | ra(a) | 1u); }
inline void Assembler::mtvsrwz( VectorSRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a) | 1u); }
// ui2 is range-checked through uimm(ui2,2) before being placed by xxsplt_uim().
inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2) { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2)) | 1u << 1 | 1u); }
inline void Assembler::xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b) | 1u << 2 | 1u << 1 | 1u); }
inline void Assembler::xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b) | 1u << 2 | 1u << 1 | 1u); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrwz( Register a, VectorRegister d) { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::xxpermdi(VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm) { emit_int32( XXPERMDI_OPCODE | vsrt(d) | vsra(a) | vsrb(b) | vsdm(dm)); }
inline void Assembler::xxpermdi(VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm) { emit_int32( XXPERMDI_OPCODE | vsrt(d) | vsra(a) | vsrb(b) | 0u << 10 | vsdm(dm) | 1u << 2 | 1u << 1 | 1u); }
inline void Assembler::xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXMRGHW_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
// Emit XXMRGLW d, a, b. The instruction word must be built from XXMRGLW_OPCODE;
// using XXMRGHW_OPCODE here would silently assemble xxmrghw instead.
inline void Assembler::xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXMRGLW_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }

View File

@ -254,6 +254,73 @@ register %{
reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
// ----------------------------
// Vector-Scalar Registers
// ----------------------------
// One reg_def per vector-scalar register, VSR0..VSR63. Every entry uses SOC for
// both save-type columns, Op_VecX as the ideal register type, an encoding equal
// to the register index, and NULL in the last column.
// NOTE(review): the last column is a VMReg for the other register files in this
// block (e.g. SR_PPR->as_VMReg()); NULL here presumably means no VMReg mapping -- confirm.
reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
// ----------------------------
// Specify priority of register selection within phases of register
@ -395,6 +462,73 @@ alloc_class chunk3 (
SR_PPR
);
// Allocation class holding all 64 vector-scalar registers, listed in ascending
// index order (VSR0 first, VSR63 last).
// NOTE(review): list order presumably sets the allocator's selection priority,
// as the "Specify priority of register selection" comment above the alloc
// classes suggests -- confirm.
alloc_class chunk4 (
VSR0,
VSR1,
VSR2,
VSR3,
VSR4,
VSR5,
VSR6,
VSR7,
VSR8,
VSR9,
VSR10,
VSR11,
VSR12,
VSR13,
VSR14,
VSR15,
VSR16,
VSR17,
VSR18,
VSR19,
VSR20,
VSR21,
VSR22,
VSR23,
VSR24,
VSR25,
VSR26,
VSR27,
VSR28,
VSR29,
VSR30,
VSR31,
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51,
VSR52,
VSR53,
VSR54,
VSR55,
VSR56,
VSR57,
VSR58,
VSR59,
VSR60,
VSR61,
VSR62,
VSR63
);
//-------Architecture Description Register Classes-----------------------
// Several register classes are automatically defined based upon
@ -769,6 +903,73 @@ reg_class dbl_reg(
F31, F31_H // nv!
);
// Class for all 128bit vector registers
// Register class referenced by the vecX operand (ALLOC_IN_RC(vectorx_reg)).
// It contains every VSR defined above, VSR0 through VSR63, with no exclusions.
reg_class vectorx_reg(VSR0,
VSR1,
VSR2,
VSR3,
VSR4,
VSR5,
VSR6,
VSR7,
VSR8,
VSR9,
VSR10,
VSR11,
VSR12,
VSR13,
VSR14,
VSR15,
VSR16,
VSR17,
VSR18,
VSR19,
VSR20,
VSR21,
VSR22,
VSR23,
VSR24,
VSR25,
VSR26,
VSR27,
VSR28,
VSR29,
VSR30,
VSR31,
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51,
VSR52,
VSR53,
VSR54,
VSR55,
VSR56,
VSR57,
VSR58,
VSR59,
VSR60,
VSR61,
VSR62,
VSR63
);
%}
//----------DEFINITION BLOCK---------------------------------------------------
@ -2048,14 +2249,24 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
// NOTE(review): this is a diff hunk rendered without +/- markers. The next two
// statements look like the pre-change body (fixed 8-byte vectors) and the
// if/else after them like the post-change body. Read literally, the early
// return would make the VSX branch unreachable -- confirm against the applied file.
assert(MaxVectorSize == 8, "");
return 8;
// Width depends on VSX availability: 16 bytes with VSX, otherwise 8 bytes.
// Each branch asserts that MaxVectorSize agrees with the value returned.
if (VM_Version::has_vsx()) {
assert(MaxVectorSize == 16, "");
return 16;
} else {
assert(MaxVectorSize == 8, "");
return 8;
}
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int size) {
// NOTE(review): diff hunk without +/- markers. The first assert/return pair
// looks like the pre-change body and the if/else like the post-change body;
// confirm against the applied file.
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
// With VSX a 16-byte vector lives in an Op_VecX register; without VSX an
// 8-byte vector is kept in a long register (Op_RegL). Any other size trips
// the assert in the corresponding branch.
if (VM_Version::has_vsx()) {
assert(MaxVectorSize == 16 && size == 16, "");
return Op_VecX;
} else {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
}
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
@ -2075,7 +2286,10 @@ const int Matcher::min_vector_size(const BasicType bt) {
// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
// NOTE(review): diff hunk without +/- markers. The bare `return false;` below
// looks like the pre-change body and the if/else like the post-change body;
// confirm against the applied file.
return false;
// With VSX, misaligned vector accesses are allowed unless the AlignVector flag
// is set; without VSX they are never allowed.
if (VM_Version::has_vsx())
return !AlignVector; // can be changed by flag
else
return false;
}
// PPC AES support not yet implemented
@ -2217,10 +2431,31 @@ const MachRegisterNumbers farg_reg[13] = {
F13_num
};
// Table of the 64 vector-scalar register numbers. Entry i must be VSRi_num so
// that every register appears exactly once and in index order.
const MachRegisterNumbers vsarg_reg[64] = {
  VSR0_num,  VSR1_num,  VSR2_num,  VSR3_num,
  VSR4_num,  VSR5_num,  VSR6_num,  VSR7_num,
  VSR8_num,  VSR9_num,  VSR10_num, VSR11_num,
  VSR12_num, VSR13_num, VSR14_num, VSR15_num,
  VSR16_num, VSR17_num, VSR18_num, VSR19_num,
  VSR20_num, VSR21_num, VSR22_num, VSR23_num,
  VSR24_num, VSR25_num, VSR26_num, VSR27_num,  // was VSR24, VSR23, VSR24, VSR25
  VSR28_num, VSR29_num, VSR30_num, VSR31_num,
  VSR32_num, VSR33_num, VSR34_num, VSR35_num,
  VSR36_num, VSR37_num, VSR38_num, VSR39_num,
  VSR40_num, VSR41_num, VSR42_num, VSR43_num,
  VSR44_num, VSR45_num, VSR46_num, VSR47_num,
  VSR48_num, VSR49_num, VSR50_num, VSR51_num,
  VSR52_num, VSR53_num, VSR54_num, VSR55_num,
  VSR56_num, VSR57_num, VSR58_num, VSR59_num,
  VSR60_num, VSR61_num, VSR62_num, VSR63_num
};
// Element counts of the argument-register tables. They are computed with
// sizeof so they follow the array definitions automatically.
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@ -2552,6 +2787,115 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
return nodes;
}
// Result bundle of loadConLReplicatedNodesTuple_create(): the nodes that load a
// 64-bit constant and replicate it into a vector register.
// Either (_large_hi, _large_lo) or _small is set, and the other is NULL.
typedef struct {
loadConL_hiNode *_large_hi; // first half of the two-node load (large constant pool), else NULL
loadConL_loNode *_large_lo; // second half of the two-node load (large constant pool), else NULL
mtvsrdNode *_moved; // moves the loaded long into a vector register
xxspltdNode *_replicated; // splats the moved value across the vector register
loadConLNode *_small; // single-node load (small constant pool), else NULL
MachNode *_last; // final node of the sequence; set to _replicated by the creator
} loadConLReplicatedNodesTuple;
// Build the post-allocation node sequence that loads the 64-bit constant immSrc
// from the TOC into a GPR, moves it into a vector register (mtvsrd) and splats
// it (xxspltd with the `zero` immediate).
//
//   C                            - current compilation (unused here)
//   ra_                          - register allocator; the new nodes get fixed registers
//   toc                          - node providing the TOC base
//   immSrc                       - constant to load
//   dst                          - destination operand of the expanded instruction (unused here)
//   zero                         - immediate operand handed to xxspltd
//   reg_second/reg_first         - GPR pair assigned to the load node(s)
//   reg_vec_second/reg_vec_first - vector register pair assigned to the splat node
//
// Returns a tuple whose _last member is the splat node. Either
// (_large_hi, _large_lo) or _small is set, depending on large_constant_pool.
loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                                                 vecXOper *dst, immI_0Oper *zero,
                                                                 OptoReg::Name reg_second, OptoReg::Name reg_first,
                                                                 OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
  loadConLReplicatedNodesTuple nodes;

  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
  if (large_constant_pool) {
    // Create new nodes.
    loadConL_hiNode *m1 = new loadConL_hiNode();
    loadConL_loNode *m2 = new loadConL_loNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes: toc -> hi -> lo -> mtvsrd -> xxspltd
    m1->add_req(NULL, toc);
    m2->add_req(NULL, m1);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m1->_opnds[0] = new iRegLdstOper(); // dst
    m1->_opnds[1] = immSrc;             // src
    m1->_opnds[2] = new iRegPdstOper(); // toc

    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegLdstOper(); // base

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib TOC fields.
    m1->_const_toc_offset = -1;
    m2->_const_toc_offset_hi_node = m1;

    // Initialize ins_attrib instruction offset.
    m1->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m1->_idx, reg_second, reg_first);
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = m1;
    nodes._large_lo = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._small = NULL;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  } else {
    loadConLNode *m2 = new loadConLNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes: toc -> load -> mtvsrd -> xxspltd.
    // The move and splat nodes must be chained to the load exactly as in the
    // large-pool branch; otherwise they are left without any input edges.
    m2->add_req(NULL, toc);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegPdstOper(); // toc

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib instruction offset.
    m2->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = NULL;
    nodes._large_lo = NULL;
    nodes._small = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  }

  return nodes;
}
%} // source
encode %{
@ -3212,6 +3556,27 @@ encode %{
assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
%}
// Post-allocation expansion for replicating a float constant into a VecX
// register: the float's bit pattern is replicated into a 64-bit immediate,
// loaded from the TOC, moved into a vector register and splatted.
// The generated nodes use fixed registers: R20 for the load, VSR10/VSR11 for the vector.
enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
  // Create new nodes.

  // Make an operand with the bit pattern to load as float.
  immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
  immI_0Oper *op_zero = new immI_0Oper(0);

  loadConLReplicatedNodesTuple loadConLNodes =
    loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
                                        OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
                                        OptoReg::Name(VSR11_num), OptoReg::Name(VSR10_num));

  // Push new nodes in dependency order. Only one of (_large_hi, _large_lo) and
  // _small is non-NULL; the single-node load must be pushed too, otherwise the
  // move/splat nodes would be emitted without the load that feeds them.
  if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
  if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
  if (loadConLNodes._small)    { nodes->push(loadConLNodes._small); }
  if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
  if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }

  assert(nodes->length() >= 1, "must have created at least 1 node");
%}
// This enc_class is needed so that scheduler gets proper
// input mapping for latency computation.
enc_class enc_poll(immI dst, iRegLdst poll) %{
@ -3840,6 +4205,14 @@ ins_attrib ins_field_load_ic_node(0);
//
// Formats are generated automatically for constants and base registers.
// Operand for a 128-bit vector value: matches the ideal type VecX and is
// allocated from the vectorx_reg register class. The format is intentionally empty.
operand vecX() %{
constraint(ALLOC_IN_RC(vectorx_reg));
match(VecX);
format %{ %}
interface(REG_INTER);
%}
//----------Simple Operands----------------------------------------------------
// Immediate Operands
@ -5372,6 +5745,20 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
ins_pipe(pipe_class_memory);
%}
// Load a 16-byte vector into a VecX register.
// Matches LoadVector only when the node's memory size is 16 bytes; the address
// must be an `indirect` operand (base register only) and is loaded with one LXVD2X.
instruct loadV16(vecX dst, indirect mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(MEMORY_REF_COST);
format %{ "LXVD2X $dst, $mem \t// load 16-byte Vector" %}
size(4);
ins_encode %{
__ lxvd2x($dst$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
match(Set dst (LoadRange mem));
@ -6368,6 +6755,20 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
ins_pipe(pipe_class_memory);
%}
// Store a 16-byte vector from a VecX register to memory.
// Matches StoreVector only when the node's memory size is 16 bytes; the address
// must be an `indirect` operand (base register only) and is stored with one STXVD2X.
instruct storeV16(indirect mem, vecX src) %{
predicate(n->as_StoreVector()->memory_size() == 16);
match(Set mem (StoreVector mem src));
ins_cost(MEMORY_REF_COST);
format %{ "STXVD2X $mem, $src \t// store 16-byte Vector" %}
size(4);
ins_encode %{
__ stxvd2x($src$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@ -13239,6 +13640,26 @@ instruct storeS_reversed(iRegIsrc src, indirect mem) %{
ins_pipe(pipe_class_default);
%}
// Helper rule without a match clause: only reachable from expand blocks.
// Emits a single MTVSRWZ that moves the int register src into the vector
// register temp1. A format is supplied so the node prints like the other
// VSX helper rules (mtvsrd, xxpermdi).
instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
  effect(DEF temp1, USE src);

  format %{ "MTVSRWZ $temp1, $src \t// Move to 16-byte register" %}
  size(4);
  ins_encode %{
    __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
// Helper rule without a match clause: only reachable from expand blocks.
// Emits a single XXSPLTW that splats the word of src selected by imm1 into dst.
// A format is supplied so the node prints like the other VSX helper rules
// (mtvsrd, xxpermdi).
instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
  effect(DEF dst, USE src, USE imm1);

  format %{ "XXSPLTW $dst, $src, $imm1 \t// Splat word" %}
  size(4);
  ins_encode %{
    __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
//---------- Replicate Vector Instructions ------------------------------------
// Insrdi does replicate if src == dst.
@ -13318,6 +13739,46 @@ instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
// Replicate a byte held in an int register into a 16-element VecX.
// Expand-only rule: copy src into a long temp, apply repl56 and repl48 to it,
// move the result into a vector temp with mtvsrwz, then splat with xxspltw
// using word index 1.
// NOTE(review): repl56/repl48 are defined outside this view; presumably they
// widen the byte pattern to 16 and then 32 bits -- confirm.
instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 imm1 %{ (int) 1 %}
moveReg(tmpL, src);
repl56(tmpL);
repl48(tmpL);
mtvsrwz(tmpV, tmpL);
xxspltw(dst, tmpV, imm1);
%}
%}
// Replicate the constant 0 into a 16-byte vector.
// The encoding XORs dst with itself, so the $zero operand is used for matching
// and formatting only, and the previous contents of dst do not matter.
instruct repl16B_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateB zero));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the constant -1 into a 16-byte vector.
// The encoding applies XXLEQV to dst with itself; $src is used for matching and
// formatting only.
// NOTE(review): XXLEQV is presumably bitwise equivalence, so x EQV x yields all
// ones -- confirm against the ISA.
instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 4);
@ -13352,6 +13813,46 @@ instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
// Replicate a short held in an int register into an 8-element VecX.
// Expand-only rule: copy src into a long temp, apply repl48 and repl32 to it,
// move the 64-bit result into a vector temp with mtvsrd, then permute with
// xxpermdi using selector 0 and the temp as both sources.
// NOTE(review): repl48/repl32 are defined outside this view; presumably they
// widen the pattern to 32 and then 64 bits -- confirm.
instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 8);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl48(tmpL);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// Replicate the constant 0 into an 8-short vector.
// The encoding XORs dst with itself; $zero is used for matching and formatting only.
instruct repl8S_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateS zero));
predicate(n->as_Vector()->length() == 8);
format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the constant -1 into an 8-short vector.
// The encoding applies XXLEQV to dst with itself; $src is used for matching and
// formatting only. The format tag names this rule's element layout (8S).
instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 8);

  format %{ "XXLEQV $dst, $src \t// replicate8S" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 2);
@ -13386,6 +13887,46 @@ instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
// Replicate an int register into a 4-element VecX.
// Expand-only rule: copy src into a long temp, apply repl32 to it, move the
// 64-bit result into a vector temp with mtvsrd, then permute with xxpermdi
// using selector 0 and the temp as both sources.
// NOTE(review): repl32 is defined outside this view; presumably it duplicates
// the low 32 bits into the high half -- confirm.
instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * DEFAULT_COST);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// Replicate the constant 0 into a 4-int vector.
// The encoding XORs dst with itself; $zero is used for matching and formatting only.
instruct repl4I_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the constant -1 into a 4-int vector.
// The encoding applies XXLEQV to dst with itself; src is used for matching only
// (the format prints $dst three times, mirroring the emitted operands).
instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
match(Set dst (ReplicateF src));
@ -13484,6 +14025,154 @@ instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
%}
// Replicate a float register into a 4-element VecX.
// Expand-only rule: the float's bits travel through a stack slot into an int
// register, are widened to a long temp and passed through repl32, then moved
// into a vector temp (mtvsrd) and permuted with selector 0 (xxpermdi).
// The cost reflects the two memory accesses of the stack round trip.
instruct repl4F_reg_Ex(vecX dst, regF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
expand %{
stackSlotL tmpS;
iRegIdst tmpI;
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveF2I_reg_stack(tmpS, src); // Move float to stack.
moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
moveReg(tmpL, tmpI); // Move int to long reg.
repl32(tmpL); // Replicate bitpattern.
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// Replicate a float constant into a 4-element VecX.
// The rule is expanded after register allocation by
// postalloc_expand_load_replF_constant_vsx, which receives the constant table
// base as its third argument.
instruct repl4F_immF_Ex(vecX dst, immF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(10 * DEFAULT_COST);
postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
%}
// Replicate the float constant 0 into a 4-float vector.
// The encoding XORs dst with itself; $zero is used for matching and formatting
// only. Exactly one instruction is emitted, so the size is declared as 4 bytes
// like the other single-instruction replicate rules.
instruct repl4F_immF0(vecX dst, immF_0 zero) %{
  match(Set dst (ReplicateF zero));
  predicate(n->as_Vector()->length() == 4);

  format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// Replicate a double register into a 2-element VecX.
// Expand-only rule: the double's bits travel through a stack slot into a long
// register, are moved into a vector temp (mtvsrd) and permuted with selector 0
// (xxpermdi) using the temp as both sources.
instruct repl2D_reg_Ex(vecX dst, regD src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);
  expand %{
    stackSlotL tmpS;
    iRegLdst tmpL;
    vecX tmpV;
    immI8 zero %{ (int) 0 %}
    moveD2L_reg_stack(tmpS, src);
    moveD2L_stack_reg(tmpL, tmpS);
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}
// Replicate the constant 0 into a 2-double vector.
// The encoding XORs dst with itself; $zero is used for matching and formatting only.
// NOTE(review): the constant operand is the int immediate immI_0 although the
// rule matches ReplicateD -- confirm this rule can actually be selected.
instruct repl2D_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateD zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the all-ones constant into a 2-double vector.
// The encoding applies XXLEQV to dst with itself; $src is used for matching and
// formatting only. The format tag names this rule's element layout (2D).
// NOTE(review): the constant operand is the int immediate immI_minus1 although
// the rule matches ReplicateD -- confirm this rule can actually be selected.
instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate2D" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// Helper rule: moves a long register into a vector register with one MTVSRD.
// It has no match clause and carries predicate(false), so it is only
// instantiated explicitly (from expand blocks and the post-alloc expansion code).
instruct mtvsrd(vecX dst, iRegLsrc src) %{
predicate(false);
effect(DEF dst, USE src);
format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
size(4);
ins_encode %{
__ mtvsrd($dst$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Helper rule without a match clause: splats a doubleword of src into dst.
// It is encoded as XXPERMDI with src used for both source operands and the
// immediate as selector. The format prints the XXSPLTD mnemonic to match the
// assembler's extended-mnemonic name.
instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
  effect(DEF dst, USE src, USE zero);

  format %{ "XXSPLTD $dst, $src, $zero \t// Permute 16-byte register"%}
  size(4);
  ins_encode %{
    __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
// Helper rule without a match clause: only reachable from expand blocks.
// Emits one XXPERMDI on src1/src2 with the immediate operand as selector.
// The operand is named `zero` because every caller in this file passes 0, but
// the encoding forwards whatever constant it holds.
instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
effect(DEF dst, USE src1, USE src2, USE zero);
format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
// Replicate a long register into a 2-element VecX.
// Expand-only rule: move src into a vector temp with mtvsrd, then permute with
// xxpermdi using selector 0 and the temp as both sources.
instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
match(Set dst (ReplicateL src));
predicate(n->as_Vector()->length() == 2);
expand %{
vecX tmpV;
immI8 zero %{ (int) 0 %}
mtvsrd(tmpV, src);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// Replicate the constant 0 into a 2-long vector.
// The encoding XORs dst with itself; $zero is used for matching and formatting only.
// NOTE(review): the constant operand is the int immediate immI_0 although the
// rule matches ReplicateL -- confirm this rule can actually be selected.
instruct repl2L_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateL zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the constant -1 into a 2-long vector.
// The encoding applies XXLEQV to dst with itself; $src is used for matching and
// formatting only. The format tag names this rule's element layout (2L).
// NOTE(review): the constant operand is the int immediate immI_minus1 although
// the rule matches ReplicateL -- confirm this rule can actually be selected.
instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateL src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate2L" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// ============================================================================
// Safepoint Instruction

View File

@ -31,3 +31,5 @@
// Register definitions for the "no register" values of each register type.
// NOTE(review): REGISTER_DEFINITION is a macro defined outside this view;
// presumably it emits the out-of-line definition of the named constant -- confirm.
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorSRegister, vsnoreg);

View File

@ -677,7 +677,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
* 2 // register halves
+ ConditionRegisterImpl::number_of_registers // condition code registers
+ SpecialRegisterImpl::number_of_registers // special registers
+ VectorRegisterImpl::number_of_registers // VSX registers
+ VectorSRegisterImpl::number_of_registers // VSX registers
};
static const int max_gpr;

View File

@ -479,8 +479,12 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
// NOTE(review): diff hunk without +/- markers. The first comment/assert pair
// below looks like the pre-change code and the rest like the post-change code.
// Read literally, the unconditional `size <= 8` assert would reject the 16-byte
// vectors the VSX branch allows -- confirm against the applied file.
// Note, MaxVectorSize == 8 on PPC64.
assert(size <= 8, "%d bytes vectors are not supported", size);
// Note, MaxVectorSize == 8/16 on PPC64.
// Upper bound on the supported vector size: 16 bytes with VSX, 8 bytes without.
if (VM_Version::has_vsx()) {
assert(size <= 16, "%d bytes vectors are not supported", size);
} else {
assert(size <= 8, "%d bytes vectors are not supported", size);
}
// A vector counts as "wide" when it is larger than 8 bytes.
return size > 8;
}

View File

@ -107,7 +107,10 @@ void VM_Version::initialize() {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
MaxVectorSize = 8;
if (VM_Version::has_vsx())
MaxVectorSize = 16;
else
MaxVectorSize = 8;
#endif
// Create and print feature-string.

View File

@ -2276,6 +2276,10 @@ private:
if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister";
#endif
if (strcmp(rep_var,"$CondRegister") == 0) return "as_ConditionRegister";
#if defined(PPC64)
if (strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister";
if (strcmp(rep_var,"$VectorSRegister") == 0) return "as_VectorSRegister";
#endif
return NULL;
}

View File

@ -115,6 +115,18 @@ public:
ConditionRegister as_ConditionRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
return ::as_ConditionRegister(reg(ra_, node, idx));
}
// Returns this operand's allocated register as a VectorRegister, converting the
// value of reg(ra_, node) with the global ::as_VectorRegister().
VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node) const {
return ::as_VectorRegister(reg(ra_, node));
}
// Indexed variant: same conversion, but uses reg(ra_, node, idx) to pick the register.
VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
return ::as_VectorRegister(reg(ra_, node, idx));
}
// Returns this operand's allocated register as a VectorSRegister, converting the
// value of reg(ra_, node) with the global ::as_VectorSRegister().
// The ADLC maps the $VectorSRegister replacement variable to this accessor name.
VectorSRegister as_VectorSRegister(PhaseRegAlloc *ra_, const Node *node) const {
return ::as_VectorSRegister(reg(ra_, node));
}
// Indexed variant: same conversion, but uses reg(ra_, node, idx) to pick the register.
VectorSRegister as_VectorSRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
return ::as_VectorSRegister(reg(ra_, node, idx));
}
#endif
virtual intptr_t constant() const;

View File

@ -67,7 +67,13 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#elif defined(PPC64) || defined(S390)
#elif defined(PPC64)
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#elif defined(S390)
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX