8327652: S390x: Implements SLP support

Reviewed-by: amitkumar, lucy, mdoerr
Author: Sidraya Jayagond
Date:   2024-11-18 06:51:11 +00:00
Parent: a47d9ba98a
Commit: 92b26317d4

17 changed files with 1147 additions and 89 deletions


@@ -1236,6 +1236,9 @@ class Assembler : public AbstractAssembler {
// NOR
#define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m
// NOT-XOR
#define VNX_ZOPC (unsigned long)(0xe7L << 40 | 0x6cL << 0) // V1 := !(V2 ^ V3), element size = 2**m
// OR
#define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m
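
For orientation: each *_ZOPC macro pre-places the two opcode bytes of the 6-byte vector instruction image, and emit_48() later ORs in the register fields. A minimal standalone sketch of what that composition produces for VNX (field placement per the z/Architecture VRR-c layout; this is an illustration, not the Assembler API itself):

  #include <cstdint>

  uint64_t encode_vnx(int v1, int v2, int v3) {
    const uint64_t VNX_ZOPC = (0xe7ULL << 40) | 0x6cULL;  // opcode bytes E7 .. 6C
    uint64_t insn = VNX_ZOPC;
    insn |= (uint64_t)(v1 & 0xf) << 36;  // V1 field, image bits 8..11
    insn |= (uint64_t)(v2 & 0xf) << 32;  // V2 field, image bits 12..15
    insn |= (uint64_t)(v3 & 0xf) << 28;  // V3 field, image bits 16..19
    // RXB (image bits 36..39) carries the high bit of each register number,
    // which is what makes V16..V31 reachable.
    uint64_t rxb = (uint64_t)(((v1 >> 4) << 3) | ((v2 >> 4) << 2) | ((v3 >> 4) << 1));
    insn |= rxb << 8;
    return insn;  // 48-bit image, as emit_48() expects
  }
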
@@ -1287,6 +1290,13 @@ class Assembler : public AbstractAssembler {
#define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare
#define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String
#define VFA_ZOPC (unsigned long)(0xe7L << 40 | 0xE3L << 0) // V1 := V2 + V3, element size = 2**m
#define VFS_ZOPC (unsigned long)(0xe7L << 40 | 0xE2L << 0) // V1 := V2 - V3, element size = 2**m
#define VFM_ZOPC (unsigned long)(0xe7L << 40 | 0xE7L << 0) // V1 := V2 * V3, element size = 2**m
#define VFD_ZOPC (unsigned long)(0xe7L << 40 | 0xE5L << 0) // V1 := V2 / V3, element size = 2**m
#define VFSQ_ZOPC (unsigned long)(0xe7L << 40 | 0xCEL << 0) // V1 := sqrt of V2, element size = 2**m
#define VFLR_ZOPC (unsigned long)(0xe7L << 40 | 0xC5L << 0) // vector fp load rounded, element size = 2**m
//--------------------------------
//-- Miscellaneous Operations --
@@ -2322,22 +2332,22 @@ class Assembler : public AbstractAssembler {
inline void z_xilf(Register r1, int64_t i2); // xor r1 = r1 ^ i2_imm32 ; or only for bits 32-63
// shift
inline void z_sla( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved!
inline void z_slak(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved!
inline void z_slag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, only 63 bits shifted, sign preserved!
inline void z_sra( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, sign extended
inline void z_srak(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int32, sign extended
inline void z_srag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, sign extended
inline void z_sll( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, zeros added
inline void z_sllk(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int32, zeros added
inline void z_sllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, zeros added
inline void z_srl( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, zero extended
inline void z_srlk(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int32, zero extended
inline void z_srlg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, zero extended
inline void z_sla( Register r1, int64_t d2, Register b2 = Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved!
inline void z_slak(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved!
inline void z_slag(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, only 63 bits shifted, sign preserved!
inline void z_sra( Register r1, int64_t d2, Register b2 = Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, sign extended
inline void z_srak(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int32, sign extended
inline void z_srag(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, sign extended
inline void z_sll( Register r1, int64_t d2, Register b2 = Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, zeros added
inline void z_sllk(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int32, zeros added
inline void z_sllg(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, zeros added
inline void z_srl( Register r1, int64_t d2, Register b2 = Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, zero extended
inline void z_srlk(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int32, zero extended
inline void z_srlg(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, zero extended
// rotate
inline void z_rll( Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int32 -- z10
inline void z_rllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int64 -- z10
inline void z_rll( Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int32 -- z10
inline void z_rllg(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int64 -- z10
// rotate then AND/XOR/OR/insert
inline void z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then AND selected bits -- z196
@@ -2459,7 +2469,7 @@ class Assembler : public AbstractAssembler {
inline void z_mvc(const Address& d, const Address& s, int64_t l); // move l bytes
inline void z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes
inline void z_mvcin(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes
inline void z_mvcle(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // move region of memory
inline void z_mvcle(Register r1, Register r3, int64_t d2, Register b2 = Z_R0); // move region of memory
inline void z_stfle(int64_t d2, Register b2); // store facility list extended
@@ -2491,6 +2501,7 @@ class Assembler : public AbstractAssembler {
// Load (transfer from memory)
inline void z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vl( VectorRegister v1, const Address& a);
inline void z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
@@ -2529,10 +2540,10 @@ class Assembler : public AbstractAssembler {
inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2 = Z_R0);
inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2 = Z_R0);
inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2 = Z_R0);
inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2 = Z_R0);
inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
@@ -2619,6 +2630,7 @@ class Assembler : public AbstractAssembler {
// Store
inline void z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vst( VectorRegister v1, const Address& a);
inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
@@ -2679,13 +2691,16 @@ class Assembler : public AbstractAssembler {
inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// MULTIPLY
inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmlhw(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// MULTIPLY & ADD
inline void z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
@@ -2744,6 +2759,9 @@ class Assembler : public AbstractAssembler {
// NOR
inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// NOT-XOR
inline void z_vnx( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// OR
inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3);
@@ -2810,6 +2828,10 @@ class Assembler : public AbstractAssembler {
inline void z_vctzf( VectorRegister v1, VectorRegister v2);
inline void z_vctzg( VectorRegister v1, VectorRegister v2);
inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vpopctb(VectorRegister v1, VectorRegister v2);
inline void z_vpopcth(VectorRegister v1, VectorRegister v2);
inline void z_vpopctf(VectorRegister v1, VectorRegister v2);
inline void z_vpopctg(VectorRegister v1, VectorRegister v2);
// Rotate/Shift
inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
@@ -2898,9 +2920,39 @@ class Assembler : public AbstractAssembler {
inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
// Vector Floatingpoint instructions
// ==========================
// Add
inline void z_vfa( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vfasb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vfadb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// SUB
inline void z_vfs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vfssb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vfsdb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// MUL
inline void z_vfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vfmsb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vfmdb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// DIV
inline void z_vfd( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vfdsb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vfddb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// square root
inline void z_vfsq( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vfsqsb(VectorRegister v1, VectorRegister v2);
inline void z_vfsqdb(VectorRegister v1, VectorRegister v2);
// vector fp load rounded
inline void z_vflr( VectorRegister v1, VectorRegister v2, int64_t m5, int64_t m3);
inline void z_vflrd( VectorRegister v1, VectorRegister v2, int64_t m5);
// Floatingpoint instructions
// ==========================
// compare instructions
inline void z_cebr(FloatRegister r1, FloatRegister r2); // compare (r1, r2) ; float
inline void z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm12+x2+b2)) ; float


@@ -787,6 +787,7 @@ inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Reg
inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); }
inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); }
inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); }
inline void Assembler::z_vl(VectorRegister v1, const Address& a) { z_vl(v1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
// Gather/Scatter
inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t ix3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | uimm4(ix3, 32, 48)); }
@@ -820,7 +821,7 @@ inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Reg
inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR.
inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmaskt_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); }
inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); }
inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); }
@@ -916,6 +917,7 @@ inline void Assembler::z_vsteh( VectorRegister v1, int64_t d2, Register x2, Reg
inline void Assembler::z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); }
inline void Assembler::z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); }
inline void Assembler::z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
inline void Assembler::z_vst(VectorRegister v1, const Address& a) { z_vst(v1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
// Misc
inline void Assembler::z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
@@ -973,6 +975,9 @@ inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorReg
// MULTIPLY
inline void Assembler::z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vml(v1, v2, v3, VRET_BYTE);} // vector element type 'B'
inline void Assembler::z_vmlhw( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vml(v1, v2, v3, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vmlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vml(v1, v2, v3, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
@@ -1035,6 +1040,9 @@ inline void Assembler::z_vx( VectorRegister v1, VectorRegister v2, VectorReg
// NOR
inline void Assembler::z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// NOT-XOR
inline void Assembler::z_vnx( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// OR
inline void Assembler::z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
@@ -1101,6 +1109,10 @@ inline void Assembler::z_vctzh( VectorRegister v1, VectorRegister v2)
inline void Assembler::z_vctzf( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vctzg( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VPOPCT_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vpopctb( VectorRegister v1, VectorRegister v2) {z_vpopct(v1, v2, VRET_BYTE); }
inline void Assembler::z_vpopcth( VectorRegister v1, VectorRegister v2) {z_vpopct(v1, v2, VRET_HW); }
inline void Assembler::z_vpopctf( VectorRegister v1, VectorRegister v2) {z_vpopct(v1, v2, VRET_FW); }
inline void Assembler::z_vpopctg( VectorRegister v1, VectorRegister v2) {z_vpopct(v1, v2, VRET_DW); }
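
The _b/_h/_f/_g wrappers here (and throughout this file) just pin the element-size control m3; a sketch of the mapping, assuming the VRET_* constants follow the "element size = 2**m" convention noted in the opcode comments above:

  // z_vpopctb(v1, v2) == z_vpopct(v1, v2, VRET_BYTE)  -> 16 x 1-byte counts
  // z_vpopcth(v1, v2) == z_vpopct(v1, v2, VRET_HW)    ->  8 x 2-byte counts
  // z_vpopctf(v1, v2) == z_vpopct(v1, v2, VRET_FW)    ->  4 x 4-byte counts
  // z_vpopctg(v1, v2) == z_vpopct(v1, v2, VRET_DW)    ->  2 x 8-byte counts
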
// Rotate/Shift
inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VERLLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
@@ -1108,7 +1120,7 @@ inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorReg
inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
@@ -1188,12 +1200,41 @@ inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2)
inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); }
inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); }
//-------------------------------
// Vector FLOAT INSTRUCTIONS
//-------------------------------
// ADD
inline void Assembler::z_vfa( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); }
inline void Assembler::z_vfasb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfa(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vfadb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfa(v1, v2, v3, VRET_DW); } // vector element type 'G'
// SUB
inline void Assembler::z_vfs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); }
inline void Assembler::z_vfssb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfs(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vfsdb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfs(v1, v2, v3, VRET_DW); } // vector element type 'G'
// MUL
inline void Assembler::z_vfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); }
inline void Assembler::z_vfmsb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfm(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vfmdb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfm(v1, v2, v3, VRET_DW); } // vector element type 'G'
// DIV
inline void Assembler::z_vfd( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VFD_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_QW, 32)); }
inline void Assembler::z_vfdsb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfd(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vfddb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vfd(v1, v2, v3, VRET_DW); } // vector element type 'G'
// square root
inline void Assembler::z_vfsq( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VFSQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_FW, VRET_QW, 32)); }
inline void Assembler::z_vfsqsb( VectorRegister v1, VectorRegister v2) {z_vfsq(v1, v2, VRET_FW); }
inline void Assembler::z_vfsqdb( VectorRegister v1, VectorRegister v2) {z_vfsq(v1, v2, VRET_DW); }
// vector fp load rounded
inline void Assembler::z_vflr( VectorRegister v1, VectorRegister v2, int64_t m5, int64_t m3) {emit_48(VFLR_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m5, VRET_FW, 7, 24) | vesc_mask(m3, VRET_FW, VRET_QW, 32)); }
inline void Assembler::z_vflrd( VectorRegister v1, VectorRegister v2, int64_t m5) {z_vflr(v1, v2, m5, VRET_DW); }
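
A hedged usage sketch for the new load-rounded form: m5 carries the z/Architecture rounding-mode encoding (e.g. 4 = round to nearest ties-even, 5 = round toward zero), so converting each double element of V26 to float while rounding toward zero would look like (register choices illustrative):

  masm->z_vflrd(Z_V24, Z_V26, 5);  // DW elements -> FW elements, round toward 0
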
//-------------------------------
// FLOAT INSTRUCTIONS
//-------------------------------
//----------------
// LOAD
//----------------
inline void Assembler::z_ler( FloatRegister r1, FloatRegister r2) { emit_16( LER_ZOPC | fregt(r1,8,16) | freg(r2,12,16)); }


@@ -169,6 +169,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
#endif
clear_reg(Z_R0); // make sure register is properly initialized.
#if 0
if (VM_Version::has_VectorFacility()) {
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
// Otherwise just do nothing in vector mode.
@@ -223,6 +224,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
bind(VectorDone);
}
#endif
{
const int min_cnt = 8; // Minimum #characters required to use unrolled loop.
@@ -461,6 +463,7 @@ unsigned int C2_MacroAssembler::string_inflate(Register src, Register dst, Regis
#endif
clear_reg(Z_R0); // make sure register is properly initialized.
#if 0
if (VM_Version::has_VectorFacility()) {
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
// Otherwise just do nothing in vector mode.
@@ -489,6 +492,7 @@ unsigned int C2_MacroAssembler::string_inflate(Register src, Register dst, Regis
bind(VectorDone);
}
#endif
const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop.
// Otherwise just do nothing in unrolled scalar mode.
@@ -623,6 +627,7 @@ unsigned int C2_MacroAssembler::string_inflate_const(Register src, Register dst,
bool restore_inputs = false;
bool workreg_clear = false;
#if 0
if ((len >= 32) && VM_Version::has_VectorFacility()) {
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
// Otherwise just do nothing in vector mode.
@@ -678,6 +683,7 @@ unsigned int C2_MacroAssembler::string_inflate_const(Register src, Register dst,
src_off += min_vcnt;
dst_off += min_vcnt*2;
}
#endif
if ((len-nprocessed) > 8) {
const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop.


@@ -60,7 +60,7 @@ define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoRegScheduling, false);
define_pd_global(bool, SuperWordLoopUnrollAnalysis, false);
define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
// On s390x, we can clear the array with a single instruction,
// so don't idealize it.
define_pd_global(bool, IdealizeClearArrayNode, false);


@@ -107,6 +107,11 @@ define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
/* Seems to pay off with 2 pages already. */ \
product(size_t, MVCLEThreshold, +2*(4*K), DIAGNOSTIC, \
"Threshold above which page-aligned MVCLE copy/init is used.") \
/* special instructions */ \
product(bool, SuperwordUseVX, false, \
"Use Z15 Vector instructions for superword optimization.") \
product(bool, UseSFPV, false, DIAGNOSTIC, \
"Use SFPV Vector instructions for superword optimization.") \
\
product(bool, PreferLAoverADD, false, DIAGNOSTIC, \
"Use LA/LAY instructions over ADD instructions (z/Architecture).") \


@@ -47,10 +47,11 @@ class RegisterSaver {
// Boolean flags to force only argument registers to be saved.
static int live_reg_save_size(RegisterSet reg_set);
static int live_reg_frame_size(RegisterSet reg_set);
static int live_reg_frame_size(RegisterSet reg_set, bool save_vectors = false);
static int calculate_vregstosave_num();
// Specify the register that should be stored as the return pc in the current frame.
static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14);
static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set);
static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14, bool save_vectors = false);
static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors = false);
// Generate the OopMap (again, regs where saved before).
static OopMap* generate_oop_map(MacroAssembler* masm, RegisterSet reg_set);
@@ -65,11 +66,13 @@ class RegisterSaver {
int_reg = 0,
float_reg = 1,
excluded_reg = 2, // Not saved/restored.
v_reg = 3
} RegisterType;
typedef enum {
reg_size = 8,
half_reg_size = reg_size / 2,
v_reg_size = 16
} RegisterConstants;
// Remember type, number, and VMReg.


@@ -26,11 +26,6 @@
#include "precompiled.hpp"
#include "register_s390.hpp"
const int ConcreteRegisterImpl::max_gpr = Register::number_of_registers * 2;
const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
FloatRegister::number_of_registers * 2;
const char* Register::name() const {
const char* names[number_of_registers] = {
"Z_R0", "Z_R1", "Z_R2", "Z_R3", "Z_R4", "Z_R5", "Z_R6", "Z_R7",
@@ -54,5 +49,11 @@ const char* VectorRegister::name() const {
"Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
"Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
};
return is_valid() ? names[encoding()] : "fnoreg";
return is_valid() ? names[encoding()] : "vnoreg";
}
// Convert a FloatRegister to the overlapping VectorRegister.
VectorRegister FloatRegister::to_vr() const {
if (*this == fnoreg) { return vnoreg; }
return as_VectorRegister(encoding());
}
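
Because the first 16 vector registers overlay the floating-point registers, match rules can lift an allocated FloatRegister into vector form; this is what the replicate rules in the ad file below do via $src$$FloatRegister->to_vr(). A usage sketch (register choice illustrative):

  VectorRegister v = Z_F1->to_vr();  // Z_V1: the VR overlaying FPR 1
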


@@ -64,6 +64,7 @@ class Register {
public:
enum {
number_of_registers = 16,
max_slots_per_register = 2,
number_of_arg_registers = 5
};
@@ -164,12 +165,13 @@ constexpr ConditionRegister Z_CR = as_ConditionRegister(0);
//=========================
// The implementation of float registers for the z/Architecture.
class VectorRegister;
class FloatRegister {
int _encoding;
public:
enum {
number_of_registers = 16,
max_slots_per_register = 2,
number_of_arg_registers = 4
};
@@ -192,6 +194,8 @@ public:
constexpr bool is_nonvolatile() const { return (8 <= _encoding && _encoding <= 15); }
const char* name() const;
// convert to VR
VectorRegister to_vr() const;
};
inline constexpr FloatRegister as_FloatRegister(int encoding) {
@@ -285,6 +289,7 @@ class VectorRegister {
public:
enum {
number_of_registers = 32,
max_slots_per_register = 4,
number_of_arg_registers = 0
};
@@ -379,21 +384,20 @@ constexpr VectorRegister Z_V31 = as_VectorRegister(31);
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
public:
enum {
number_of_registers =
(Register::number_of_registers +
FloatRegister::number_of_registers)
* 2 // register halves
+ 1 // condition code register
max_gpr = Register::number_of_registers * Register::max_slots_per_register,
max_fpr = max_gpr + FloatRegister::number_of_registers * FloatRegister::max_slots_per_register,
max_vr = max_fpr + VectorRegister::number_of_registers * VectorRegister::max_slots_per_register,
// A big enough number for C2: all the registers plus flags
// This number must be large enough to cover REG_COUNT (defined by c2) registers.
// There is no requirement that any ordering here matches any ordering c2 gives
// its OptoRegs.
number_of_registers = max_vr + 1 // gpr/fpr/vr + flags
};
static const int max_gpr;
static const int max_fpr;
};
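
Spelling out the slot arithmetic the new enum encodes (values follow directly from the number_of_registers and max_slots_per_register enums above):

  static_assert(16 * 2 == 32,       "max_gpr: 16 GPRs x 2 slots");
  static_assert(32 + 16 * 2 == 64,  "max_fpr: max_gpr + 16 FPRs x 2 slots");
  static_assert(64 + 32 * 4 == 192, "max_vr:  max_fpr + 32 VRs x 4 slots");
  // number_of_registers = max_vr + 1 (flags) = 193
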
// Common register declarations used in assembler code.
constexpr Register Z_EXC_OOP = Z_R2;
constexpr Register Z_EXC_PC = Z_R3;


@@ -97,8 +97,9 @@ register %{
// e.g. Z_R3_H, which is needed by the allocator, but is not used
// for stores, loads, etc.
// Integer/Long Registers
// ----------------------------
// ----------------------------
// Integer/Long Registers
// ----------------------------
// z/Architecture has 16 64-bit integer registers.
@@ -136,7 +137,9 @@ register %{
reg_def Z_R15 (NS, NS, Op_RegI, 15, Z_R15->as_VMReg()); // s SP
reg_def Z_R15_H(NS, NS, Op_RegI, 99, Z_R15->as_VMReg()->next());
// Float/Double Registers
// ----------------------------
// Float/Double Registers
// ----------------------------
// The rules of ADL require that double registers be defined in pairs.
// Each pair must be two 32-bit values, but not necessarily a pair of
@@ -182,7 +185,169 @@ register %{
reg_def Z_F15 (SOC, SOE, Op_RegF, 15, Z_F15->as_VMReg());
reg_def Z_F15_H(SOC, SOE, Op_RegF, 99, Z_F15->as_VMReg()->next());
// ----------------------------
// Vector Registers
// ----------------------------
// The first 16 VRs are aliases for the FPRs, which are already defined above.
reg_def Z_VR0 ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def Z_VR0_H ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def Z_VR0_J ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def Z_VR0_K ( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def Z_VR1 ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad());
reg_def Z_VR1_H ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad());
reg_def Z_VR1_J ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad());
reg_def Z_VR1_K ( SOC, SOC, Op_RegF, 1, VMRegImpl::Bad());
reg_def Z_VR2 ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad());
reg_def Z_VR2_H ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad());
reg_def Z_VR2_J ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad());
reg_def Z_VR2_K ( SOC, SOC, Op_RegF, 2, VMRegImpl::Bad());
reg_def Z_VR3 ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad());
reg_def Z_VR3_H ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad());
reg_def Z_VR3_J ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad());
reg_def Z_VR3_K ( SOC, SOC, Op_RegF, 3, VMRegImpl::Bad());
reg_def Z_VR4 ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad());
reg_def Z_VR4_H ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad());
reg_def Z_VR4_J ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad());
reg_def Z_VR4_K ( SOC, SOC, Op_RegF, 4, VMRegImpl::Bad());
reg_def Z_VR5 ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad());
reg_def Z_VR5_H ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad());
reg_def Z_VR5_J ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad());
reg_def Z_VR5_K ( SOC, SOC, Op_RegF, 5, VMRegImpl::Bad());
reg_def Z_VR6 ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad());
reg_def Z_VR6_H ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad());
reg_def Z_VR6_J ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad());
reg_def Z_VR6_K ( SOC, SOC, Op_RegF, 6, VMRegImpl::Bad());
reg_def Z_VR7 ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad());
reg_def Z_VR7_H ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad());
reg_def Z_VR7_J ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad());
reg_def Z_VR7_K ( SOC, SOC, Op_RegF, 7, VMRegImpl::Bad());
reg_def Z_VR8 ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def Z_VR8_H ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def Z_VR8_J ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def Z_VR8_K ( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def Z_VR9 ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def Z_VR9_H ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def Z_VR9_J ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def Z_VR9_K ( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def Z_VR10 ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def Z_VR10_H ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def Z_VR10_J ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def Z_VR10_K ( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def Z_VR11 ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def Z_VR11_H ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def Z_VR11_J ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def Z_VR11_K ( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def Z_VR12 ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def Z_VR12_H ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def Z_VR12_J ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def Z_VR12_K ( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def Z_VR13 ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def Z_VR13_H ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def Z_VR13_J ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def Z_VR13_K ( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def Z_VR14 ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def Z_VR14_H ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def Z_VR14_J ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def Z_VR14_K ( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def Z_VR15 ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
reg_def Z_VR15_H ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
reg_def Z_VR15_J ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
reg_def Z_VR15_K ( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
reg_def Z_VR16 ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg() );
reg_def Z_VR16_H ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg()->next() );
reg_def Z_VR16_J ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg()->next(2) );
reg_def Z_VR16_K ( SOC, SOC, Op_RegF, 16, Z_V16->as_VMReg()->next(3) );
reg_def Z_VR17 ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg() );
reg_def Z_VR17_H ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg()->next() );
reg_def Z_VR17_J ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg()->next(2) );
reg_def Z_VR17_K ( SOC, SOC, Op_RegF, 17, Z_V17->as_VMReg()->next(3) );
reg_def Z_VR18 ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg() );
reg_def Z_VR18_H ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg()->next() );
reg_def Z_VR18_J ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg()->next(2) );
reg_def Z_VR18_K ( SOC, SOC, Op_RegF, 18, Z_V18->as_VMReg()->next(3) );
reg_def Z_VR19 ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg() );
reg_def Z_VR19_H ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg()->next() );
reg_def Z_VR19_J ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg()->next(2) );
reg_def Z_VR19_K ( SOC, SOC, Op_RegF, 19, Z_V19->as_VMReg()->next(3) );
reg_def Z_VR20 ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg() );
reg_def Z_VR20_H ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg()->next() );
reg_def Z_VR20_J ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg()->next(2) );
reg_def Z_VR20_K ( SOC, SOC, Op_RegF, 20, Z_V20->as_VMReg()->next(3) );
reg_def Z_VR21 ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg() );
reg_def Z_VR21_H ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg()->next() );
reg_def Z_VR21_J ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg()->next(2) );
reg_def Z_VR21_K ( SOC, SOC, Op_RegF, 21, Z_V21->as_VMReg()->next(3) );
reg_def Z_VR22 ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg() );
reg_def Z_VR22_H ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg()->next() );
reg_def Z_VR22_J ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg()->next(2) );
reg_def Z_VR22_K ( SOC, SOC, Op_RegF, 22, Z_V22->as_VMReg()->next(3) );
reg_def Z_VR23 ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg() );
reg_def Z_VR23_H ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg()->next() );
reg_def Z_VR23_J ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg()->next(2) );
reg_def Z_VR23_K ( SOC, SOC, Op_RegF, 23, Z_V23->as_VMReg()->next(3) );
reg_def Z_VR24 ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg() );
reg_def Z_VR24_H ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg()->next() );
reg_def Z_VR24_J ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg()->next(2) );
reg_def Z_VR24_K ( SOC, SOC, Op_RegF, 24, Z_V24->as_VMReg()->next(3) );
reg_def Z_VR25 ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg() );
reg_def Z_VR25_H ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg()->next() );
reg_def Z_VR25_J ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg()->next(2) );
reg_def Z_VR25_K ( SOC, SOC, Op_RegF, 25, Z_V25->as_VMReg()->next(3) );
reg_def Z_VR26 ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg() );
reg_def Z_VR26_H ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg()->next() );
reg_def Z_VR26_J ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg()->next(2) );
reg_def Z_VR26_K ( SOC, SOC, Op_RegF, 26, Z_V26->as_VMReg()->next(3) );
reg_def Z_VR27 ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg() );
reg_def Z_VR27_H ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg()->next() );
reg_def Z_VR27_J ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg()->next(2) );
reg_def Z_VR27_K ( SOC, SOC, Op_RegF, 27, Z_V27->as_VMReg()->next(3) );
reg_def Z_VR28 ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg() );
reg_def Z_VR28_H ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg()->next() );
reg_def Z_VR28_J ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg()->next(2) );
reg_def Z_VR28_K ( SOC, SOC, Op_RegF, 28, Z_V28->as_VMReg()->next(3) );
reg_def Z_VR29 ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg() );
reg_def Z_VR29_H ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg()->next() );
reg_def Z_VR29_J ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg()->next(2) );
reg_def Z_VR29_K ( SOC, SOC, Op_RegF, 29, Z_V29->as_VMReg()->next(3) );
reg_def Z_VR30 ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg() );
reg_def Z_VR30_H ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg()->next() );
reg_def Z_VR30_J ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg()->next(2) );
reg_def Z_VR30_K ( SOC, SOC, Op_RegF, 30, Z_V30->as_VMReg()->next(3) );
reg_def Z_VR31 ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg() );
reg_def Z_VR31_H ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg()->next() );
reg_def Z_VR31_J ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg()->next(2) );
reg_def Z_VR31_K ( SOC, SOC, Op_RegF, 31, Z_V31->as_VMReg()->next(3) );
// Special Registers
// Condition Codes Flag Registers
@@ -194,7 +359,6 @@ register %{
reg_def Z_CR(SOC, SOC, Op_RegFlags, 0, Z_CR->as_VMReg()); // volatile
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
@@ -268,6 +432,41 @@ alloc_class chunk1(
);
alloc_class chunk2(
Z_VR0, Z_VR0_H, Z_VR0_J, Z_VR0_K,
Z_VR1, Z_VR1_H, Z_VR1_J, Z_VR1_K,
Z_VR2, Z_VR2_H, Z_VR2_J, Z_VR2_K,
Z_VR3, Z_VR3_H, Z_VR3_J, Z_VR3_K,
Z_VR4, Z_VR4_H, Z_VR4_J, Z_VR4_K,
Z_VR5, Z_VR5_H, Z_VR5_J, Z_VR5_K,
Z_VR6, Z_VR6_H, Z_VR6_J, Z_VR6_K,
Z_VR7, Z_VR7_H, Z_VR7_J, Z_VR7_K,
Z_VR8, Z_VR8_H, Z_VR8_J, Z_VR8_K,
Z_VR9, Z_VR9_H, Z_VR9_J, Z_VR9_K,
Z_VR10, Z_VR10_H, Z_VR10_J, Z_VR10_K,
Z_VR11, Z_VR11_H, Z_VR11_J, Z_VR11_K,
Z_VR12, Z_VR12_H, Z_VR12_J, Z_VR12_K,
Z_VR13, Z_VR13_H, Z_VR13_J, Z_VR13_K,
Z_VR14, Z_VR14_H, Z_VR14_J, Z_VR14_K,
Z_VR15, Z_VR15_H, Z_VR15_J, Z_VR15_K,
Z_VR16, Z_VR16_H, Z_VR16_J, Z_VR16_K,
Z_VR17, Z_VR17_H, Z_VR17_J, Z_VR17_K,
Z_VR18, Z_VR18_H, Z_VR18_J, Z_VR18_K,
Z_VR19, Z_VR19_H, Z_VR19_J, Z_VR19_K,
Z_VR20, Z_VR20_H, Z_VR20_J, Z_VR20_K,
Z_VR21, Z_VR21_H, Z_VR21_J, Z_VR21_K,
Z_VR22, Z_VR22_H, Z_VR22_J, Z_VR22_K,
Z_VR23, Z_VR23_H, Z_VR23_J, Z_VR23_K,
Z_VR24, Z_VR24_H, Z_VR24_J, Z_VR24_K,
Z_VR25, Z_VR25_H, Z_VR25_J, Z_VR25_K,
Z_VR26, Z_VR26_H, Z_VR26_J, Z_VR26_K,
Z_VR27, Z_VR27_H, Z_VR27_J, Z_VR27_K,
Z_VR28, Z_VR28_H, Z_VR28_J, Z_VR28_K,
Z_VR29, Z_VR29_H, Z_VR29_J, Z_VR29_K,
Z_VR30, Z_VR30_H, Z_VR30_J, Z_VR30_K,
Z_VR31, Z_VR31_H, Z_VR31_J, Z_VR31_K
);
alloc_class chunk3(
Z_CR
);
@@ -542,6 +741,27 @@ reg_class z_dbl_reg(
);
reg_class z_rscratch1_dbl_reg(Z_F1,Z_F1_H);
reg_class z_v_reg(
// Attention: Only these are saved & restored at safepoint by RegisterSaver.
// The first 16 VRs overlap with the 16 FPRs, so they are not listed here.
Z_VR16, Z_VR16_H, Z_VR16_J, Z_VR16_K,
Z_VR17, Z_VR17_H, Z_VR17_J, Z_VR17_K,
Z_VR18, Z_VR18_H, Z_VR18_J, Z_VR18_K,
Z_VR19, Z_VR19_H, Z_VR19_J, Z_VR19_K,
Z_VR20, Z_VR20_H, Z_VR20_J, Z_VR20_K,
Z_VR21, Z_VR21_H, Z_VR21_J, Z_VR21_K,
Z_VR22, Z_VR22_H, Z_VR22_J, Z_VR22_K,
Z_VR23, Z_VR23_H, Z_VR23_J, Z_VR23_K,
Z_VR24, Z_VR24_H, Z_VR24_J, Z_VR24_K,
Z_VR25, Z_VR25_H, Z_VR25_J, Z_VR25_K,
Z_VR26, Z_VR26_H, Z_VR26_J, Z_VR26_K,
Z_VR27, Z_VR27_H, Z_VR27_J, Z_VR27_K,
Z_VR28, Z_VR28_H, Z_VR28_J, Z_VR28_K,
Z_VR29, Z_VR29_H, Z_VR29_J, Z_VR29_K,
Z_VR30, Z_VR30_H, Z_VR30_J, Z_VR30_K,
Z_VR31, Z_VR31_H, Z_VR31_J, Z_VR31_K
);
%}
//----------DEFINITION BLOCK---------------------------------------------------
@@ -953,8 +1173,8 @@ const Pipeline * MachEpilogNode::pipeline() const {
//=============================================================================
// Figure out which register class each belongs in: rc_int, rc_float, rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };
// Figure out which register class each belongs in: rc_int, rc_float, rc_vector, rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack };
static enum RC rc_class(OptoReg::Name reg) {
// Return the register class for the given register. The given register
@@ -975,8 +1195,13 @@ static enum RC rc_class(OptoReg::Name reg) {
return rc_float;
}
// Next come the 32 vector registers: 128 slots (4 per register), starting at index 64.
if (reg < 32+32+128) {
return rc_vector;
}
// Between float regs & stack are the flags regs.
assert(reg >= OptoReg::stack0(), "blow up if spilling flags");
assert(OptoReg::is_stack(reg) || reg < 32+32+128, "blow up if spilling flags");
return rc_stack;
}
@@ -1035,7 +1260,7 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
"expected aligned-adjacent pairs");
// Generate spill code!
int size = 0;
if (src_lo == dst_lo && src_hi == dst_hi) {
return 0; // Self copy, no move.
}
@@ -1049,6 +1274,37 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
const char *mnemo = nullptr;
unsigned long opc = 0;
if (bottom_type()->isa_vect() != nullptr && ideal_reg() == Op_VecX) {
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
if (masm != nullptr) {
__ z_mvc(Address(Z_SP, 0, dst_offset), Address(Z_SP, 0, src_offset), 16);
}
size += 6;
} else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) {
VectorRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]);
if (masm != nullptr) {
__ z_vst(Rsrc, Address(Z_SP, 0, dst_offset));
}
size += 6;
} else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) {
VectorRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]);
if (masm != nullptr) {
__ z_vl(Rdst, Address(Z_SP, 0, src_offset));
}
size += 6;
} else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) {
VectorRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]);
VectorRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]);
if (masm != nullptr) {
__ z_vlr(Rdst, Rsrc);
}
size += 6;
} else {
ShouldNotReachHere();
}
return size;
}
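
Each of the four paths above emits exactly one 6-byte instruction (MVC is SS-format; VST and VL are VRX-format; VLR is VRR-format), which is why every arm adds 6 to size.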
// Memory->Memory Spill. Use Z_R0 to hold the value.
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
@@ -1283,7 +1539,7 @@ source_hpp %{
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.
#include "opto/convertnode.hpp"
#include "oops/klass.inline.hpp"
//--------------------------------------------------------------
@@ -1446,6 +1702,32 @@ bool Matcher::match_rule_supported(int opcode) {
case Op_PopCountL:
// PopCount supported by H/W from z/Architecture G5 (z196) on.
return (UsePopCountInstruction && VM_Version::has_PopCount());
case Op_AddVB:
case Op_AddVS:
case Op_AddVI:
case Op_AddVL:
case Op_AddVD:
case Op_SubVB:
case Op_SubVS:
case Op_SubVI:
case Op_SubVL:
case Op_SubVD:
case Op_MulVB:
case Op_MulVS:
case Op_MulVI:
case Op_MulVD:
case Op_DivVD:
case Op_SqrtVD:
case Op_RoundDoubleModeV:
return SuperwordUseVX;
case Op_AddVF:
case Op_SubVF:
case Op_MulVF:
case Op_DivVF:
case Op_SqrtVF:
// PopCountVI is supported from z14 onwards.
case Op_PopCountVI:
return (SuperwordUseVX && UseSFPV);
case Op_FmaF:
case Op_FmaD:
return UseFMA;
@@ -1491,14 +1773,24 @@ OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
// Vector width in bytes.
int Matcher::vector_width_in_bytes(BasicType bt) {
assert(MaxVectorSize == 8, "");
return 8;
if (SuperwordUseVX) {
assert(MaxVectorSize == 16, "");
return 16;
} else {
assert(MaxVectorSize == 8, "");
return 8;
}
}
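
Both branches assert rather than compute: MaxVectorSize is presumably capped to the matching value elsewhere in this change (VM_Version/argument processing) when SuperwordUseVX is toggled; the matcher merely consumes it here.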
// Vector ideal reg.
uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
if (SuperwordUseVX) {
assert(MaxVectorSize == 16 && size == 16, "");
return Op_VecX;
} else {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
}
}
// Limits on vector size (number of elements) loaded into vector.
@@ -2391,6 +2683,14 @@ ins_attrib ins_should_rematerialize(false);
// Immediate Operands
// Please note:
// Formats are generated automatically for constants and base registers.
operand vecX() %{
constraint(ALLOC_IN_RC(z_v_reg));
match(VecX);
format %{ %}
interface(REG_INTER);
%}
//----------------------------------------------
// SIGNED (shorter than INT) immediate operands
@@ -10534,6 +10834,45 @@ instruct Repl4S_immm1(iRegL dst, immS_minus1 src) %{
ins_pipe(pipe_class_dummy);
%}
instruct repl8S_reg_Ex(vecX dst, iRegI src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 8 &&
Matcher::vector_element_basic_type(n) == T_SHORT);
size(12);
ins_encode %{
__ z_vlvgh($dst$$VectorRegister, $src$$Register, 0);
__ z_vreph($dst$$VectorRegister, $dst$$VectorRegister, 0);
%}
ins_pipe(pipe_class_dummy);
%}
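
All of the repl*_reg_Ex rules use the same two-instruction broadcast idiom: VLVG inserts the GPR value into element 0, then VREP replicates element 0 across the whole register. Schematically, for the 8S case above (registers illustrative):

  // VLVGH V24, R2, 0    -- V24[0] = low halfword of R2
  // VREPH V24, V24, 0   -- broadcast element 0 to all 8 halfword elements
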
instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 8 &&
Matcher::vector_element_basic_type(n) == T_SHORT);
format %{ "VONE $dst, $src \t// replicate8S" %}
size(6);
ins_encode %{
__ z_vone($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl8S_immI0(vecX dst, immI_0 zero) %{
match(Set dst (Replicate zero));
predicate(n->as_Vector()->length() == 8 &&
Matcher::vector_element_basic_type(n) == T_SHORT);
format %{ "VZERO $dst, $zero \t// replicate8S" %}
size(6);
ins_encode %{
__ z_vzero($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
// Exploit rotate_then_insert, if available.
// Replicate scalar int to packed int values (8 Bytes).
instruct Repl2I_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{
@@ -10586,7 +10925,44 @@ instruct Repl2I_immm1(iRegL dst, immI_minus1 src) %{
ins_pipe(pipe_class_dummy);
%}
//
instruct repl4I_reg_Ex(vecX dst, iRegI src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 4 &&
Matcher::vector_element_basic_type(n) == T_INT);
size(12);
ins_encode %{
__ z_vlvgf($dst$$VectorRegister, $src$$Register, 0);
__ z_vrepf($dst$$VectorRegister, $dst$$VectorRegister, 0);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl4I_immI0(vecX dst, immI_0 zero) %{
match(Set dst (Replicate zero));
predicate(n->as_Vector()->length() == 4 &&
Matcher::vector_element_basic_type(n) == T_INT);
format %{ "VZERO $dst, $zero \t// replicate4I" %}
size(6);
ins_encode %{
__ z_vzero($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 4 &&
Matcher::vector_element_basic_type(n) == T_INT);
format %{ "VONE $dst, $dst, $dst \t// replicate4I" %}
size(6);
ins_encode %{
__ z_vone($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct Repl2F_reg_indirect(iRegL dst, regF src, flagsReg cr) %{
match(Set dst (Replicate src));
@@ -10650,6 +11026,139 @@ instruct Repl2F_imm0(iRegL dst, immFp0 src) %{
ins_pipe(pipe_class_dummy);
%}
instruct repl4F_reg_Ex(vecX dst, regF src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 4 &&
Matcher::vector_element_basic_type(n) == T_FLOAT);
format %{ "VREP $dst, $src \t// replicate4F" %}
size(6);
ins_encode %{
__ z_vrepf($dst$$VectorRegister, $src$$FloatRegister->to_vr(), 0);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl4F_immF0(vecX dst, immFp0 zero) %{
match(Set dst (Replicate zero));
predicate(n->as_Vector()->length() == 4 &&
Matcher::vector_element_basic_type(n) == T_FLOAT);
format %{ "VZERO $dst, $zero \t// replicate4F" %}
size(6);
ins_encode %{
__ z_vzero($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl2D_reg_Ex(vecX dst, regD src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 2 &&
Matcher::vector_element_basic_type(n) == T_DOUBLE);
format %{ "VREP $dst, $src \t// replicate2D" %}
size(6);
ins_encode %{
__ z_vrepg($dst$$VectorRegister, $src$$FloatRegister->to_vr(), 0);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl2D_immD0(vecX dst, immDp0 zero) %{
match(Set dst (Replicate zero));
predicate(n->as_Vector()->length() == 2 &&
Matcher::vector_element_basic_type(n) == T_DOUBLE);
format %{ "VZERO $dst, $zero \t// replicate2D" %}
size(6);
ins_encode %{
__ z_vzero($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl16B_reg_Ex(vecX dst, iRegI src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 16 &&
Matcher::vector_element_basic_type(n) == T_BYTE);
size(12);
ins_encode %{
__ z_vlvgb($dst$$VectorRegister, $src$$Register, 0);
__ z_vrepb($dst$$VectorRegister, $dst$$VectorRegister, 0);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 16 &&
Matcher::vector_element_basic_type(n) == T_BYTE);
format %{ "VONE $dst, $src \t// replicate16B" %}
size(6);
ins_encode %{
__ z_vone($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl16B_immI0(vecX dst, immI_0 zero) %{
match(Set dst (Replicate zero));
predicate(n->as_Vector()->length() == 16 &&
Matcher::vector_element_basic_type(n) == T_BYTE);
format %{ "VZERO $dst, $zero \t// replicate16B" %}
size(6);
ins_encode %{
__ z_vzero($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl2L_reg_Ex(vecX dst, iRegL src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 2 &&
Matcher::vector_element_basic_type(n) == T_LONG);
size(12);
ins_encode %{
__ z_vlvgg($dst$$VectorRegister, $src$$Register, 0);
__ z_vrepg($dst$$VectorRegister, $dst$$VectorRegister, 0);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (Replicate src));
predicate(n->as_Vector()->length() == 2 &&
Matcher::vector_element_basic_type(n) == T_LONG);
format %{ "VONE $dst, $src \t// replicate2L" %}
size(6);
ins_encode %{
__ z_vone($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct repl2L_immI0(vecX dst, immI_0 zero) %{
match(Set dst (Replicate zero));
predicate(n->as_Vector()->length() == 2 &&
Matcher::vector_element_basic_type(n) == T_LONG);
format %{ "VZERO $dst, $zero \t// replicate16B" %}
size(6);
ins_encode %{
__ z_vzero($dst$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
// Load/Store vector
// Store Aligned Packed Byte register to memory (8 Bytes).
@@ -10664,6 +11173,21 @@ instruct storeA8B(memory mem, iRegL src) %{
ins_pipe(pipe_class_dummy);
%}
// Store 16-byte vector register to memory
instruct storeV16(memoryRX mem, vecX src) %{
predicate(n->as_StoreVector()->memory_size() == 16);
match(Set mem (StoreVector mem src));
ins_cost(MEMORY_REF_COST);
format %{ "VST $mem, $src \t// store 16-byte Vector" %}
size(6);
ins_encode %{
__ z_vst($src$$VectorRegister,
Address(reg_to_register_object($mem$$base), $mem$$index$$Register, $mem$$disp));
%}
ins_pipe(pipe_class_dummy);
%}
instruct loadV8(iRegL dst, memory mem) %{
match(Set dst (LoadVector mem));
predicate(n->as_LoadVector()->memory_size() == 8);
@@ -10675,6 +11199,21 @@ instruct loadV8(iRegL dst, memory mem) %{
ins_pipe(pipe_class_dummy);
%}
// Load 16-byte vector register from memory
instruct loadV16(vecX dst, memoryRX mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(MEMORY_REF_COST);
format %{ "VL $dst, $mem \t// load 16-byte Vector" %}
size(6);
ins_encode %{
__ z_vl($dst$$VectorRegister,
Address(reg_to_register_object($mem$$base), $mem$$index$$Register, $mem$$disp));
%}
ins_pipe(pipe_class_dummy);
%}
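
Together with the arithmetic rules that follow, these two rules let C2 emit straight-line vector loop bodies. Schematically, for a 4-int add (see vadd4I_reg below; registers illustrative):

  // VL  V16, 0(,R2)    -- loadV16
  // VL  V17, 0(,R3)    -- loadV16
  // VAF V18, V16, V17  -- vadd4I_reg
  // VST V18, 0(,R4)    -- storeV16
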
// Reinterpret: only one vector size used
instruct reinterpret(iRegL dst) %{
match(Set dst (VectorReinterpret dst));
@@ -10684,6 +11223,303 @@ instruct reinterpret(iRegL dst) %{
ins_pipe(pipe_class_dummy);
%}
instruct reinterpretX(vecX dst) %{
match(Set dst (VectorReinterpret dst));
ins_cost(0);
format %{ "reinterpret $dst" %}
ins_encode( /*empty*/ );
ins_pipe(pipe_class_dummy);
%}
//----------Vector Arithmetic Instructions--------------------------------------
// Vector Addition Instructions
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AddVB src1 src2));
predicate(n->as_Vector()->length() == 16);
format %{ "VAB $dst,$src1,$src2\t// add packed16B" %}
size(6);
ins_encode %{
__ z_vab($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AddVS src1 src2));
predicate(n->as_Vector()->length() == 8);
format %{ "VAH $dst,$src1,$src2\t// add packed8S" %}
size(6);
ins_encode %{
__ z_vah($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AddVI src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VAF $dst,$src1,$src2\t// add packed4I" %}
size(6);
ins_encode %{
__ z_vaf($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AddVL src1 src2));
predicate(n->as_Vector()->length() == 2);
format %{ "VAG $dst,$src1,$src2\t// add packed2L" %}
size(6);
ins_encode %{
__ z_vag($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vmul16B_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MulVB src1 src2));
predicate(n->as_Vector()->length() == 16);
format %{ "VMLB $dst,$src1,$src2\t// mul packed16B" %}
size(6);
ins_encode %{
__ z_vmlb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MulVS src1 src2));
predicate(n->as_Vector()->length() == 8);
format %{ "VMLHW $dst,$src1,$src2\t// mul packed8S" %}
size(6);
ins_encode %{
__ z_vmlhw($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MulVI src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VMLF $dst,$src1,$src2\t// mul packed4I" %}
size(6);
ins_encode %{
__ z_vmlf($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (SubVB src1 src2));
predicate(n->as_Vector()->length() == 16);
format %{ "VSB $dst,$src1,$src2\t// sub packed16B" %}
size(6);
ins_encode %{
__ z_vsb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (SubVS src1 src2));
predicate(n->as_Vector()->length() == 8);
format %{ "VSH $dst,$src1,$src2\t// sub packed8S" %}
size(6);
ins_encode %{
__ z_vsh($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (SubVI src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VSF $dst,$src1,$src2\t// sub packed4I" %}
size(6);
ins_encode %{
__ z_vsf($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (SubVL src1 src2));
predicate(n->as_Vector()->length() == 2);
format %{ "VSG $dst,$src1,$src2\t// sub packed2L" %}
size(6);
ins_encode %{
__ z_vsg($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AddVF src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VFASB $dst,$src1,$src2\t// add packed4F" %}
size(6);
ins_encode %{
__ z_vfasb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (AddVD src1 src2));
predicate(n->as_Vector()->length() == 2);
format %{ "VFADB $dst,$src1,$src2\t// add packed2D" %}
size(6);
ins_encode %{
__ z_vfadb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (SubVF src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VFSSB $dst,$src1,$src2\t// sub packed4F" %}
size(6);
ins_encode %{
__ z_vfssb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (SubVD src1 src2));
predicate(n->as_Vector()->length() == 2);
format %{ "VFSDB $dst,$src1,$src2\t// sub packed2D" %}
size(6);
ins_encode %{
__ z_vfsdb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MulVF src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VFMSB $dst,$src1,$src2\t// mul packed4F" %}
size(6);
ins_encode %{
__ z_vfmsb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (MulVD src1 src2));
predicate(n->as_Vector()->length() == 2);
format %{ "VFMDB $dst,$src1,$src2\t// mul packed2D" %}
size(6);
ins_encode %{
__ z_vfmdb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (DivVF src1 src2));
predicate(n->as_Vector()->length() == 4);
format %{ "VFDSB $dst,$src1,$src2\t// div packed4F" %}
size(6);
ins_encode %{
__ z_vfdsb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
match(Set dst (DivVD src1 src2));
predicate(n->as_Vector()->length() == 2);
format %{ "VFDDB $dst,$src1,$src2\t// div packed2D" %}
size(6);
ins_encode %{
__ z_vfddb($dst$$VectorRegister, $src1$$VectorRegister, $src2$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
// Vector Square Root Instructions
instruct vsqrt4F_reg(vecX dst, vecX src) %{
match(Set dst (SqrtVF src));
predicate(n->as_Vector()->length() == 4);
format %{ "VFSQSB $dst,$src\t// sqrt packed4F" %}
size(6);
ins_encode %{
__ z_vfsqsb($dst$$VectorRegister, $src$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
instruct vsqrt2D_reg(vecX dst, vecX src) %{
match(Set dst (SqrtVD src));
predicate(n->as_Vector()->length() == 2);
format %{ "VFSQDB $dst,$src\t// sqrt packed2D" %}
size(6);
ins_encode %{
__ z_vfsqdb($dst$$VectorRegister, $src$$VectorRegister);
%}
ins_pipe(pipe_class_dummy);
%}
// Vector Population Count Instructions
instruct vpopcnt_reg(vecX dst, vecX src) %{
match(Set dst (PopCountVI src));
format %{ "VPOPCT $dst,$src\t// pop count packed" %}
size(6);
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
switch (bt) {
case T_BYTE:
__ z_vpopctb($dst$$VectorRegister, $src$$VectorRegister);
break;
case T_SHORT:
__ z_vpopcth($dst$$VectorRegister, $src$$VectorRegister);
break;
case T_INT:
__ z_vpopctf($dst$$VectorRegister, $src$$VectorRegister);
break;
case T_LONG:
__ z_vpopctg($dst$$VectorRegister, $src$$VectorRegister);
break;
default:
ShouldNotReachHere();
}
%}
ins_pipe(pipe_class_dummy);
%}
// Vector Round Instructions
instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
match(Set dst (RoundDoubleModeV src rmode));
predicate(n->as_Vector()->length() == 2);
format %{ "RoundDoubleModeV $src,$rmode" %}
size(6);
ins_encode %{
switch ($rmode$$constant) {
case RoundDoubleModeNode::rmode_rint:
__ z_vflrd($dst$$VectorRegister, $src$$VectorRegister, 0); // 0: round per current FPC mode (nearest-even by default)
break;
case RoundDoubleModeNode::rmode_floor:
__ z_vflrd($dst$$VectorRegister, $src$$VectorRegister, 7); // 7: round toward -infinity
break;
case RoundDoubleModeNode::rmode_ceil:
__ z_vflrd($dst$$VectorRegister, $src$$VectorRegister, 6); // 6: round toward +infinity
break;
default:
ShouldNotReachHere();
}
%}
ins_pipe(pipe_class_dummy);
%}
//----------POPULATION COUNT RULES--------------------------------------------
// Byte reverse


@ -81,6 +81,9 @@
#define RegisterSaver_ExcludedFloatReg(regname) \
{ RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
#define RegisterSaver_LiveVReg(regname) \
{ RegisterSaver::v_reg, regname->encoding(), regname->as_VMReg() }
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
// Live registers which get spilled to the stack. Register positions
// in this array correspond directly to the stack layout.
@ -258,6 +261,26 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
// RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
static const RegisterSaver::LiveRegType RegisterSaver_LiveVRegs[] = {
// live vector registers (optional, only these are used by C2):
RegisterSaver_LiveVReg( Z_V16 ),
RegisterSaver_LiveVReg( Z_V17 ),
RegisterSaver_LiveVReg( Z_V18 ),
RegisterSaver_LiveVReg( Z_V19 ),
RegisterSaver_LiveVReg( Z_V20 ),
RegisterSaver_LiveVReg( Z_V21 ),
RegisterSaver_LiveVReg( Z_V22 ),
RegisterSaver_LiveVReg( Z_V23 ),
RegisterSaver_LiveVReg( Z_V24 ),
RegisterSaver_LiveVReg( Z_V25 ),
RegisterSaver_LiveVReg( Z_V26 ),
RegisterSaver_LiveVReg( Z_V27 ),
RegisterSaver_LiveVReg( Z_V28 ),
RegisterSaver_LiveVReg( Z_V29 ),
RegisterSaver_LiveVReg( Z_V30 ),
RegisterSaver_LiveVReg( Z_V31 )
};
int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
int reg_space = -1;
switch (reg_set) {
@ -271,23 +294,28 @@ int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
}
int RegisterSaver::calculate_vregstosave_num() {
return (sizeof(RegisterSaver_LiveVRegs) / sizeof(RegisterSaver::LiveRegType));
}
int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
return live_reg_save_size(reg_set) + frame::z_abi_160_size;
int RegisterSaver::live_reg_frame_size(RegisterSet reg_set, bool save_vectors) {
const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
return live_reg_save_size(reg_set) + vregstosave_num * v_reg_size + frame::z_abi_160_size;
}
// return_pc: Specify the register that should be stored as the return pc in the current frame.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc, bool save_vectors) {
// Record volatile registers as callee-save values in an OopMap so
// their save locations will be propagated to the caller frame's
// RegisterMap during StackFrameStream construction (needed for
// deoptimization; see compiledVFrame::create_stack_value).
// Calculate frame size.
const int frame_size_in_bytes = live_reg_frame_size(reg_set);
const int frame_size_in_bytes = live_reg_frame_size(reg_set, save_vectors);
const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
const int register_save_offset = frame_size_in_bytes - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);
// OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
OopMap* map = new OopMap(frame_size_in_slots, 0);
@ -382,6 +410,23 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg
assert(first != noreg, "Should spill at least one int reg.");
__ z_stmg(first, last, first_offset, Z_SP);
for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
int reg_num = RegisterSaver_LiveVRegs[i].reg_num;
__ z_vst(as_VectorRegister(reg_num), Address(Z_SP, offset));
map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
RegisterSaver_LiveVRegs[i].vmreg);
map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2),
RegisterSaver_LiveVRegs[i].vmreg->next());
map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2),
RegisterSaver_LiveVRegs[i].vmreg->next(2));
map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2),
RegisterSaver_LiveVRegs[i].vmreg->next(3));
}
assert(offset == frame_size_in_bytes, "consistency check");
// And we're done.
return map;
}
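Each 128-bit vector register occupies four 32-bit OopMap stack slots, which is why the loop above records vmreg and its next(1..3) siblings. The same mapping as a compact sketch (a hypothetical helper, not in the patch, assuming half_reg_size is 4 bytes):
// Hypothetical helper: record the four 4-byte slots covered by one
// 16-byte vector register saved at byte offset 'off'.
static void map_vector_slots(OopMap* map, int off, VMReg vr) {
  for (int s = 0; s < 4; s++) {               // 16 bytes == 4 jint slots
    VMReg slot = (s == 0) ? vr : vr->next(s); // vr, vr->next(), vr->next(2), ...
    map->set_callee_saved(VMRegImpl::stack2reg((off + s * 4) >> 2), slot);
  }
}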
@ -433,14 +478,18 @@ OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_se
}
offset += reg_size;
}
#ifdef ASSERT
assert(offset == frame_size_in_bytes, "consistency check");
#endif
return map;
}
// Pop the current frame and restore all the registers that we saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors) {
int offset;
const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
const int register_save_offset = live_reg_frame_size(reg_set, save_vectors) - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);
Register first = noreg;
Register last = noreg;
@ -517,6 +566,12 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg
assert(first != noreg, "Should spill at least one int reg.");
__ z_lmg(first, last, first_offset, Z_SP);
for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
int reg_num = RegisterSaver_LiveVRegs[i].reg_num;
__ z_vl(as_VectorRegister(reg_num), Address(Z_SP, offset));
}
// Pop the frame.
__ pop_frame();
@ -527,14 +582,12 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg
// Pop the current frame and restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
int i;
int offset;
const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
sizeof(RegisterSaver::LiveRegType);
const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
// Restore all result registers (ints and floats).
offset = register_save_offset;
int offset = register_save_offset;
for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
int reg_num = RegisterSaver_LiveRegs[i].reg_num;
int reg_type = RegisterSaver_LiveRegs[i].reg_type;
@ -557,6 +610,7 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
ShouldNotReachHere();
}
}
assert(offset == live_reg_frame_size(all_registers), "consistency check");
}
// ---------------------------------------------------------------------------
@ -980,8 +1034,8 @@ static void gen_special_dispatch(MacroAssembler *masm,
// Is the vector size (in bytes) bigger than the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
bool SharedRuntime::is_wide_vector(int size) {
// Note, MaxVectorSize == 8 on this platform.
assert(size <= 8, "%d bytes vectors are not supported", size);
// Note, MaxVectorSize == 8/16 on this platform.
assert(size <= (SuperwordUseVX ? 16 : 8), "%d bytes vectors are not supported", size);
return size > 8;
}
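Since MaxVectorSize becomes 16 once SuperwordUseVX is on (see the VM_Version change below), the typical query now flips to true there. A hedged usage sketch:
// Sketch only: how a caller decides whether wide vectors need explicit spilling.
bool wide = SharedRuntime::is_wide_vector(MaxVectorSize);
// MaxVectorSize == 16 (SuperwordUseVX) -> true; MaxVectorSize == 8 -> false.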
@ -2865,8 +2919,9 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal
__ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
}
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// Save registers, fpu state, and flags
map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, save_vectors);
if (!cause_return) {
// Keep a copy of the return pc to detect if it gets modified.
@ -2898,7 +2953,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal
// Pending exception case, used (sporadically) by
// api/java_lang/Thread.State/index#ThreadState et al.
RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);
// Jump to forward_exception_entry, with the issuing PC in Z_R14
// so it looks like the original nmethod called forward_exception_entry.
@ -2911,7 +2966,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal
if (!cause_return) {
Label no_adjust;
// If our stashed return pc was modified by the runtime we avoid touching it
const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors);
__ z_cg(Z_R6, offset_of_return_pc, Z_SP);
__ z_brne(no_adjust);
@ -2924,7 +2979,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal
}
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);
__ z_br(Z_R14);
@ -2932,7 +2987,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address cal
masm->flush();
// Fill-out other meta info
return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors)/wordSize);
}


@ -97,7 +97,23 @@ void VM_Version::initialize() {
intx cache_line_size = Dcache_lineSize(0);
#ifdef COMPILER2
MaxVectorSize = 8;
int model_ix = get_model_index();
if (model_ix >= 7) { // z13 or newer: vector facility available
if (FLAG_IS_DEFAULT(SuperwordUseVX)) {
FLAG_SET_ERGO(SuperwordUseVX, true);
}
if (model_ix > 7 && FLAG_IS_DEFAULT(UseSFPV) && SuperwordUseVX) { // z14 or newer
FLAG_SET_ERGO(UseSFPV, true);
} else if (model_ix == 7 && UseSFPV) {
warning("UseSFPV specified, but needs at least Z14.");
FLAG_SET_DEFAULT(UseSFPV, false);
}
} else if (SuperwordUseVX) {
warning("SuperwordUseVX specified, but needs at least Z13.");
FLAG_SET_DEFAULT(SuperwordUseVX, false);
}
MaxVectorSize = SuperwordUseVX ? 16 : 8;
#endif
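A usage note (an inference from the ergonomics above, not part of the patch): on z13 and newer hosts 16-byte SLP vectorization is now on by default, and -XX:-SuperwordUseVX drops MaxVectorSize back to 8 for comparison runs.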
if (has_PrefetchRaw()) {


@ -43,6 +43,16 @@ void VMRegImpl::set_regName() {
regName[i++] = freg->name();
freg = freg->successor();
}
VectorRegister vreg = ::as_VectorRegister(0);
for (; i < ConcreteRegisterImpl::max_vr;) {
regName[i++] = vreg->name();
regName[i++] = vreg->name();
regName[i++] = vreg->name();
regName[i++] = vreg->name();
vreg = vreg->successor();
}
for (; i < ConcreteRegisterImpl::number_of_registers; i ++) {
regName[i] = "NON-GPR-XMM";
}
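Each 128-bit vector register spans four 32-bit VMReg slots, so the loop above stores the same name four times before advancing. An equivalent formulation, as a sketch that makes the slots-per-register relationship explicit (assuming VectorRegister::max_slots_per_register is 4):
VectorRegister vreg = ::as_VectorRegister(0);
while (i < ConcreteRegisterImpl::max_vr) {
  for (int slot = 0; slot < VectorRegister::max_slots_per_register; slot++) {
    regName[i++] = vreg->name(); // one 128-bit VR covers four 32-bit slots
  }
  vreg = vreg->successor();
}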


@ -35,14 +35,26 @@ inline bool is_FloatRegister() {
value() < ConcreteRegisterImpl::max_fpr;
}
inline bool is_VectorRegister() {
return value() >= ConcreteRegisterImpl::max_fpr &&
value() < ConcreteRegisterImpl::max_vr;
}
inline Register as_Register() {
assert(is_Register() && is_even(value()), "even-aligned GPR name");
return ::as_Register(value() >> 1);
return ::as_Register(value() / Register::max_slots_per_register);
}
inline FloatRegister as_FloatRegister() {
assert(is_FloatRegister() && is_even(value()), "must be");
return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1);
return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /
FloatRegister::max_slots_per_register);
}
inline VectorRegister as_VectorRegister() {
assert(is_VectorRegister(), "must be");
return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) /
VectorRegister::max_slots_per_register);
}
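A round-trip example for the new vector range (values are illustrative; it assumes 4 slots per vector register and that Z_V16 has encoding 16):
// Any of the four slots of Z_V16 converts back to the same register.
VMReg r = VMRegImpl::as_VMReg(ConcreteRegisterImpl::max_fpr + 16 * 4 + 2);
assert(r->is_VectorRegister(), "slot lies inside the vector range");
VectorRegister v = r->as_VectorRegister(); // == Z_V16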
inline bool is_concrete() {


@ -27,15 +27,21 @@
#define CPU_S390_VMREG_S390_INLINE_HPP
inline VMReg Register::as_VMReg() const {
return VMRegImpl::as_VMReg(encoding() << 1);
return VMRegImpl::as_VMReg(encoding() * Register::max_slots_per_register);
}
inline VMReg FloatRegister::as_VMReg() const {
return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr);
return VMRegImpl::as_VMReg((encoding() * FloatRegister::max_slots_per_register) +
ConcreteRegisterImpl::max_gpr);
}
inline VMReg VectorRegister::as_VMReg() const {
return VMRegImpl::as_VMReg((encoding() * VectorRegister::max_slots_per_register) +
ConcreteRegisterImpl::max_fpr);
}
inline VMReg ConditionRegister::as_VMReg() const {
return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr);
return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_vr);
}
#endif // CPU_S390_VMREG_S390_INLINE_HPP
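The resulting VMReg index space, summarized (a sketch inferred from the conversions above; slot counts follow the max_slots_per_register constants):
// [0 .. max_gpr)        GPRs, 2 slots each
// [max_gpr .. max_fpr)  FPRs, 2 slots each
// [max_fpr .. max_vr)   VRs,  4 slots each (new in this change)
// [max_vr .. )          condition register, 1 slot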


@ -2358,6 +2358,9 @@ private:
if (strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister";
if (strcmp(rep_var,"$VectorSRegister") == 0) return "as_VectorSRegister";
#endif
#if defined(S390)
if (strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister";
#endif
#if defined(AARCH64)
if (strcmp(rep_var,"$PRegister") == 0) return "as_PRegister";
#endif


@ -134,6 +134,14 @@ public:
return ::as_VectorSRegister(reg(ra_, node, idx));
}
#endif
#if defined(S390)
VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node) const {
return ::as_VectorRegister(reg(ra_, node));
}
VectorRegister as_VectorRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
return ::as_VectorRegister(reg(ra_, node, idx));
}
#endif
#if defined(AARCH64)
PRegister as_PRegister(PhaseRegAlloc* ra_, const Node* node) const {
return ::as_PRegister(reg(ra_, node));
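With the substitution in adlc (above) and these helpers, the $opnd$$VectorRegister token can now be expanded on s390. An illustrative expansion (an assumption about the generated code's shape, not taken from the patch):
// Inside a generated MachNode emit method, the .ad token
//   $src$$VectorRegister
// becomes roughly:
VectorRegister src = opnd_array(1)->as_VectorRegister(ra_, this, /*idx*/ 1);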


@ -77,7 +77,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#else // all other