diff --git a/src/hotspot/cpu/ppc/assembler_ppc.hpp b/src/hotspot/cpu/ppc/assembler_ppc.hpp index b38c4ac5bae..0b17854a58b 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -535,8 +535,12 @@ class Assembler : public AbstractAssembler { // Vector-Scalar (VSX) instruction support. LXV_OPCODE = (61u << OPCODE_SHIFT | 1u ), - LXVL_OPCODE = (31u << OPCODE_SHIFT | 269u << 1), STXV_OPCODE = (61u << OPCODE_SHIFT | 5u ), + LXVX_OPCODE = (31u << OPCODE_SHIFT | 4u << 7 | 12u << 1), + STXVX_OPCODE = (31u << OPCODE_SHIFT | 396u << 1), + LXVP_OPCODE = ( 6u << OPCODE_SHIFT ), + STXVP_OPCODE = ( 6u << OPCODE_SHIFT | 1u ), + LXVL_OPCODE = (31u << OPCODE_SHIFT | 269u << 1), STXVL_OPCODE = (31u << OPCODE_SHIFT | 397u << 1), LXVD2X_OPCODE = (31u << OPCODE_SHIFT | 844u << 1), STXVD2X_OPCODE = (31u << OPCODE_SHIFT | 972u << 1), @@ -1243,6 +1247,11 @@ class Assembler : public AbstractAssembler { static int vsdm( int x) { return opp_u_field(x, 23, 22); } static int vsrs_dq( int x) { return opp_u_field(x & 0x1F, 10, 6) | opp_u_field((x & 0x20) >> 5, 28, 28); } static int vsrt_dq( int x) { return vsrs_dq(x); } + static int vsrtp( int x) { + assert((x & 1) == 0, "must be even"); + return opp_u_field((x & 0x1F) >> 1, 9, 6) | opp_u_field((x & 0x20) >> 5, 10, 10); + } + static int vsrsp( int x) { return vsrtp(x); } static int vsra( VectorSRegister r) { return vsra(r->encoding());} static int vsrb( VectorSRegister r) { return vsrb(r->encoding());} @@ -1251,6 +1260,8 @@ class Assembler : public AbstractAssembler { static int vsrt( VectorSRegister r) { return vsrt(r->encoding());} static int vsrs_dq(VectorSRegister r) { return vsrs_dq(r->encoding());} static int vsrt_dq(VectorSRegister r) { return vsrt_dq(r->encoding());} + static int vsrtp( VectorSRegister r) { return vsrtp(r->encoding());} + static int vsrsp( VectorSRegister r) { return vsrsp(r->encoding());} static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions @@ -2356,14 +2367,24 @@ class Assembler : public AbstractAssembler { inline void mfvscr( VectorRegister d); // Vector-Scalar (VSX) instructions. 
- inline void lxv( VectorSRegister d, int si16, Register a); - inline void stxv( VectorSRegister d, int si16, Register a); - inline void lxvl( VectorSRegister d, Register a, Register b); - inline void stxvl( VectorSRegister d, Register a, Register b); + // Power8 inline void lxvd2x( VectorSRegister d, Register a); inline void lxvd2x( VectorSRegister d, Register a, Register b); inline void stxvd2x( VectorSRegister d, Register a); inline void stxvd2x( VectorSRegister d, Register a, Register b); + + // Power9 + inline void lxv( VectorSRegister d, int si16, Register a); + inline void stxv( VectorSRegister d, int si16, Register a); + inline void lxvx( VectorSRegister d, Register a, Register b); + inline void stxvx( VectorSRegister d, Register a, Register b); + inline void lxvl( VectorSRegister d, Register a, Register b); + inline void stxvl( VectorSRegister d, Register a, Register b); + + // Power10 + inline void lxvp( VectorSRegister d, int si16, Register a); + inline void stxvp( VectorSRegister d, int si16, Register a); + inline void mtvrwz( VectorRegister d, Register a); inline void mfvrwz( Register a, VectorRegister d); inline void mtvrd( VectorRegister d, Register a); @@ -2480,6 +2501,9 @@ class Assembler : public AbstractAssembler { inline void std( Register d, int si16); inline void stdbrx( Register d, Register s2); + inline void lxvx( VectorSRegister d, Register b); + inline void stxvx(VectorSRegister d, Register b); + // PPC 2, section 3.2.1 Instruction Cache Instructions inline void icbi( Register s2); // PPC 2, section 3.2.2 Data Cache Instructions diff --git a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp index 4fb8c5c4198..a9ac76a76db 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp @@ -862,8 +862,12 @@ inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } // Vector-Scalar (VSX) instructions. 
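For reference before the emitter definitions that follow: lxv/stxv/lxvp/stxvp are DQ-form instructions, so their displacement is encoded divided by 16 (hence the is_aligned/simm asserts), and lxvp/stxvp name an even/odd VSR pair through a 4-bit pair field plus a bank-select bit, which is what the new vsrtp()/vsrsp() field encoders compute. The following is a standalone illustration of those two encoding rules under that reading of the patch, not HotSpot code; the helper names (fits_dq_disp, u_field, vsr_pair_field) are invented for the example.

#include <cassert>
#include <cstdint>
#include <cstdio>

// DQ-form displacement: a signed 16-bit value that is a multiple of 16,
// because only disp/16 is stored in the instruction.
static bool fits_dq_disp(int64_t d) {
  return d >= -32768 && d <= 32767 && (d & 0xf) == 0;
}

// Place a value into instruction bits [start_bit, end_bit], IBM numbering
// (bit 0 is the most significant bit of the 32-bit instruction word).
static uint32_t u_field(uint32_t val, int end_bit, int start_bit) {
  assert(val < (1u << (end_bit - start_bit + 1)));
  return val << (31 - end_bit);
}

// lxvp/stxvp address an even/odd VSR pair: bits 6-9 hold the pair number
// (vsr/2, low 4 bits) and bit 10 selects the upper bank VSR32-VSR63.
static uint32_t vsr_pair_field(int vsr) {
  assert(vsr >= 0 && vsr < 64 && (vsr & 1) == 0);
  uint32_t tp = (uint32_t)(vsr & 0x1f) >> 1;
  uint32_t tx = (uint32_t)(vsr & 0x20) >> 5;
  return u_field(tp, 9, 6) | u_field(tx, 10, 10);
}

int main() {
  // Example: the pair starting at VSR52 (the first non-volatile VR alias
  // saved by the new code) encodes as Tp = 10, TX = 1.
  std::printf("0x%08x\n", vsr_pair_field(52));
  return fits_dq_disp(-144) ? 0 : 1;  // -144 is a valid DQ-form displacement
}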
-inline void Assembler::lxv( VectorSRegister d, int ui16, Register a) { assert(is_aligned(ui16, 16), "displacement must be a multiple of 16"); emit_int32( LXV_OPCODE | vsrt_dq(d) | ra0mem(a) | uimm(ui16, 16)); } -inline void Assembler::stxv( VectorSRegister d, int ui16, Register a) { assert(is_aligned(ui16, 16), "displacement must be a multiple of 16"); emit_int32( STXV_OPCODE | vsrs_dq(d) | ra0mem(a) | uimm(ui16, 16)); } +inline void Assembler::lxv( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( LXV_OPCODE | vsrt_dq(d) | ra0mem(a) | simm(si16, 16)); } +inline void Assembler::stxv( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( STXV_OPCODE | vsrs_dq(d) | ra0mem(a) | simm(si16, 16)); } +inline void Assembler::lxvx( VectorSRegister d, Register a, Register b) { emit_int32( LXVX_OPCODE | vsrt(d) | ra0mem(a) | rb(b)); } +inline void Assembler::stxvx( VectorSRegister d, Register a, Register b) { emit_int32( STXVX_OPCODE | vsrs(d) | ra0mem(a) | rb(b)); } +inline void Assembler::lxvp( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( LXVP_OPCODE | vsrtp(d) | ra0mem(a) | simm(si16, 16)); } +inline void Assembler::stxvp( VectorSRegister d, int si16, Register a) { assert(is_aligned(si16, 16), "displacement must be a multiple of 16"); emit_int32( STXVP_OPCODE | vsrsp(d) | ra0mem(a) | simm(si16, 16)); } inline void Assembler::lxvl( VectorSRegister d, Register s1, Register b) { emit_int32( LXVL_OPCODE | vsrt(d) | ra0mem(s1) | rb(b)); } inline void Assembler::stxvl( VectorSRegister d, Register s1, Register b) { emit_int32( STXVL_OPCODE | vsrt(d) | ra0mem(s1) | rb(b)); } inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); } @@ -1140,6 +1144,10 @@ inline void Assembler::std( Register d, int si16 ) { emit_int32( STD_OPCODE inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));} inline void Assembler::stdbrx(Register d, Register s2){ emit_int32(STDBRX_OPCODE| rs(d) | rb(s2));} +inline void Assembler::lxvx( VectorSRegister d, Register b) { emit_int32( LXVX_OPCODE | vsrt(d) | rb(b)); } +inline void Assembler::stxvx(VectorSRegister d, Register b) { emit_int32( STXVX_OPCODE | vsrs(d) | rb(b)); } + + // ra0 version inline void Assembler::icbi( Register s2) { emit_int32( ICBI_OPCODE | rb(s2) ); } //inline void Assembler::dcba( Register s2) { emit_int32( DCBA_OPCODE | rb(s2) ); } diff --git a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp index 79b129c08ae..f1afbdd3a1d 100644 --- a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp @@ -162,8 +162,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { Register r = as_Register(i); if (FrameMap::reg_needs_save(r)) { int sp_offset = cpu_reg_save_offsets[i]; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset>>2), r->as_VMReg()); - oop_map->set_callee_saved(VMRegImpl::stack2reg((sp_offset>>2) + 1), r->as_VMReg()->next()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset >> 2), r->as_VMReg()); } } @@ -171,8 +170,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { for (i = 0; i < FrameMap::nof_fpu_regs; i++) { FloatRegister r = as_FloatRegister(i); int sp_offset = 
fpu_reg_save_offsets[i]; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset>>2), r->as_VMReg()); - oop_map->set_callee_saved(VMRegImpl::stack2reg((sp_offset>>2) + 1), r->as_VMReg()->next()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset >> 2), r->as_VMReg()); } }
diff --git a/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp b/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp index 4d98b763078..8f5f0ba4955 100644 --- a/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp +++ b/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, SAP SE. All rights reserved. + * Copyright (c) 2020, 2025, SAP SE. All rights reserved. * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -35,16 +35,6 @@ #define __ masm-> -bool ABIDescriptor::is_volatile_reg(Register reg) const { - return _integer_argument_registers.contains(reg) - || _integer_additional_volatile_registers.contains(reg); -} - -bool ABIDescriptor::is_volatile_reg(FloatRegister reg) const { - return _float_argument_registers.contains(reg) - || _float_additional_volatile_registers.contains(reg); -} - bool ForeignGlobals::is_foreign_linker_supported() { return true; } @@ -62,10 +52,6 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { parse_register_array(outputStorage, StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::FLOAT, abi._float_return_registers, as_FloatRegister); - objArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); - parse_register_array(volatileStorage, StorageType::INTEGER, abi._integer_additional_volatile_registers, as_Register); - parse_register_array(volatileStorage, StorageType::FLOAT, abi._float_additional_volatile_registers, as_FloatRegister); - abi._stack_alignment_bytes = jdk_internal_foreign_abi_ABIDescriptor::stackAlignment(abi_oop); abi._shadow_space_bytes = jdk_internal_foreign_abi_ABIDescriptor::shadowSpace(abi_oop);
diff --git a/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp b/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp index baccdf2c9bb..b25ee28f192 100644 --- a/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp +++ b/src/hotspot/cpu/ppc/foreignGlobals_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,9 +34,6 @@ struct ABIDescriptor { GrowableArray<FloatRegister> _float_argument_registers; GrowableArray<FloatRegister> _float_return_registers; - GrowableArray<Register> _integer_additional_volatile_registers; - GrowableArray<FloatRegister> _float_additional_volatile_registers; - int32_t _stack_alignment_bytes; int32_t _shadow_space_bytes;
diff --git a/src/hotspot/cpu/ppc/frame_ppc.hpp b/src/hotspot/cpu/ppc/frame_ppc.hpp index 560615089fe..2fff3fb2e48 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.hpp +++ b/src/hotspot/cpu/ppc/frame_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -134,56 +134,6 @@ #define _native_abi_reg_args_spill(_component) \ (offset_of(frame::native_abi_reg_args_spill, _component)) - // non-volatile GPRs: - - struct spill_nonvolatiles { - uint64_t r14; - uint64_t r15; //_16 - uint64_t r16; - uint64_t r17; //_16 - uint64_t r18; - uint64_t r19; //_16 - uint64_t r20; - uint64_t r21; //_16 - uint64_t r22; - uint64_t r23; //_16 - uint64_t r24; - uint64_t r25; //_16 - uint64_t r26; - uint64_t r27; //_16 - uint64_t r28; - uint64_t r29; //_16 - uint64_t r30; - uint64_t r31; //_16 - - double f14; - double f15; - double f16; - double f17; - double f18; - double f19; - double f20; - double f21; - double f22; - double f23; - double f24; - double f25; - double f26; - double f27; - double f28; - double f29; - double f30; - double f31; - - // aligned to frame::alignment_in_bytes (16) - }; - - enum { - spill_nonvolatiles_size = sizeof(spill_nonvolatiles) - }; - - #define _spill_nonvolatiles_neg(_component) \ - (int)(-frame::spill_nonvolatiles_size + offset_of(frame::spill_nonvolatiles, _component)) // Frame layout for the Java template interpreter on PPC64. // @@ -230,6 +180,7 @@ // [callee's Java result] // [callee's locals w/o arguments] // [outgoing arguments] + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // ABI for every Java frame, compiled and interpreted @@ -292,7 +243,6 @@ uint64_t result_type; uint64_t arguments_tos_address; //_16 // aligned to frame::alignment_in_bytes (16) - uint64_t r[spill_nonvolatiles_size/sizeof(uint64_t)]; }; enum { diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp index acf916c8c72..32a7011ac26 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp @@ -337,15 +337,24 @@ int SaveLiveRegisters::iterate_over_register_mask(IterationAction action, int of assert(SuperwordUseVSX, "or should not reach here"); VectorSRegister vs_reg = vm_reg->as_VectorSRegister(); if (vs_reg->encoding() >= VSR32->encoding() && vs_reg->encoding() <= VSR51->encoding()) { - reg_save_index += 2; + reg_save_index += (2 + (reg_save_index & 1)); // 2 slots + alignment if needed Register spill_addr = R0; + int spill_offset = offset - reg_save_index * BytesPerWord; if (action == ACTION_SAVE) { - _masm->addi(spill_addr, R1_SP, offset - reg_save_index * BytesPerWord); - _masm->stxvd2x(vs_reg, spill_addr); + if (PowerArchitecturePPC64 >= 9) { + _masm->stxv(vs_reg, spill_offset, R1_SP); + } else { + _masm->addi(spill_addr, R1_SP, spill_offset); + _masm->stxvd2x(vs_reg, spill_addr); + } } else if (action == ACTION_RESTORE) { - _masm->addi(spill_addr, R1_SP, offset - reg_save_index * BytesPerWord); - _masm->lxvd2x(vs_reg, spill_addr); + if (PowerArchitecturePPC64 >= 9) { + _masm->lxv(vs_reg, spill_offset, R1_SP); + } else { + _masm->addi(spill_addr, R1_SP, spill_offset); + _masm->lxvd2x(vs_reg, spill_addr); + } } else { assert(action == ACTION_COUNT_ONLY, "Sanity"); } diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index ca0a1344d14..19f64a79300 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -774,93 +774,82 @@ void MacroAssembler::clobber_carg_stack_slots(Register tmp) { } } -// Uses ordering which corresponds to ABI: -// _savegpr0_14: std r14,-144(r1) -// 
_savegpr0_15: std r15,-136(r1) -// _savegpr0_16: std r16,-128(r1) -void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) { - std(R14, offset, dst); offset += 8; - std(R15, offset, dst); offset += 8; - std(R16, offset, dst); offset += 8; - std(R17, offset, dst); offset += 8; - std(R18, offset, dst); offset += 8; - std(R19, offset, dst); offset += 8; - std(R20, offset, dst); offset += 8; - std(R21, offset, dst); offset += 8; - std(R22, offset, dst); offset += 8; - std(R23, offset, dst); offset += 8; - std(R24, offset, dst); offset += 8; - std(R25, offset, dst); offset += 8; - std(R26, offset, dst); offset += 8; - std(R27, offset, dst); offset += 8; - std(R28, offset, dst); offset += 8; - std(R29, offset, dst); offset += 8; - std(R30, offset, dst); offset += 8; - std(R31, offset, dst); offset += 8; +void MacroAssembler::save_nonvolatile_registers(Register dst, int offset, bool include_fp_regs, bool include_vector_regs) { + BLOCK_COMMENT("save_nonvolatile_registers {"); - stfd(F14, offset, dst); offset += 8; - stfd(F15, offset, dst); offset += 8; - stfd(F16, offset, dst); offset += 8; - stfd(F17, offset, dst); offset += 8; - stfd(F18, offset, dst); offset += 8; - stfd(F19, offset, dst); offset += 8; - stfd(F20, offset, dst); offset += 8; - stfd(F21, offset, dst); offset += 8; - stfd(F22, offset, dst); offset += 8; - stfd(F23, offset, dst); offset += 8; - stfd(F24, offset, dst); offset += 8; - stfd(F25, offset, dst); offset += 8; - stfd(F26, offset, dst); offset += 8; - stfd(F27, offset, dst); offset += 8; - stfd(F28, offset, dst); offset += 8; - stfd(F29, offset, dst); offset += 8; - stfd(F30, offset, dst); offset += 8; - stfd(F31, offset, dst); + for (int i = 14; i < 32; i++) { + std(as_Register(i), offset, dst); + offset += 8; + } + + if (include_fp_regs) { + for (int i = 14; i < 32; i++) { + stfd(as_FloatRegister(i), offset, dst); + offset += 8; + } + } + + if (include_vector_regs) { + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + for (int i = 20; i < 32; i += 2) { + stxvp(as_VectorRegister(i)->to_vsr(), offset, dst); + offset += 32; + } + } else { + for (int i = 20; i < 32; i++) { + if (PowerArchitecturePPC64 >= 9) { + stxv(as_VectorRegister(i)->to_vsr(), offset, dst); + } else { + Register spill_addr = R0; + addi(spill_addr, dst, offset); + stxvd2x(as_VectorRegister(i)->to_vsr(), spill_addr); + } + offset += 16; + } + } + } + + BLOCK_COMMENT("} save_nonvolatile_registers "); } -// Uses ordering which corresponds to ABI: -// _restgpr0_14: ld r14,-144(r1) -// _restgpr0_15: ld r15,-136(r1) -// _restgpr0_16: ld r16,-128(r1) -void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) { - ld(R14, offset, src); offset += 8; - ld(R15, offset, src); offset += 8; - ld(R16, offset, src); offset += 8; - ld(R17, offset, src); offset += 8; - ld(R18, offset, src); offset += 8; - ld(R19, offset, src); offset += 8; - ld(R20, offset, src); offset += 8; - ld(R21, offset, src); offset += 8; - ld(R22, offset, src); offset += 8; - ld(R23, offset, src); offset += 8; - ld(R24, offset, src); offset += 8; - ld(R25, offset, src); offset += 8; - ld(R26, offset, src); offset += 8; - ld(R27, offset, src); offset += 8; - ld(R28, offset, src); offset += 8; - ld(R29, offset, src); offset += 8; - ld(R30, offset, src); offset += 8; - ld(R31, offset, src); offset += 8; +void MacroAssembler::restore_nonvolatile_registers(Register src, int offset, bool include_fp_regs, bool include_vector_regs) { + 
BLOCK_COMMENT("restore_nonvolatile_registers {"); - // FP registers - lfd(F14, offset, src); offset += 8; - lfd(F15, offset, src); offset += 8; - lfd(F16, offset, src); offset += 8; - lfd(F17, offset, src); offset += 8; - lfd(F18, offset, src); offset += 8; - lfd(F19, offset, src); offset += 8; - lfd(F20, offset, src); offset += 8; - lfd(F21, offset, src); offset += 8; - lfd(F22, offset, src); offset += 8; - lfd(F23, offset, src); offset += 8; - lfd(F24, offset, src); offset += 8; - lfd(F25, offset, src); offset += 8; - lfd(F26, offset, src); offset += 8; - lfd(F27, offset, src); offset += 8; - lfd(F28, offset, src); offset += 8; - lfd(F29, offset, src); offset += 8; - lfd(F30, offset, src); offset += 8; - lfd(F31, offset, src); + for (int i = 14; i < 32; i++) { + ld(as_Register(i), offset, src); + offset += 8; + } + + if (include_fp_regs) { + for (int i = 14; i < 32; i++) { + lfd(as_FloatRegister(i), offset, src); + offset += 8; + } + } + + if (include_vector_regs) { + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + for (int i = 20; i < 32; i += 2) { + lxvp(as_VectorRegister(i)->to_vsr(), offset, src); + offset += 32; + } + } else { + for (int i = 20; i < 32; i++) { + if (PowerArchitecturePPC64 >= 9) { + lxv(as_VectorRegister(i)->to_vsr(), offset, src); + } else { + Register spill_addr = R0; + addi(spill_addr, src, offset); + lxvd2x(as_VectorRegister(i)->to_vsr(), spill_addr); + } + offset += 16; + } + } + } + + BLOCK_COMMENT("} restore_nonvolatile_registers"); } // For verify_oops. @@ -1029,13 +1018,6 @@ void MacroAssembler::push_frame_reg_args(unsigned int bytes, Register tmp) { push_frame(bytes + frame::native_abi_reg_args_size, tmp); } -// Setup up a new C frame with a spill area for non-volatile GPRs and -// additional space for local variables. -void MacroAssembler::push_frame_reg_args_nonvolatiles(unsigned int bytes, - Register tmp) { - push_frame(bytes + frame::native_abi_reg_args_size + frame::spill_nonvolatiles_size, tmp); -} - // Pop current C frame. void MacroAssembler::pop_frame() { ld(R1_SP, _abi0(callers_sp), R1_SP); diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp index 7e2925ace26..3528dbb5591 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp @@ -299,8 +299,14 @@ class MacroAssembler: public Assembler { void clobber_nonvolatile_registers() NOT_DEBUG_RETURN; void clobber_carg_stack_slots(Register tmp); - void save_nonvolatile_gprs( Register dst_base, int offset); - void restore_nonvolatile_gprs(Register src_base, int offset); + int save_nonvolatile_registers_size(bool include_fp_regs, bool include_vector_regs) { + int size = (32 - 14) * 8; // GP regs + if (include_fp_regs) size += (32 - 14) * 8; + if (include_vector_regs) size += (32 - 20) * 16; + return size; + } + void save_nonvolatile_registers( Register dst_base, int offset, bool include_fp_regs, bool include_vector_regs); + void restore_nonvolatile_registers(Register src_base, int offset, bool include_fp_regs, bool include_vector_regs); enum { num_volatile_gp_regs = 11, @@ -334,10 +340,6 @@ class MacroAssembler: public Assembler { // Push a frame of size `bytes' plus native_abi_reg_args on top. 
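To make the arithmetic of the new save_nonvolatile_registers_size() above explicit, here is a standalone copy of the computation; this is an illustration only (the function name save_area_bytes is invented), the real helper is the MacroAssembler member added in this hunk. Saving R14-R31 and F14-F31 takes 144 bytes each, VR20-VR31 another 192 bytes, so the full area is 480 bytes and remains a multiple of the 16-byte stack alignment.

// Standalone sketch of the size computation; mirrors the hunk above.
constexpr int save_area_bytes(bool include_fp_regs, bool include_vector_regs) {
  int size = (32 - 14) * 8;                        // R14-R31:   18 x 8  = 144 bytes
  if (include_fp_regs)     size += (32 - 14) * 8;  // F14-F31:   18 x 8  = 144 bytes
  if (include_vector_regs) size += (32 - 20) * 16; // VR20-VR31: 12 x 16 = 192 bytes
  return size;
}
static_assert(save_area_bytes(true, true) == 480, "full save area");
static_assert(save_area_bytes(true, true) % 16 == 0, "multiple of StackAlignmentInBytes (16)");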
void push_frame_reg_args(unsigned int bytes, Register tmp); - // Setup up a new C frame with a spill area for non-volatile GPRs and additional - // space for local variables - void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp); - // pop current C frame void pop_frame(); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 07d681e8982..5c6d9815245 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -258,71 +258,326 @@ register %{ // Vector-Scalar Registers // ---------------------------- // 1st 32 VSRs are aliases for the FPRs which are already defined above. - reg_def VSR0 ( SOC, SOC, Op_VecX, 0, VMRegImpl::Bad()); - reg_def VSR1 ( SOC, SOC, Op_VecX, 1, VMRegImpl::Bad()); - reg_def VSR2 ( SOC, SOC, Op_VecX, 2, VMRegImpl::Bad()); - reg_def VSR3 ( SOC, SOC, Op_VecX, 3, VMRegImpl::Bad()); - reg_def VSR4 ( SOC, SOC, Op_VecX, 4, VMRegImpl::Bad()); - reg_def VSR5 ( SOC, SOC, Op_VecX, 5, VMRegImpl::Bad()); - reg_def VSR6 ( SOC, SOC, Op_VecX, 6, VMRegImpl::Bad()); - reg_def VSR7 ( SOC, SOC, Op_VecX, 7, VMRegImpl::Bad()); - reg_def VSR8 ( SOC, SOC, Op_VecX, 8, VMRegImpl::Bad()); - reg_def VSR9 ( SOC, SOC, Op_VecX, 9, VMRegImpl::Bad()); - reg_def VSR10 ( SOC, SOC, Op_VecX, 10, VMRegImpl::Bad()); - reg_def VSR11 ( SOC, SOC, Op_VecX, 11, VMRegImpl::Bad()); - reg_def VSR12 ( SOC, SOC, Op_VecX, 12, VMRegImpl::Bad()); - reg_def VSR13 ( SOC, SOC, Op_VecX, 13, VMRegImpl::Bad()); - reg_def VSR14 ( SOC, SOE, Op_VecX, 14, VMRegImpl::Bad()); - reg_def VSR15 ( SOC, SOE, Op_VecX, 15, VMRegImpl::Bad()); - reg_def VSR16 ( SOC, SOE, Op_VecX, 16, VMRegImpl::Bad()); - reg_def VSR17 ( SOC, SOE, Op_VecX, 17, VMRegImpl::Bad()); - reg_def VSR18 ( SOC, SOE, Op_VecX, 18, VMRegImpl::Bad()); - reg_def VSR19 ( SOC, SOE, Op_VecX, 19, VMRegImpl::Bad()); - reg_def VSR20 ( SOC, SOE, Op_VecX, 20, VMRegImpl::Bad()); - reg_def VSR21 ( SOC, SOE, Op_VecX, 21, VMRegImpl::Bad()); - reg_def VSR22 ( SOC, SOE, Op_VecX, 22, VMRegImpl::Bad()); - reg_def VSR23 ( SOC, SOE, Op_VecX, 23, VMRegImpl::Bad()); - reg_def VSR24 ( SOC, SOE, Op_VecX, 24, VMRegImpl::Bad()); - reg_def VSR25 ( SOC, SOE, Op_VecX, 25, VMRegImpl::Bad()); - reg_def VSR26 ( SOC, SOE, Op_VecX, 26, VMRegImpl::Bad()); - reg_def VSR27 ( SOC, SOE, Op_VecX, 27, VMRegImpl::Bad()); - reg_def VSR28 ( SOC, SOE, Op_VecX, 28, VMRegImpl::Bad()); - reg_def VSR29 ( SOC, SOE, Op_VecX, 29, VMRegImpl::Bad()); - reg_def VSR30 ( SOC, SOE, Op_VecX, 30, VMRegImpl::Bad()); - reg_def VSR31 ( SOC, SOE, Op_VecX, 31, VMRegImpl::Bad()); + reg_def VSR0 (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def VSR0_H (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def VSR0_J (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + reg_def VSR0_K (SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); + + reg_def VSR1 (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def VSR1_H (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def VSR1_J (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + reg_def VSR1_K (SOC, SOC, Op_RegF, 1, VMRegImpl::Bad()); + + reg_def VSR2 (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def VSR2_H (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def VSR2_J (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + reg_def VSR2_K (SOC, SOC, Op_RegF, 2, VMRegImpl::Bad()); + + reg_def VSR3 (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def VSR3_H (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def VSR3_J (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + reg_def VSR3_K (SOC, SOC, Op_RegF, 3, VMRegImpl::Bad()); + + reg_def VSR4 (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def VSR4_H 
(SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def VSR4_J (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + reg_def VSR4_K (SOC, SOC, Op_RegF, 4, VMRegImpl::Bad()); + + reg_def VSR5 (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def VSR5_H (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def VSR5_J (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + reg_def VSR5_K (SOC, SOC, Op_RegF, 5, VMRegImpl::Bad()); + + reg_def VSR6 (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def VSR6_H (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def VSR6_J (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + reg_def VSR6_K (SOC, SOC, Op_RegF, 6, VMRegImpl::Bad()); + + reg_def VSR7 (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def VSR7_H (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def VSR7_J (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + reg_def VSR7_K (SOC, SOC, Op_RegF, 7, VMRegImpl::Bad()); + + reg_def VSR8 (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def VSR8_H (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def VSR8_J (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + reg_def VSR8_K (SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); + + reg_def VSR9 (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def VSR9_H (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def VSR9_J (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + reg_def VSR9_K (SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); + + reg_def VSR10 (SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def VSR10_H(SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def VSR10_J(SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + reg_def VSR10_K(SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); + + reg_def VSR11 (SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def VSR11_H(SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def VSR11_J(SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + reg_def VSR11_K(SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); + + reg_def VSR12 (SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def VSR12_H(SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def VSR12_J(SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + reg_def VSR12_K(SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); + + reg_def VSR13 (SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def VSR13_H(SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def VSR13_J(SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + reg_def VSR13_K(SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); + + reg_def VSR14 (SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def VSR14_H(SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def VSR14_J(SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + reg_def VSR14_K(SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); + + reg_def VSR15 (SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def VSR15_H(SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def VSR15_J(SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + reg_def VSR15_K(SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); + + reg_def VSR16 (SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + reg_def VSR16_H(SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + reg_def VSR16_J(SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + reg_def VSR16_K(SOC, SOC, Op_RegF, 16, VMRegImpl::Bad()); + + reg_def VSR17 (SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + reg_def VSR17_H(SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + reg_def VSR17_J(SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + reg_def VSR17_K(SOC, SOC, Op_RegF, 17, VMRegImpl::Bad()); + + reg_def VSR18 (SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + reg_def VSR18_H(SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + reg_def VSR18_J(SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + reg_def VSR18_K(SOC, SOC, Op_RegF, 18, VMRegImpl::Bad()); + + reg_def VSR19 (SOC, SOC, Op_RegF, 19, 
VMRegImpl::Bad()); + reg_def VSR19_H(SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + reg_def VSR19_J(SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + reg_def VSR19_K(SOC, SOC, Op_RegF, 19, VMRegImpl::Bad()); + + reg_def VSR20 (SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + reg_def VSR20_H(SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + reg_def VSR20_J(SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + reg_def VSR20_K(SOC, SOC, Op_RegF, 20, VMRegImpl::Bad()); + + reg_def VSR21 (SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + reg_def VSR21_H(SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + reg_def VSR21_J(SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + reg_def VSR21_K(SOC, SOC, Op_RegF, 21, VMRegImpl::Bad()); + + reg_def VSR22 (SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + reg_def VSR22_H(SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + reg_def VSR22_J(SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + reg_def VSR22_K(SOC, SOC, Op_RegF, 22, VMRegImpl::Bad()); + + reg_def VSR23 (SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + reg_def VSR23_H(SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + reg_def VSR23_J(SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + reg_def VSR23_K(SOC, SOC, Op_RegF, 23, VMRegImpl::Bad()); + + reg_def VSR24 (SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + reg_def VSR24_H(SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + reg_def VSR24_J(SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + reg_def VSR24_K(SOC, SOC, Op_RegF, 24, VMRegImpl::Bad()); + + reg_def VSR25 (SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + reg_def VSR25_H(SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + reg_def VSR25_J(SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + reg_def VSR25_K(SOC, SOC, Op_RegF, 25, VMRegImpl::Bad()); + + reg_def VSR26 (SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + reg_def VSR26_H(SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + reg_def VSR26_J(SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + reg_def VSR26_K(SOC, SOC, Op_RegF, 26, VMRegImpl::Bad()); + + reg_def VSR27 (SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + reg_def VSR27_H(SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + reg_def VSR27_J(SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + reg_def VSR27_K(SOC, SOC, Op_RegF, 27, VMRegImpl::Bad()); + + reg_def VSR28 (SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + reg_def VSR28_H(SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + reg_def VSR28_J(SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + reg_def VSR28_K(SOC, SOC, Op_RegF, 28, VMRegImpl::Bad()); + + reg_def VSR29 (SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + reg_def VSR29_H(SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + reg_def VSR29_J(SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + reg_def VSR29_K(SOC, SOC, Op_RegF, 29, VMRegImpl::Bad()); + + reg_def VSR30 (SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + reg_def VSR30_H(SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + reg_def VSR30_J(SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + reg_def VSR30_K(SOC, SOC, Op_RegF, 30, VMRegImpl::Bad()); + + reg_def VSR31 (SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + reg_def VSR31_H(SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + reg_def VSR31_J(SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + reg_def VSR31_K(SOC, SOC, Op_RegF, 31, VMRegImpl::Bad()); + // 2nd 32 VSRs are aliases for the VRs which are only defined here. 
- reg_def VSR32 ( SOC, SOC, Op_VecX, 32, VSR32->as_VMReg()); - reg_def VSR33 ( SOC, SOC, Op_VecX, 33, VSR33->as_VMReg()); - reg_def VSR34 ( SOC, SOC, Op_VecX, 34, VSR34->as_VMReg()); - reg_def VSR35 ( SOC, SOC, Op_VecX, 35, VSR35->as_VMReg()); - reg_def VSR36 ( SOC, SOC, Op_VecX, 36, VSR36->as_VMReg()); - reg_def VSR37 ( SOC, SOC, Op_VecX, 37, VSR37->as_VMReg()); - reg_def VSR38 ( SOC, SOC, Op_VecX, 38, VSR38->as_VMReg()); - reg_def VSR39 ( SOC, SOC, Op_VecX, 39, VSR39->as_VMReg()); - reg_def VSR40 ( SOC, SOC, Op_VecX, 40, VSR40->as_VMReg()); - reg_def VSR41 ( SOC, SOC, Op_VecX, 41, VSR41->as_VMReg()); - reg_def VSR42 ( SOC, SOC, Op_VecX, 42, VSR42->as_VMReg()); - reg_def VSR43 ( SOC, SOC, Op_VecX, 43, VSR43->as_VMReg()); - reg_def VSR44 ( SOC, SOC, Op_VecX, 44, VSR44->as_VMReg()); - reg_def VSR45 ( SOC, SOC, Op_VecX, 45, VSR45->as_VMReg()); - reg_def VSR46 ( SOC, SOC, Op_VecX, 46, VSR46->as_VMReg()); - reg_def VSR47 ( SOC, SOC, Op_VecX, 47, VSR47->as_VMReg()); - reg_def VSR48 ( SOC, SOC, Op_VecX, 48, VSR48->as_VMReg()); - reg_def VSR49 ( SOC, SOC, Op_VecX, 49, VSR49->as_VMReg()); - reg_def VSR50 ( SOC, SOC, Op_VecX, 50, VSR50->as_VMReg()); - reg_def VSR51 ( SOC, SOC, Op_VecX, 51, VSR51->as_VMReg()); - reg_def VSR52 ( SOC, SOE, Op_VecX, 52, VSR52->as_VMReg()); - reg_def VSR53 ( SOC, SOE, Op_VecX, 53, VSR53->as_VMReg()); - reg_def VSR54 ( SOC, SOE, Op_VecX, 54, VSR54->as_VMReg()); - reg_def VSR55 ( SOC, SOE, Op_VecX, 55, VSR55->as_VMReg()); - reg_def VSR56 ( SOC, SOE, Op_VecX, 56, VSR56->as_VMReg()); - reg_def VSR57 ( SOC, SOE, Op_VecX, 57, VSR57->as_VMReg()); - reg_def VSR58 ( SOC, SOE, Op_VecX, 58, VSR58->as_VMReg()); - reg_def VSR59 ( SOC, SOE, Op_VecX, 59, VSR59->as_VMReg()); - reg_def VSR60 ( SOC, SOE, Op_VecX, 60, VSR60->as_VMReg()); - reg_def VSR61 ( SOC, SOE, Op_VecX, 61, VSR61->as_VMReg()); - reg_def VSR62 ( SOC, SOE, Op_VecX, 62, VSR62->as_VMReg()); - reg_def VSR63 ( SOC, SOE, Op_VecX, 63, VSR63->as_VMReg()); + reg_def VSR32 (SOC, SOC, Op_RegF, 32, VSR32->as_VMReg() ); + reg_def VSR32_H(SOC, SOC, Op_RegF, 32, VSR32->as_VMReg()->next() ); + reg_def VSR32_J(SOC, SOC, Op_RegF, 32, VSR32->as_VMReg()->next(2)); + reg_def VSR32_K(SOC, SOC, Op_RegF, 32, VSR32->as_VMReg()->next(3)); + + reg_def VSR33 (SOC, SOC, Op_RegF, 33, VSR33->as_VMReg() ); + reg_def VSR33_H(SOC, SOC, Op_RegF, 33, VSR33->as_VMReg()->next() ); + reg_def VSR33_J(SOC, SOC, Op_RegF, 33, VSR33->as_VMReg()->next(2)); + reg_def VSR33_K(SOC, SOC, Op_RegF, 33, VSR33->as_VMReg()->next(3)); + + reg_def VSR34 (SOC, SOC, Op_RegF, 34, VSR34->as_VMReg() ); + reg_def VSR34_H(SOC, SOC, Op_RegF, 34, VSR34->as_VMReg()->next() ); + reg_def VSR34_J(SOC, SOC, Op_RegF, 34, VSR34->as_VMReg()->next(2)); + reg_def VSR34_K(SOC, SOC, Op_RegF, 34, VSR34->as_VMReg()->next(3)); + + reg_def VSR35 (SOC, SOC, Op_RegF, 35, VSR35->as_VMReg() ); + reg_def VSR35_H(SOC, SOC, Op_RegF, 35, VSR35->as_VMReg()->next() ); + reg_def VSR35_J(SOC, SOC, Op_RegF, 35, VSR35->as_VMReg()->next(2)); + reg_def VSR35_K(SOC, SOC, Op_RegF, 35, VSR35->as_VMReg()->next(3)); + + reg_def VSR36 (SOC, SOC, Op_RegF, 36, VSR36->as_VMReg() ); + reg_def VSR36_H(SOC, SOC, Op_RegF, 36, VSR36->as_VMReg()->next() ); + reg_def VSR36_J(SOC, SOC, Op_RegF, 36, VSR36->as_VMReg()->next(2)); + reg_def VSR36_K(SOC, SOC, Op_RegF, 36, VSR36->as_VMReg()->next(3)); + + reg_def VSR37 (SOC, SOC, Op_RegF, 37, VSR37->as_VMReg() ); + reg_def VSR37_H(SOC, SOC, Op_RegF, 37, VSR37->as_VMReg()->next() ); + reg_def VSR37_J(SOC, SOC, Op_RegF, 37, VSR37->as_VMReg()->next(2)); + reg_def VSR37_K(SOC, SOC, Op_RegF, 
37, VSR37->as_VMReg()->next(3)); + + reg_def VSR38 (SOC, SOC, Op_RegF, 38, VSR38->as_VMReg() ); + reg_def VSR38_H(SOC, SOC, Op_RegF, 38, VSR38->as_VMReg()->next() ); + reg_def VSR38_J(SOC, SOC, Op_RegF, 38, VSR38->as_VMReg()->next(2)); + reg_def VSR38_K(SOC, SOC, Op_RegF, 38, VSR38->as_VMReg()->next(3)); + + reg_def VSR39 (SOC, SOC, Op_RegF, 39, VSR39->as_VMReg() ); + reg_def VSR39_H(SOC, SOC, Op_RegF, 39, VSR39->as_VMReg()->next() ); + reg_def VSR39_J(SOC, SOC, Op_RegF, 39, VSR39->as_VMReg()->next(2)); + reg_def VSR39_K(SOC, SOC, Op_RegF, 39, VSR39->as_VMReg()->next(3)); + + reg_def VSR40 (SOC, SOC, Op_RegF, 40, VSR40->as_VMReg() ); + reg_def VSR40_H(SOC, SOC, Op_RegF, 40, VSR40->as_VMReg()->next() ); + reg_def VSR40_J(SOC, SOC, Op_RegF, 40, VSR40->as_VMReg()->next(2)); + reg_def VSR40_K(SOC, SOC, Op_RegF, 40, VSR40->as_VMReg()->next(3)); + + reg_def VSR41 (SOC, SOC, Op_RegF, 41, VSR41->as_VMReg() ); + reg_def VSR41_H(SOC, SOC, Op_RegF, 41, VSR41->as_VMReg()->next() ); + reg_def VSR41_J(SOC, SOC, Op_RegF, 41, VSR41->as_VMReg()->next(2)); + reg_def VSR41_K(SOC, SOC, Op_RegF, 41, VSR41->as_VMReg()->next(3)); + + reg_def VSR42 (SOC, SOC, Op_RegF, 42, VSR42->as_VMReg() ); + reg_def VSR42_H(SOC, SOC, Op_RegF, 42, VSR42->as_VMReg()->next() ); + reg_def VSR42_J(SOC, SOC, Op_RegF, 42, VSR42->as_VMReg()->next(2)); + reg_def VSR42_K(SOC, SOC, Op_RegF, 42, VSR42->as_VMReg()->next(3)); + + reg_def VSR43 (SOC, SOC, Op_RegF, 43, VSR43->as_VMReg() ); + reg_def VSR43_H(SOC, SOC, Op_RegF, 43, VSR43->as_VMReg()->next() ); + reg_def VSR43_J(SOC, SOC, Op_RegF, 43, VSR43->as_VMReg()->next(2)); + reg_def VSR43_K(SOC, SOC, Op_RegF, 43, VSR43->as_VMReg()->next(3)); + + reg_def VSR44 (SOC, SOC, Op_RegF, 44, VSR44->as_VMReg() ); + reg_def VSR44_H(SOC, SOC, Op_RegF, 44, VSR44->as_VMReg()->next() ); + reg_def VSR44_J(SOC, SOC, Op_RegF, 44, VSR44->as_VMReg()->next(2)); + reg_def VSR44_K(SOC, SOC, Op_RegF, 44, VSR44->as_VMReg()->next(3)); + + reg_def VSR45 (SOC, SOC, Op_RegF, 45, VSR45->as_VMReg() ); + reg_def VSR45_H(SOC, SOC, Op_RegF, 45, VSR45->as_VMReg()->next() ); + reg_def VSR45_J(SOC, SOC, Op_RegF, 45, VSR45->as_VMReg()->next(2)); + reg_def VSR45_K(SOC, SOC, Op_RegF, 45, VSR45->as_VMReg()->next(3)); + + reg_def VSR46 (SOC, SOC, Op_RegF, 46, VSR46->as_VMReg() ); + reg_def VSR46_H(SOC, SOC, Op_RegF, 46, VSR46->as_VMReg()->next() ); + reg_def VSR46_J(SOC, SOC, Op_RegF, 46, VSR46->as_VMReg()->next(2)); + reg_def VSR46_K(SOC, SOC, Op_RegF, 46, VSR46->as_VMReg()->next(3)); + + reg_def VSR47 (SOC, SOC, Op_RegF, 47, VSR47->as_VMReg() ); + reg_def VSR47_H(SOC, SOC, Op_RegF, 47, VSR47->as_VMReg()->next() ); + reg_def VSR47_J(SOC, SOC, Op_RegF, 47, VSR47->as_VMReg()->next(2)); + reg_def VSR47_K(SOC, SOC, Op_RegF, 47, VSR47->as_VMReg()->next(3)); + + reg_def VSR48 (SOC, SOC, Op_RegF, 48, VSR48->as_VMReg() ); + reg_def VSR48_H(SOC, SOC, Op_RegF, 48, VSR48->as_VMReg()->next() ); + reg_def VSR48_J(SOC, SOC, Op_RegF, 48, VSR48->as_VMReg()->next(2)); + reg_def VSR48_K(SOC, SOC, Op_RegF, 48, VSR48->as_VMReg()->next(3)); + + reg_def VSR49 (SOC, SOC, Op_RegF, 49, VSR49->as_VMReg() ); + reg_def VSR49_H(SOC, SOC, Op_RegF, 49, VSR49->as_VMReg()->next() ); + reg_def VSR49_J(SOC, SOC, Op_RegF, 49, VSR49->as_VMReg()->next(2)); + reg_def VSR49_K(SOC, SOC, Op_RegF, 49, VSR49->as_VMReg()->next(3)); + + reg_def VSR50 (SOC, SOC, Op_RegF, 50, VSR50->as_VMReg() ); + reg_def VSR50_H(SOC, SOC, Op_RegF, 50, VSR50->as_VMReg()->next() ); + reg_def VSR50_J(SOC, SOC, Op_RegF, 50, VSR50->as_VMReg()->next(2)); + reg_def VSR50_K(SOC, SOC, Op_RegF, 50, 
VSR50->as_VMReg()->next(3)); + + reg_def VSR51 (SOC, SOC, Op_RegF, 51, VSR51->as_VMReg() ); + reg_def VSR51_H(SOC, SOC, Op_RegF, 51, VSR51->as_VMReg()->next() ); + reg_def VSR51_J(SOC, SOC, Op_RegF, 51, VSR51->as_VMReg()->next(2)); + reg_def VSR51_K(SOC, SOC, Op_RegF, 51, VSR51->as_VMReg()->next(3)); + + reg_def VSR52 (SOC, SOE, Op_RegF, 52, VSR52->as_VMReg() ); + reg_def VSR52_H(SOC, SOE, Op_RegF, 52, VSR52->as_VMReg()->next() ); + reg_def VSR52_J(SOC, SOE, Op_RegF, 52, VSR52->as_VMReg()->next(2)); + reg_def VSR52_K(SOC, SOE, Op_RegF, 52, VSR52->as_VMReg()->next(3)); + + reg_def VSR53 (SOC, SOE, Op_RegF, 53, VSR53->as_VMReg() ); + reg_def VSR53_H(SOC, SOE, Op_RegF, 53, VSR53->as_VMReg()->next() ); + reg_def VSR53_J(SOC, SOE, Op_RegF, 53, VSR53->as_VMReg()->next(2)); + reg_def VSR53_K(SOC, SOE, Op_RegF, 53, VSR53->as_VMReg()->next(3)); + + reg_def VSR54 (SOC, SOE, Op_RegF, 54, VSR54->as_VMReg() ); + reg_def VSR54_H(SOC, SOE, Op_RegF, 54, VSR54->as_VMReg()->next() ); + reg_def VSR54_J(SOC, SOE, Op_RegF, 54, VSR54->as_VMReg()->next(2)); + reg_def VSR54_K(SOC, SOE, Op_RegF, 54, VSR54->as_VMReg()->next(3)); + + reg_def VSR55 (SOC, SOE, Op_RegF, 55, VSR55->as_VMReg() ); + reg_def VSR55_H(SOC, SOE, Op_RegF, 55, VSR55->as_VMReg()->next() ); + reg_def VSR55_J(SOC, SOE, Op_RegF, 55, VSR55->as_VMReg()->next(2)); + reg_def VSR55_K(SOC, SOE, Op_RegF, 55, VSR55->as_VMReg()->next(3)); + + reg_def VSR56 (SOC, SOE, Op_RegF, 56, VSR56->as_VMReg() ); + reg_def VSR56_H(SOC, SOE, Op_RegF, 56, VSR56->as_VMReg()->next() ); + reg_def VSR56_J(SOC, SOE, Op_RegF, 56, VSR56->as_VMReg()->next(2)); + reg_def VSR56_K(SOC, SOE, Op_RegF, 56, VSR56->as_VMReg()->next(3)); + + reg_def VSR57 (SOC, SOE, Op_RegF, 57, VSR57->as_VMReg() ); + reg_def VSR57_H(SOC, SOE, Op_RegF, 57, VSR57->as_VMReg()->next() ); + reg_def VSR57_J(SOC, SOE, Op_RegF, 57, VSR57->as_VMReg()->next(2)); + reg_def VSR57_K(SOC, SOE, Op_RegF, 57, VSR57->as_VMReg()->next(3)); + + reg_def VSR58 (SOC, SOE, Op_RegF, 58, VSR58->as_VMReg() ); + reg_def VSR58_H(SOC, SOE, Op_RegF, 58, VSR58->as_VMReg()->next() ); + reg_def VSR58_J(SOC, SOE, Op_RegF, 58, VSR58->as_VMReg()->next(2)); + reg_def VSR58_K(SOC, SOE, Op_RegF, 58, VSR58->as_VMReg()->next(3)); + + reg_def VSR59 (SOC, SOE, Op_RegF, 59, VSR59->as_VMReg() ); + reg_def VSR59_H(SOC, SOE, Op_RegF, 59, VSR59->as_VMReg()->next() ); + reg_def VSR59_J(SOC, SOE, Op_RegF, 59, VSR59->as_VMReg()->next(2)); + reg_def VSR59_K(SOC, SOE, Op_RegF, 59, VSR59->as_VMReg()->next(3)); + + reg_def VSR60 (SOC, SOE, Op_RegF, 60, VSR60->as_VMReg() ); + reg_def VSR60_H(SOC, SOE, Op_RegF, 60, VSR60->as_VMReg()->next() ); + reg_def VSR60_J(SOC, SOE, Op_RegF, 60, VSR60->as_VMReg()->next(2)); + reg_def VSR60_K(SOC, SOE, Op_RegF, 60, VSR60->as_VMReg()->next(3)); + + reg_def VSR61 (SOC, SOE, Op_RegF, 61, VSR61->as_VMReg() ); + reg_def VSR61_H(SOC, SOE, Op_RegF, 61, VSR61->as_VMReg()->next() ); + reg_def VSR61_J(SOC, SOE, Op_RegF, 61, VSR61->as_VMReg()->next(2)); + reg_def VSR61_K(SOC, SOE, Op_RegF, 61, VSR61->as_VMReg()->next(3)); + + reg_def VSR62 (SOC, SOE, Op_RegF, 62, VSR62->as_VMReg() ); + reg_def VSR62_H(SOC, SOE, Op_RegF, 62, VSR62->as_VMReg()->next() ); + reg_def VSR62_J(SOC, SOE, Op_RegF, 62, VSR62->as_VMReg()->next(2)); + reg_def VSR62_K(SOC, SOE, Op_RegF, 62, VSR62->as_VMReg()->next(3)); + + reg_def VSR63 (SOC, SOE, Op_RegF, 63, VSR63->as_VMReg() ); + reg_def VSR63_H(SOC, SOE, Op_RegF, 63, VSR63->as_VMReg()->next() ); + reg_def VSR63_J(SOC, SOE, Op_RegF, 63, VSR63->as_VMReg()->next(2)); + reg_def VSR63_K(SOC, SOE, Op_RegF, 63, 
VSR63->as_VMReg()->next(3)); // ---------------------------- // Specify priority of register selection within phases of register @@ -441,8 +696,74 @@ alloc_class chunk1 ( ); alloc_class chunk2 ( - // Chunk2 contains *all* 8 condition code registers. + VSR0 , VSR0_H , VSR0_J , VSR0_K , + VSR1 , VSR1_H , VSR1_J , VSR1_K , + VSR2 , VSR2_H , VSR2_J , VSR2_K , + VSR3 , VSR3_H , VSR3_J , VSR3_K , + VSR4 , VSR4_H , VSR4_J , VSR4_K , + VSR5 , VSR5_H , VSR5_J , VSR5_K , + VSR6 , VSR6_H , VSR6_J , VSR6_K , + VSR7 , VSR7_H , VSR7_J , VSR7_K , + VSR8 , VSR8_H , VSR8_J , VSR8_K , + VSR9 , VSR9_H , VSR9_J , VSR9_K , + VSR10, VSR10_H, VSR10_J, VSR10_K, + VSR11, VSR11_H, VSR11_J, VSR11_K, + VSR12, VSR12_H, VSR12_J, VSR12_K, + VSR13, VSR13_H, VSR13_J, VSR13_K, + VSR14, VSR14_H, VSR14_J, VSR14_K, + VSR15, VSR15_H, VSR15_J, VSR15_K, + VSR16, VSR16_H, VSR16_J, VSR16_K, + VSR17, VSR17_H, VSR17_J, VSR17_K, + VSR18, VSR18_H, VSR18_J, VSR18_K, + VSR19, VSR19_H, VSR19_J, VSR19_K, + VSR20, VSR20_H, VSR20_J, VSR20_K, + VSR21, VSR21_H, VSR21_J, VSR21_K, + VSR22, VSR22_H, VSR22_J, VSR22_K, + VSR23, VSR23_H, VSR23_J, VSR23_K, + VSR24, VSR24_H, VSR24_J, VSR24_K, + VSR25, VSR25_H, VSR25_J, VSR25_K, + VSR26, VSR26_H, VSR26_J, VSR26_K, + VSR27, VSR27_H, VSR27_J, VSR27_K, + VSR28, VSR28_H, VSR28_J, VSR28_K, + VSR29, VSR29_H, VSR29_J, VSR29_K, + VSR30, VSR30_H, VSR30_J, VSR30_K, + VSR31, VSR31_H, VSR31_J, VSR31_K, + VSR32, VSR32_H, VSR32_J, VSR32_K, + VSR33, VSR33_H, VSR33_J, VSR33_K, + VSR34, VSR34_H, VSR34_J, VSR34_K, + VSR35, VSR35_H, VSR35_J, VSR35_K, + VSR36, VSR36_H, VSR36_J, VSR36_K, + VSR37, VSR37_H, VSR37_J, VSR37_K, + VSR38, VSR38_H, VSR38_J, VSR38_K, + VSR39, VSR39_H, VSR39_J, VSR39_K, + VSR40, VSR40_H, VSR40_J, VSR40_K, + VSR41, VSR41_H, VSR41_J, VSR41_K, + VSR42, VSR42_H, VSR42_J, VSR42_K, + VSR43, VSR43_H, VSR43_J, VSR43_K, + VSR44, VSR44_H, VSR44_J, VSR44_K, + VSR45, VSR45_H, VSR45_J, VSR45_K, + VSR46, VSR46_H, VSR46_J, VSR46_K, + VSR47, VSR47_H, VSR47_J, VSR47_K, + VSR48, VSR48_H, VSR48_J, VSR48_K, + VSR49, VSR49_H, VSR49_J, VSR49_K, + VSR50, VSR50_H, VSR50_J, VSR50_K, + VSR51, VSR51_H, VSR51_J, VSR51_K, + VSR52, VSR52_H, VSR52_J, VSR52_K, + VSR53, VSR53_H, VSR53_J, VSR53_K, + VSR54, VSR54_H, VSR54_J, VSR54_K, + VSR55, VSR55_H, VSR55_J, VSR55_K, + VSR56, VSR56_H, VSR56_J, VSR56_K, + VSR57, VSR57_H, VSR57_J, VSR57_K, + VSR58, VSR58_H, VSR58_J, VSR58_K, + VSR59, VSR59_H, VSR59_J, VSR59_K, + VSR60, VSR60_H, VSR60_J, VSR60_K, + VSR61, VSR61_H, VSR61_J, VSR61_K, + VSR62, VSR62_H, VSR62_J, VSR62_K, + VSR63, VSR63_H, VSR63_J, VSR63_K +); +alloc_class chunk3 ( + // Chunk2 contains *all* 8 condition code registers. CR0, CR1, CR2, @@ -453,73 +774,6 @@ alloc_class chunk2 ( CR7 ); -alloc_class chunk3 ( - VSR0, - VSR1, - VSR2, - VSR3, - VSR4, - VSR5, - VSR6, - VSR7, - VSR8, - VSR9, - VSR10, - VSR11, - VSR12, - VSR13, - VSR14, - VSR15, - VSR16, - VSR17, - VSR18, - VSR19, - VSR20, - VSR21, - VSR22, - VSR23, - VSR24, - VSR25, - VSR26, - VSR27, - VSR28, - VSR29, - VSR30, - VSR31, - VSR32, - VSR33, - VSR34, - VSR35, - VSR36, - VSR37, - VSR38, - VSR39, - VSR40, - VSR41, - VSR42, - VSR43, - VSR44, - VSR45, - VSR46, - VSR47, - VSR48, - VSR49, - VSR50, - VSR51, - VSR52, - VSR53, - VSR54, - VSR55, - VSR56, - VSR57, - VSR58, - VSR59, - VSR60, - VSR61, - VSR62, - VSR63 -); - alloc_class chunk4 ( // special registers // These registers are not allocated, but used for nodes generated by postalloc expand. 
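The reg_def changes above model every 128-bit VSR as four 32-bit slots (VSRn, VSRn_H, VSRn_J, VSRn_K). That is why ConcreteRegisterImpl::max_vsr grows by a factor of 4 in register_ppc.hpp, and why the rc_class() boundaries further down can be expressed via the max_gpr/max_fpr/max_vsr constants. A standalone sketch of the resulting slot layout follows; the names are local constants for illustration, not the real enum, with register counts taken from register_ppc.hpp.

// Slot layout sketch; mirrors ConcreteRegisterImpl after this change.
constexpr int max_gpr = 32 * 2;            //  64 slots: 32 GPRs, 2 slots each
constexpr int max_fpr = max_gpr + 32 * 2;  // 128 slots: 32 FPRs, 2 slots each
constexpr int max_vsr = max_fpr + 64 * 4;  // 384 slots: 64 VSRs, now 4 slots each
constexpr int max_cnd = max_vsr + 8;       // 392 slots: 8 condition registers

static_assert(max_vsr - max_fpr == 256, "one 32-bit slot per quarter of each 128-bit VSR");
static_assert(max_cnd == 392, "condition registers follow the VSR block");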
@@ -910,28 +1164,38 @@ reg_class dbl_reg( // ---------------------------- reg_class vs_reg( - // Attention: Only these ones are saved & restored at safepoint by RegisterSaver. - VSR32, - VSR33, - VSR34, - VSR35, - VSR36, - VSR37, - VSR38, - VSR39, - VSR40, - VSR41, - VSR42, - VSR43, - VSR44, - VSR45, - VSR46, - VSR47, - VSR48, - VSR49, - VSR50, - VSR51 - // VSR52-VSR63 // nv! + VSR32, VSR32_H, VSR32_J, VSR32_K, + VSR33, VSR33_H, VSR33_J, VSR33_K, + VSR34, VSR34_H, VSR34_J, VSR34_K, + VSR35, VSR35_H, VSR35_J, VSR35_K, + VSR36, VSR36_H, VSR36_J, VSR36_K, + VSR37, VSR37_H, VSR37_J, VSR37_K, + VSR38, VSR38_H, VSR38_J, VSR38_K, + VSR39, VSR39_H, VSR39_J, VSR39_K, + VSR40, VSR40_H, VSR40_J, VSR40_K, + VSR41, VSR41_H, VSR41_J, VSR41_K, + VSR42, VSR42_H, VSR42_J, VSR42_K, + VSR43, VSR43_H, VSR43_J, VSR43_K, + VSR44, VSR44_H, VSR44_J, VSR44_K, + VSR45, VSR45_H, VSR45_J, VSR45_K, + VSR46, VSR46_H, VSR46_J, VSR46_K, + VSR47, VSR47_H, VSR47_J, VSR47_K, + VSR48, VSR48_H, VSR48_J, VSR48_K, + VSR49, VSR49_H, VSR49_J, VSR49_K, + VSR50, VSR50_H, VSR50_J, VSR50_K, + VSR51, VSR51_H, VSR51_J, VSR51_K, + VSR52, VSR52_H, VSR52_J, VSR52_K, // non-volatile + VSR53, VSR53_H, VSR53_J, VSR53_K, // non-volatile + VSR54, VSR54_H, VSR54_J, VSR54_K, // non-volatile + VSR55, VSR55_H, VSR55_J, VSR55_K, // non-volatile + VSR56, VSR56_H, VSR56_J, VSR56_K, // non-volatile + VSR57, VSR57_H, VSR57_J, VSR57_K, // non-volatile + VSR58, VSR58_H, VSR58_J, VSR58_K, // non-volatile + VSR59, VSR59_H, VSR59_J, VSR59_K, // non-volatile + VSR60, VSR60_H, VSR60_J, VSR60_K, // non-volatile + VSR61, VSR61_H, VSR61_J, VSR61_K, // non-volatile + VSR62, VSR62_H, VSR62_J, VSR62_K, // non-volatile + VSR63, VSR63_H, VSR63_J, VSR63_K // non-volatile ); %} @@ -1656,17 +1920,19 @@ static enum RC rc_class(OptoReg::Name reg) { if (reg == OptoReg::Bad) return rc_bad; // We have 64 integer register halves, starting at index 0. - if (reg < 64) return rc_int; + STATIC_ASSERT((int)ConcreteRegisterImpl::max_gpr == (int)MachRegisterNumbers::F0_num); + if (reg < ConcreteRegisterImpl::max_gpr) return rc_int; // We have 64 floating-point register halves, starting at index 64. - if (reg < 64+64) return rc_float; + STATIC_ASSERT((int)ConcreteRegisterImpl::max_fpr == (int)MachRegisterNumbers::VSR0_num); + if (reg < ConcreteRegisterImpl::max_fpr) return rc_float; // We have 64 vector-scalar registers, starting at index 128. - if (reg < 64+64+64) return rc_vs; - - // Between float regs & stack are the flags regs. - assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags"); + STATIC_ASSERT((int)ConcreteRegisterImpl::max_vsr == (int)MachRegisterNumbers::CR0_num); + if (reg < ConcreteRegisterImpl::max_vsr) return rc_vs; + // Condition and special purpose registers are not allocated. We only accept stack from here. + assert(OptoReg::is_stack(reg), "what else is it?"); return rc_stack; } @@ -1743,21 +2009,53 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) { VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]); int dst_offset = ra_->reg2offset(dst_lo); - if (masm) { - __ addi(R0, R1_SP, dst_offset); - __ stxvd2x(Rsrc, R0); + if (PowerArchitecturePPC64 >= 9) { + if (is_aligned(dst_offset, 16)) { + if (masm) { + __ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16_Power9 + } + size += 4; + } else { + // Other alignment can be used by Vector API (VectorPayload in rearrangeOp, + // observed with VectorRearrangeTest.java on Power9). 
+ if (masm) { + __ addi(R0, R1_SP, dst_offset); + __ stxvx(Rsrc, R0); // matches storeV16_Power9 (regarding element ordering) + } + size += 8; + } + } else { + if (masm) { + __ addi(R0, R1_SP, dst_offset); + __ stxvd2x(Rsrc, R0); // matches storeV16_Power8 + } + size += 8; } - size += 8; } // Memory->VectorSRegister Spill. else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) { VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]); int src_offset = ra_->reg2offset(src_lo); - if (masm) { - __ addi(R0, R1_SP, src_offset); - __ lxvd2x(Rdst, R0); + if (PowerArchitecturePPC64 >= 9) { + if (is_aligned(src_offset, 16)) { + if (masm) { + __ lxv(Rdst, src_offset, R1_SP); + } + size += 4; + } else { + if (masm) { + __ addi(R0, R1_SP, src_offset); + __ lxvx(Rdst, R0); + } + size += 8; + } + } else { + if (masm) { + __ addi(R0, R1_SP, src_offset); + __ lxvd2x(Rdst, R0); + } + size += 8; } - size += 8; } // VectorSRegister->VectorSRegister. else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) { @@ -2265,52 +2563,11 @@ bool Matcher::is_generic_vector(MachOper* opnd) { return false; } -// Constants for c2c and c calling conventions. - -const MachRegisterNumbers iarg_reg[8] = { - R3_num, R4_num, R5_num, R6_num, - R7_num, R8_num, R9_num, R10_num -}; - -const MachRegisterNumbers farg_reg[13] = { - F1_num, F2_num, F3_num, F4_num, - F5_num, F6_num, F7_num, F8_num, - F9_num, F10_num, F11_num, F12_num, - F13_num -}; - -const MachRegisterNumbers vsarg_reg[64] = { - VSR0_num, VSR1_num, VSR2_num, VSR3_num, - VSR4_num, VSR5_num, VSR6_num, VSR7_num, - VSR8_num, VSR9_num, VSR10_num, VSR11_num, - VSR12_num, VSR13_num, VSR14_num, VSR15_num, - VSR16_num, VSR17_num, VSR18_num, VSR19_num, - VSR20_num, VSR21_num, VSR22_num, VSR23_num, - VSR24_num, VSR23_num, VSR24_num, VSR25_num, - VSR28_num, VSR29_num, VSR30_num, VSR31_num, - VSR32_num, VSR33_num, VSR34_num, VSR35_num, - VSR36_num, VSR37_num, VSR38_num, VSR39_num, - VSR40_num, VSR41_num, VSR42_num, VSR43_num, - VSR44_num, VSR45_num, VSR46_num, VSR47_num, - VSR48_num, VSR49_num, VSR50_num, VSR51_num, - VSR52_num, VSR53_num, VSR54_num, VSR55_num, - VSR56_num, VSR57_num, VSR58_num, VSR59_num, - VSR60_num, VSR61_num, VSR62_num, VSR63_num -}; - -const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]); - -const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]); - -const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]); - // Return whether or not this register is ever used as an argument. This // function is used on startup to build the trampoline stubs in generateOptoStub. // Registers not mentioned will be killed by the VM call in the trampoline, and // arguments in those registers not be available to the callee. bool Matcher::can_be_java_arg(int reg) { - // We return true for all registers contained in iarg_reg[] and - // farg_reg[] and their virtual halves. // We must include the virtual halves in order to get STDs and LDs // instead of STWs and LWs in the trampoline stubs. @@ -4125,6 +4382,15 @@ operand immL16Alg4() %{ interface(CONST_INTER); %} +// Long Immediate: 16-bit, 16-aligned +operand immL16Alg16() %{ + predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0xf) == 0)); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Long Immediate: 32-bit, where lowest 16 bits are 0x0000. 
operand immL32hi16() %{ predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L)); @@ -4643,6 +4909,20 @@ operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{ %} %} +// Indirect with 16-aligned Offset +operand indOffset16Alg16(iRegPsrc reg, immL16Alg16 offset) %{ + constraint(ALLOC_IN_RC(bits64_reg_ro)); + match(AddP reg offset); + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + //----------Complex Operands for Compressed OOPs------------------------------- // Compressed OOPs with narrow_oop_shift == 0. @@ -4852,6 +5132,7 @@ operand cmpOp() %{ opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass); // Memory operand where offsets are 4-aligned. Required for ld, std. opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass); +opclass memoryAlg16(indirect, indOffset16Alg16); opclass indirectMemory(indirect, indirectNarrow); // Special opclass for I and ConvL2I. @@ -5392,8 +5673,9 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{ %} // Load Aligned Packed Byte -instruct loadV16(vecX dst, indirect mem) %{ - predicate(n->as_LoadVector()->memory_size() == 16); +// Note: The Power8 instruction loads the contents in a special order in Little Endian mode. +instruct loadV16_Power8(vecX dst, indirect mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16 && PowerArchitecturePPC64 == 8); match(Set dst (LoadVector mem)); ins_cost(MEMORY_REF_COST); @@ -5405,6 +5687,19 @@ instruct loadV16(vecX dst, indirect mem) %{ ins_pipe(pipe_class_default); %} +instruct loadV16_Power9(vecX dst, memoryAlg16 mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16 && PowerArchitecturePPC64 >= 9); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "LXV $dst, $mem \t// load 16-byte Vector" %} + size(4); + ins_encode %{ + __ lxv($dst$$VectorSRegister, $mem$$disp, $mem$$Register); + %} + ins_pipe(pipe_class_default); +%} + // Load Range, range = array length (=jint) instruct loadRange(iRegIdst dst, memory mem) %{ match(Set dst (LoadRange mem)); @@ -6418,8 +6713,9 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{ %} // Store Packed Byte long register to memory -instruct storeV16(indirect mem, vecX src) %{ - predicate(n->as_StoreVector()->memory_size() == 16); +// Note: The Power8 instruction stores the contents in a special order in Little Endian mode. 
+instruct storeV16_Power8(indirect mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16 && PowerArchitecturePPC64 == 8); match(Set mem (StoreVector mem src)); ins_cost(MEMORY_REF_COST); @@ -6431,6 +6727,19 @@ instruct storeV16(indirect mem, vecX src) %{ ins_pipe(pipe_class_default); %} +instruct storeV16_Power9(memoryAlg16 mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16 && PowerArchitecturePPC64 >= 9); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + + format %{ "STXV $mem, $src \t// store 16-byte Vector" %} + size(4); + ins_encode %{ + __ stxv($src$$VectorSRegister, $mem$$disp, $mem$$Register); + %} + ins_pipe(pipe_class_default); +%} + // Reinterpret: only one vector size used: either L or X instruct reinterpretL(iRegLdst dst) %{ match(Set dst (VectorReinterpret dst)); diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp index 565542ad7c0..2b5b25f449e 100644 --- a/src/hotspot/cpu/ppc/register_ppc.hpp +++ b/src/hotspot/cpu/ppc/register_ppc.hpp @@ -99,8 +99,8 @@ class Register { // testers constexpr bool is_valid() const { return ( 0 <= _encoding && _encoding < number_of_registers); } - constexpr bool is_volatile() const { return ( 0 <= _encoding && _encoding <= 13 ); } - constexpr bool is_nonvolatile() const { return (14 <= _encoding && _encoding <= 31 ); } + constexpr bool is_volatile() const { return ( 0 <= _encoding && _encoding <= 13); } + constexpr bool is_nonvolatile() const { return (14 <= _encoding && _encoding <= 31); } const char* name() const; }; @@ -169,7 +169,7 @@ class ConditionRegister { // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } - constexpr bool is_nonvolatile() const { return (2 <= _encoding && _encoding <= 4 ); } + constexpr bool is_nonvolatile() const { return (2 <= _encoding && _encoding <= 4); } const char* name() const; }; @@ -214,6 +214,7 @@ class FloatRegister { // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } + constexpr bool is_nonvolatile() const { return (14 <= _encoding && _encoding <= 31); } const char* name() const; @@ -323,6 +324,7 @@ class VectorRegister { // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } + constexpr bool is_nonvolatile() const { return (20 <= _encoding && _encoding <= 31); } const char* name() const; @@ -372,6 +374,7 @@ constexpr VectorRegister VR31 = as_VectorRegister(31); // The implementation of Vector-Scalar (VSX) registers on POWER architecture. +// VSR0-31 are aliases for F0-31 and VSR32-63 are aliases for VR0-31. 
class VectorSRegister { int _encoding; public: @@ -390,6 +393,7 @@ class VectorSRegister { // accessors constexpr int encoding() const { assert(is_valid(), "invalid register"); return _encoding; } inline VMReg as_VMReg() const; + VectorSRegister successor() const { return VectorSRegister(encoding() + 1); } // testers constexpr bool is_valid() const { return (0 <= _encoding && _encoding < number_of_registers); } @@ -480,7 +484,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { enum { max_gpr = Register::number_of_registers * 2, max_fpr = max_gpr + FloatRegister::number_of_registers * 2, - max_vsr = max_fpr + VectorSRegister::number_of_registers, + max_vsr = max_fpr + VectorSRegister::number_of_registers * 4, max_cnd = max_vsr + ConditionRegister::number_of_registers, max_spr = max_cnd + SpecialRegister::number_of_registers, // This number must be large enough to cover REG_COUNT (defined by c2) registers. diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp index 5a33a14f79e..ebcfaa10f7c 100644 --- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp +++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp @@ -243,7 +243,19 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveVSRegs[] = { RegisterSaver_LiveVSReg( VSR48 ), RegisterSaver_LiveVSReg( VSR49 ), RegisterSaver_LiveVSReg( VSR50 ), - RegisterSaver_LiveVSReg( VSR51 ) + RegisterSaver_LiveVSReg( VSR51 ), + RegisterSaver_LiveVSReg( VSR52 ), + RegisterSaver_LiveVSReg( VSR53 ), + RegisterSaver_LiveVSReg( VSR54 ), + RegisterSaver_LiveVSReg( VSR55 ), + RegisterSaver_LiveVSReg( VSR56 ), + RegisterSaver_LiveVSReg( VSR57 ), + RegisterSaver_LiveVSReg( VSR58 ), + RegisterSaver_LiveVSReg( VSR59 ), + RegisterSaver_LiveVSReg( VSR60 ), + RegisterSaver_LiveVSReg( VSR61 ), + RegisterSaver_LiveVSReg( VSR62 ), + RegisterSaver_LiveVSReg( VSR63 ) }; @@ -336,26 +348,50 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble } if (generate_oop_map) { - map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), RegisterSaver_LiveRegs[i].vmreg); - map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), - RegisterSaver_LiveRegs[i].vmreg->next()); } offset += reg_size; } - for (int i = 0; i < vsregstosave_num; i++) { - int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; - int reg_type = RegisterSaver_LiveVSRegs[i].reg_type; + // Note that generate_oop_map in the following loop is only used for the + // polling_page_vectors_safepoint_handler_blob. + // The order in which the vector contents are stored depends on Endianess and + // the utilized instructions (PowerArchitecturePPC64). + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + assert(is_even(vsregstosave_num), "expectation"); + for (int i = 0; i < vsregstosave_num; i += 2) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + assert(RegisterSaver_LiveVSRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!"); - __ li(R30, offset); - __ stxvd2x(as_VectorSRegister(reg_num), R30, R1_SP); - - if (generate_oop_map) { - map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), - RegisterSaver_LiveVSRegs[i].vmreg); + __ stxvp(as_VectorSRegister(reg_num), offset, R1_SP); + // Note: The contents were read in the same order (see loadV16_Power9 node in ppc.ad). 
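Editor's note on the Power10 path: stxvp stores an even/odd VSR pair (32 bytes) at a DQ-form offset, and in little-endian mode the higher-numbered register of the pair lands at the lower address. That is why the oop-map block that follows records LiveVSRegs[i + 1] at 'offset' on little endian and LiveVSRegs[i] there on big endian (LITTLE_ENDIAN_ONLY/BIG_ENDIAN_ONLY expand to their argument only on the matching endianness). A sketch of the index selection with hypothetical names:

    // slot 0 = the 16 bytes at 'offset', slot 1 = the 16 bytes at 'offset + vs_reg_size'
    static int live_vsreg_index_at(int i, int slot, bool little_endian) {
      return little_endian ? i + (1 - slot) : i + slot;
    }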
+ if (generate_oop_map) { + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), + RegisterSaver_LiveVSRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg); + map->set_callee_saved(VMRegImpl::stack2reg((offset + vs_reg_size) >> 2), + RegisterSaver_LiveVSRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg); + } + offset += (2 * vs_reg_size); + } + } else { + for (int i = 0; i < vsregstosave_num; i++) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + + if (PowerArchitecturePPC64 >= 9) { + __ stxv(as_VectorSRegister(reg_num), offset, R1_SP); + } else { + __ li(R31, offset); + __ stxvd2x(as_VectorSRegister(reg_num), R31, R1_SP); + } + // Note: The contents were read in the same order (see loadV16_Power8 / loadV16_Power9 node in ppc.ad). + if (generate_oop_map) { + VMReg vsr = RegisterSaver_LiveVSRegs[i].vmreg; + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), vsr); + } + offset += vs_reg_size; } - offset += vs_reg_size; } assert(offset == frame_size_in_bytes, "consistency check"); @@ -418,14 +454,29 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, offset += reg_size; } - for (int i = 0; i < vsregstosave_num; i++) { - int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; - int reg_type = RegisterSaver_LiveVSRegs[i].reg_type; + assert(is_aligned(offset, StackAlignmentInBytes), "should be"); + if (PowerArchitecturePPC64 >= 10) { + for (int i = 0; i < vsregstosave_num; i += 2) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + assert(RegisterSaver_LiveVSRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!"); - __ li(R31, offset); - __ lxvd2x(as_VectorSRegister(reg_num), R31, R1_SP); + __ lxvp(as_VectorSRegister(reg_num), offset, R1_SP); - offset += vs_reg_size; + offset += (2 * vs_reg_size); + } + } else { + for (int i = 0; i < vsregstosave_num; i++) { + int reg_num = RegisterSaver_LiveVSRegs[i].reg_num; + + if (PowerArchitecturePPC64 >= 9) { + __ lxv(as_VectorSRegister(reg_num), offset, R1_SP); + } else { + __ li(R31, offset); + __ lxvd2x(as_VectorSRegister(reg_num), R31, R1_SP); + } + + offset += vs_reg_size; + } } assert(offset == frame_size_in_bytes, "consistency check"); diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp index 4a0ced42ed4..c4e89495503 100644 --- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp @@ -94,10 +94,13 @@ class StubGenerator: public StubCodeGenerator { address start = __ function_entry(); + int save_nonvolatile_registers_size = __ save_nonvolatile_registers_size(true, SuperwordUseVSX); + // some sanity checks + STATIC_ASSERT(StackAlignmentInBytes == 16); assert((sizeof(frame::native_abi_minframe) % 16) == 0, "unaligned"); assert((sizeof(frame::native_abi_reg_args) % 16) == 0, "unaligned"); - assert((sizeof(frame::spill_nonvolatiles) % 16) == 0, "unaligned"); + assert((save_nonvolatile_registers_size % 16) == 0, "unaligned"); assert((sizeof(frame::parent_ijava_frame_abi) % 16) == 0, "unaligned"); assert((sizeof(frame::entry_frame_locals) % 16) == 0, "unaligned"); @@ -106,93 +109,72 @@ class StubGenerator: public StubCodeGenerator { Register r_arg_result_type = R5; Register r_arg_method = R6; Register r_arg_entry = R7; + Register r_arg_argument_addr = R8; + Register r_arg_argument_count = R9; Register r_arg_thread = R10; - Register r_temp = R24; - Register r_top_of_arguments_addr = R25; - Register r_entryframe_fp = R26; + Register r_entryframe_fp = R2; // volatile + Register r_argument_size = R11_scratch1; // volatile + Register 
r_top_of_arguments_addr = R21_tmp1; { // Stack on entry to call_stub: // // F1 [C_FRAME] // ... - - Register r_arg_argument_addr = R8; - Register r_arg_argument_count = R9; - Register r_frame_alignment_in_bytes = R27; - Register r_argument_addr = R28; - Register r_argumentcopy_addr = R29; - Register r_argument_size_in_bytes = R30; - Register r_frame_size = R23; - + Register r_frame_size = R12_scratch2; // volatile Label arguments_copied; // Save LR/CR to caller's C_FRAME. __ save_LR_CR(R0); - // Zero extend arg_argument_count. - __ clrldi(r_arg_argument_count, r_arg_argument_count, 32); - - // Save non-volatiles GPRs to ENTRY_FRAME (not yet pushed, but it's safe). - __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); - // Keep copy of our frame pointer (caller's SP). __ mr(r_entryframe_fp, R1_SP); + // calculate frame size + STATIC_ASSERT(Interpreter::logStackElementSize == 3); + + // space for arguments aligned up: ((arg_count + 1) * 8) &~ 15 + __ addi(r_frame_size, r_arg_argument_count, 1); + __ rldicr(r_frame_size, r_frame_size, 3, 63 - 4); + + // this is the pure space for arguments (excluding alignment padding) + __ sldi(r_argument_size, r_arg_argument_count, 3); + + __ addi(r_frame_size, r_frame_size, + save_nonvolatile_registers_size + frame::entry_frame_locals_size + frame::top_ijava_frame_abi_size); + + // push ENTRY_FRAME + __ push_frame(r_frame_size, R0); + + // Save non-volatiles registers to ENTRY_FRAME. + __ save_nonvolatile_registers(r_entryframe_fp, -(frame::entry_frame_locals_size + save_nonvolatile_registers_size), + true, SuperwordUseVSX); + BLOCK_COMMENT("Push ENTRY_FRAME including arguments"); // Push ENTRY_FRAME including arguments: // // F0 [TOP_IJAVA_FRAME_ABI] // alignment (optional) // [outgoing Java arguments] + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // F1 [C_FRAME] // ... - // calculate frame size - - // unaligned size of arguments - __ sldi(r_argument_size_in_bytes, - r_arg_argument_count, Interpreter::logStackElementSize); - // arguments alignment (max 1 slot) - // FIXME: use round_to() here - __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1); - __ sldi(r_frame_alignment_in_bytes, - r_frame_alignment_in_bytes, Interpreter::logStackElementSize); - - // size = unaligned size of arguments + top abi's size - __ addi(r_frame_size, r_argument_size_in_bytes, - frame::top_ijava_frame_abi_size); - // size += arguments alignment - __ add(r_frame_size, - r_frame_size, r_frame_alignment_in_bytes); - // size += size of call_stub locals - __ addi(r_frame_size, - r_frame_size, frame::entry_frame_locals_size); - - // push ENTRY_FRAME - __ push_frame(r_frame_size, r_temp); - // initialize call_stub locals (step 1) - __ std(r_arg_call_wrapper_addr, - _entry_frame_locals_neg(call_wrapper_address), r_entryframe_fp); - __ std(r_arg_result_addr, - _entry_frame_locals_neg(result_address), r_entryframe_fp); - __ std(r_arg_result_type, - _entry_frame_locals_neg(result_type), r_entryframe_fp); + __ std(r_arg_call_wrapper_addr, _entry_frame_locals_neg(call_wrapper_address), r_entryframe_fp); + __ std(r_arg_result_addr, _entry_frame_locals_neg(result_address), r_entryframe_fp); + __ std(r_arg_result_type, _entry_frame_locals_neg(result_type), r_entryframe_fp); // we will save arguments_tos_address later - BLOCK_COMMENT("Copy Java arguments"); // copy Java arguments // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later. - // FIXME: why not simply use SP+frame::top_ijava_frame_size? 
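Editor's note on the frame-size computation above: rldicr(dst, src, 3, 63 - 4) rotates left by 3 (multiply by 8) and clears the four low bits, so after the preceding addi the value is ((arg_count + 1) * 8) & ~15, the Java argument area rounded up to a 16-byte multiple (the + 1 supplies the rounding slack). Equivalent standalone arithmetic:

    static long aligned_argument_area_size(long arg_count) {
      return ((arg_count + 1) * 8) & ~15L;
    }
    // arg_count = 3 -> 32 bytes, arg_count = 4 -> 32 bytes, arg_count = 5 -> 48 bytes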
- __ addi(r_top_of_arguments_addr, - R1_SP, frame::top_ijava_frame_abi_size); - __ add(r_top_of_arguments_addr, - r_top_of_arguments_addr, r_frame_alignment_in_bytes); + __ addi(r_top_of_arguments_addr, r_entryframe_fp, + -(save_nonvolatile_registers_size + frame::entry_frame_locals_size)); + __ sub(r_top_of_arguments_addr, r_top_of_arguments_addr, r_argument_size); // any arguments to copy? __ cmpdi(CR0, r_arg_argument_count, 0); @@ -200,6 +182,8 @@ class StubGenerator: public StubCodeGenerator { // prepare loop and copy arguments in reverse order { + Register r_argument_addr = R22_tmp2; + Register r_argumentcopy_addr = R23_tmp3; // init CTR with arg_argument_count __ mtctr(r_arg_argument_count); @@ -207,8 +191,7 @@ class StubGenerator: public StubCodeGenerator { __ mr(r_argumentcopy_addr, r_top_of_arguments_addr); // let r_argument_addr point to last incoming java argument - __ add(r_argument_addr, - r_arg_argument_addr, r_argument_size_in_bytes); + __ add(r_argument_addr, r_arg_argument_addr, r_argument_size); __ addi(r_argument_addr, r_argument_addr, -BytesPerWord); // now loop while CTR > 0 and copy arguments @@ -216,10 +199,10 @@ class StubGenerator: public StubCodeGenerator { Label next_argument; __ bind(next_argument); - __ ld(r_temp, 0, r_argument_addr); + __ ld(R0, 0, r_argument_addr); // argument_addr--; __ addi(r_argument_addr, r_argument_addr, -BytesPerWord); - __ std(r_temp, 0, r_argumentcopy_addr); + __ std(R0, 0, r_argumentcopy_addr); // argumentcopy_addr++; __ addi(r_argumentcopy_addr, r_argumentcopy_addr, BytesPerWord); @@ -234,11 +217,7 @@ class StubGenerator: public StubCodeGenerator { { BLOCK_COMMENT("Call frame manager or native entry."); // Call frame manager or native entry. - Register r_new_arg_entry = R14; - assert_different_registers(r_new_arg_entry, r_top_of_arguments_addr, - r_arg_method, r_arg_thread); - - __ mr(r_new_arg_entry, r_arg_entry); + assert_different_registers(r_arg_entry, r_top_of_arguments_addr, r_arg_method, r_arg_thread); // Register state on entry to frame manager / native entry: // @@ -262,31 +241,32 @@ class StubGenerator: public StubCodeGenerator { assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread"); // Set R15_prev_state to 0 for simplifying checks in callee. - __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); + __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R0); // Stack on entry to frame manager / native entry: // // F0 [TOP_IJAVA_FRAME_ABI] // alignment (optional) // [outgoing Java arguments] + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // F1 [C_FRAME] // ... // // global toc register - __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R11_scratch1); + __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); // Remember the senderSP so we interpreter can pop c2i arguments off of the stack // when called via a c2i. // Pass initial_caller_sp to framemanager. __ mr(R21_sender_SP, R1_SP); - // Do a light-weight C-call here, r_new_arg_entry holds the address + // Do a light-weight C-call here, r_arg_entry holds the address // of the interpreter entry point (frame manager or native entry) // and save runtime-value of LR in return_address. 
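Editor's note on the argument copy loop above: the incoming Java arguments are read from the last element downwards while the copy in the new frame is filled upwards from r_top_of_arguments_addr, so the first Java argument ends up at the highest address of the copied block. The same loop in standalone form (illustrative names):

    #include <cstdint>

    static void copy_java_arguments(const uint64_t* args, uint64_t* top_of_arguments, int count) {
      const uint64_t* src = args + count - 1;   // last incoming argument
      uint64_t* dst = top_of_arguments;         // lowest address of the outgoing area
      for (int i = 0; i < count; i++) {
        *dst++ = *src--;                        // copies in reverse order
      }
    }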
- assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread, - "trashed r_new_arg_entry"); - return_address = __ call_stub(r_new_arg_entry); + assert(r_arg_entry != tos && r_arg_entry != R19_method && r_arg_entry != R16_thread, + "trashed r_arg_entry"); + return_address = __ call_stub(r_arg_entry); } { @@ -298,6 +278,7 @@ class StubGenerator: public StubCodeGenerator { // // F0 [ABI] // ... + // [non-volatiles] // [ENTRY_FRAME_LOCALS] // F1 [C_FRAME] // ... @@ -310,39 +291,38 @@ class StubGenerator: public StubCodeGenerator { Label ret_is_float; Label ret_is_double; - Register r_entryframe_fp = R30; - Register r_lr = R7_ARG5; - Register r_cr = R8_ARG6; + Register r_lr = R11_scratch1; + Register r_cr = R12_scratch2; // Reload some volatile registers which we've spilled before the call // to frame manager / native entry. // Access all locals via frame pointer, because we know nothing about // the topmost frame's size. - __ ld(r_entryframe_fp, _abi0(callers_sp), R1_SP); + __ ld(r_entryframe_fp, _abi0(callers_sp), R1_SP); // restore after call assert_different_registers(r_entryframe_fp, R3_RET, r_arg_result_addr, r_arg_result_type, r_cr, r_lr); - __ ld(r_arg_result_addr, - _entry_frame_locals_neg(result_address), r_entryframe_fp); - __ ld(r_arg_result_type, - _entry_frame_locals_neg(result_type), r_entryframe_fp); + __ ld(r_arg_result_addr, _entry_frame_locals_neg(result_address), r_entryframe_fp); + __ ld(r_arg_result_type, _entry_frame_locals_neg(result_type), r_entryframe_fp); __ ld(r_cr, _abi0(cr), r_entryframe_fp); __ ld(r_lr, _abi0(lr), r_entryframe_fp); - - // pop frame and restore non-volatiles, LR and CR - __ mr(R1_SP, r_entryframe_fp); - __ pop_cont_fastpath(); - __ mtcr(r_cr); - __ mtlr(r_lr); + __ mtcr(r_cr); // restore CR + __ mtlr(r_lr); // restore LR // Store result depending on type. Everything that is not // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. - __ cmpwi(CR0, r_arg_result_type, T_OBJECT); - __ cmpwi(CR1, r_arg_result_type, T_LONG); - __ cmpwi(CR5, r_arg_result_type, T_FLOAT); - __ cmpwi(CR6, r_arg_result_type, T_DOUBLE); + // Using volatile CRs. + __ cmpwi(CR1, r_arg_result_type, T_OBJECT); + __ cmpwi(CR5, r_arg_result_type, T_LONG); + __ cmpwi(CR6, r_arg_result_type, T_FLOAT); + __ cmpwi(CR7, r_arg_result_type, T_DOUBLE); + + __ pop_cont_fastpath(); // kills CR0, uses R16_thread // restore non-volatile registers - __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ restore_nonvolatile_registers(r_entryframe_fp, -(frame::entry_frame_locals_size + save_nonvolatile_registers_size), + true, SuperwordUseVSX); + // pop frame + __ mr(R1_SP, r_entryframe_fp); // Stack on exit from call_stub: // @@ -351,24 +331,18 @@ class StubGenerator: public StubCodeGenerator { // // no call_stub frames left. - // All non-volatiles have been restored at this point!! 
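Editor's note on the result handling: the four compares above use only volatile condition registers (CR2-CR4 are nonvolatile and were already restored via mtcr, and CR0 is clobbered by pop_cont_fastpath), and the branches that follow store the result according to its type, with everything other than T_OBJECT, T_LONG, T_FLOAT and T_DOUBLE treated as a 32-bit int. A C++ sketch of the same dispatch, with hypothetical names:

    static void store_call_stub_result(BasicType result_type, void* result_addr,
                                       int64_t int_result, float float_result, double double_result) {
      switch (result_type) {
        case T_OBJECT: // falls through: stored exactly like T_LONG (see the merged labels below)
        case T_LONG:   *(int64_t*)result_addr = int_result;          break;
        case T_FLOAT:  *(float*)  result_addr = float_result;        break;
        case T_DOUBLE: *(double*) result_addr = double_result;       break;
        default:       *(int32_t*)result_addr = (int32_t)int_result; break;  // T_INT and friends
      }
    }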
- assert(R3_RET == R3, "R3_RET should be R3"); - - __ beq(CR0, ret_is_object); - __ beq(CR1, ret_is_long); - __ beq(CR5, ret_is_float); - __ beq(CR6, ret_is_double); + __ beq(CR1, ret_is_object); + __ beq(CR5, ret_is_long); + __ beq(CR6, ret_is_float); + __ beq(CR7, ret_is_double); // default: __ stw(R3_RET, 0, r_arg_result_addr); __ blr(); // return to caller // case T_OBJECT: - __ bind(ret_is_object); - __ std(R3_RET, 0, r_arg_result_addr); - __ blr(); // return to caller - // case T_LONG: + __ bind(ret_is_object); __ bind(ret_is_long); __ std(R3_RET, 0, r_arg_result_addr); __ blr(); // return to caller diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index a8f5dbda484..b0570101912 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -119,12 +119,13 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { const FloatRegister floatSlot = F0; address entry = __ function_entry(); + int save_nonvolatile_registers_size = __ save_nonvolatile_registers_size(false, false); __ save_LR(R0); - __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ save_nonvolatile_registers(R1_SP, -save_nonvolatile_registers_size, false, false); // We use target_sp for storing arguments in the C frame. __ mr(target_sp, R1_SP); - __ push_frame_reg_args_nonvolatiles(0, R11_scratch1); + __ push_frame(frame::native_abi_reg_args_size + save_nonvolatile_registers_size, R11_scratch1); __ mr(arg_java, R3_ARG1); @@ -309,7 +310,7 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { __ bind(loop_end); __ pop_frame(); - __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); + __ restore_nonvolatile_registers(R1_SP, -save_nonvolatile_registers_size, false, false); __ restore_LR(R0); __ blr(); diff --git a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp index 5c7b0067c3a..ae5410b12df 100644 --- a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp +++ b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2023, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,86 +35,6 @@ #define __ _masm-> -// for callee saved regs, according to the caller's ABI -static int compute_reg_save_area_size(const ABIDescriptor& abi) { - int size = 0; - for (int i = 0; i < Register::number_of_registers; i++) { - Register reg = as_Register(i); - // R1 saved/restored by prologue/epilogue, R13 (system thread) won't get modified! - if (reg == R1_SP || reg == R13) continue; - if (!abi.is_volatile_reg(reg)) { - size += 8; // bytes - } - } - - for (int i = 0; i < FloatRegister::number_of_registers; i++) { - FloatRegister reg = as_FloatRegister(i); - if (!abi.is_volatile_reg(reg)) { - size += 8; // bytes - } - } - - return size; -} - -static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) { - // 1. 
iterate all registers in the architecture - // - check if they are volatile or not for the given abi - // - if NOT, we need to save it here - - int offset = reg_save_area_offset; - - __ block_comment("{ preserve_callee_saved_regs "); - for (int i = 0; i < Register::number_of_registers; i++) { - Register reg = as_Register(i); - // R1 saved/restored by prologue/epilogue, R13 (system thread) won't get modified! - if (reg == R1_SP || reg == R13) continue; - if (!abi.is_volatile_reg(reg)) { - __ std(reg, offset, R1_SP); - offset += 8; - } - } - - for (int i = 0; i < FloatRegister::number_of_registers; i++) { - FloatRegister reg = as_FloatRegister(i); - if (!abi.is_volatile_reg(reg)) { - __ stfd(reg, offset, R1_SP); - offset += 8; - } - } - - __ block_comment("} preserve_callee_saved_regs "); -} - -static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) { - // 1. iterate all registers in the architecture - // - check if they are volatile or not for the given abi - // - if NOT, we need to restore it here - - int offset = reg_save_area_offset; - - __ block_comment("{ restore_callee_saved_regs "); - for (int i = 0; i < Register::number_of_registers; i++) { - Register reg = as_Register(i); - // R1 saved/restored by prologue/epilogue, R13 (system thread) won't get modified! - if (reg == R1_SP || reg == R13) continue; - if (!abi.is_volatile_reg(reg)) { - __ ld(reg, offset, R1_SP); - offset += 8; - } - } - - for (int i = 0; i < FloatRegister::number_of_registers; i++) { - FloatRegister reg = as_FloatRegister(i); - if (!abi.is_volatile_reg(reg)) { - __ lfd(reg, offset, R1_SP); - offset += 8; - } - } - - __ block_comment("} restore_callee_saved_regs "); -} - static const int upcall_stub_code_base_size = 1024; static const int upcall_stub_size_per_arg = 16; // arg save & restore + move @@ -140,13 +60,17 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, // The Java call uses the JIT ABI, but we also call C. int out_arg_area = MAX2(frame::jit_out_preserve_size + out_arg_bytes, (int)frame::native_abi_reg_args_size); - int reg_save_area_size = compute_reg_save_area_size(abi); + MacroAssembler* _masm = new MacroAssembler(&buffer); + int reg_save_area_size = __ save_nonvolatile_registers_size(true, SuperwordUseVSX); RegSpiller arg_spiller(call_regs._arg_regs); RegSpiller result_spiller(call_regs._ret_regs); int res_save_area_offset = out_arg_area; int arg_save_area_offset = res_save_area_offset + result_spiller.spill_size_bytes(); int reg_save_area_offset = arg_save_area_offset + arg_spiller.spill_size_bytes(); + if (SuperwordUseVSX) { // VectorRegisters want alignment + reg_save_area_offset = align_up(reg_save_area_offset, StackAlignmentInBytes); + } int frame_data_offset = reg_save_area_offset + reg_save_area_size; int frame_bottom_offset = frame_data_offset + sizeof(UpcallStub::FrameData); @@ -201,7 +125,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, ////////////////////////////////////////////////////////////////////////////// - MacroAssembler* _masm = new MacroAssembler(&buffer); address start = __ function_entry(); // called by C __ save_LR_CR(R0); assert((abi._stack_alignment_bytes % 16) == 0, "must be 16 byte aligned"); @@ -212,7 +135,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, // (and maybe attach it). 
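Editor's note on the upcall stub frame layout: the hand-rolled preserve/restore helpers are replaced by the shared MacroAssembler routines, and the register save area is now aligned to StackAlignmentInBytes when VSX registers are included. The offset computation below in standalone form (illustrative names; a mirror of the code, not a definitive implementation):

    static int upcall_frame_data_offset(int out_arg_area, int res_spill_bytes,
                                        int arg_spill_bytes, int reg_save_area_size,
                                        bool use_vsx) {
      int res_save_area_offset = out_arg_area;
      int arg_save_area_offset = res_save_area_offset + res_spill_bytes;
      int reg_save_area_offset = arg_save_area_offset + arg_spill_bytes;
      if (use_vsx) {
        reg_save_area_offset = (reg_save_area_offset + 15) & ~15;  // align_up(., 16)
      }
      return reg_save_area_offset + reg_save_area_size;
    }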
arg_spiller.generate_spill(_masm, arg_save_area_offset); // Java methods won't preserve them, so save them here: - preserve_callee_saved_registers(_masm, abi, reg_save_area_offset); + __ save_nonvolatile_registers(R1_SP, reg_save_area_offset, true, SuperwordUseVSX); // Java code uses TOC (pointer to code cache). __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); // reinit @@ -310,7 +233,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, __ call_c(call_target_address); __ block_comment("} on_exit"); - restore_callee_saved_registers(_masm, abi, reg_save_area_offset); + __ restore_nonvolatile_registers(R1_SP, reg_save_area_offset, true, SuperwordUseVSX); result_spiller.generate_fill(_masm, res_save_area_offset); diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.cpp b/src/hotspot/cpu/ppc/vmreg_ppc.cpp index d8a5c35cac0..2ed68578a80 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.cpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.cpp @@ -32,21 +32,29 @@ void VMRegImpl::set_regName() { for (i = 0; i < ConcreteRegisterImpl::max_gpr; ) { regName[i++] = reg->name(); regName[i++] = reg->name(); - if (reg->encoding() < Register::number_of_registers-1) + if (reg->encoding() < Register::number_of_registers - 1) { reg = reg->successor(); + } } FloatRegister freg = ::as_FloatRegister(0); for ( ; i < ConcreteRegisterImpl::max_fpr; ) { regName[i++] = freg->name(); regName[i++] = freg->name(); - if (reg->encoding() < FloatRegister::number_of_registers-1) + if (reg->encoding() < FloatRegister::number_of_registers - 1) { freg = freg->successor(); + } } VectorSRegister vsreg = ::as_VectorSRegister(0); for ( ; i < ConcreteRegisterImpl::max_vsr; ) { regName[i++] = vsreg->name(); + regName[i++] = vsreg->name(); + regName[i++] = vsreg->name(); + regName[i++] = vsreg->name(); + if (reg->encoding() < VectorSRegister::number_of_registers - 1) { + vsreg = vsreg->successor(); + } } for ( ; i < ConcreteRegisterImpl::number_of_registers; ) { diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.hpp b/src/hotspot/cpu/ppc/vmreg_ppc.hpp index b2d97a6d385..4e25c8b3cea 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.hpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2022 SAP SE. All rights reserved. + * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -62,12 +62,17 @@ inline FloatRegister as_FloatRegister() { inline VectorSRegister as_VectorSRegister() { assert(is_VectorSRegister(), "must be"); - return ::as_VectorSRegister(value() - ConcreteRegisterImpl::max_fpr); + return ::as_VectorSRegister((value() - ConcreteRegisterImpl::max_fpr) >> 2); } inline bool is_concrete() { assert(is_reg(), "must be"); - return is_even(value()); + if (is_Register() || is_FloatRegister()) return is_even(value()); + if (is_VectorSRegister()) { + int base = value() - ConcreteRegisterImpl::max_fpr; + return (base & 3) == 0; + } + return true; } #endif // CPU_PPC_VMREG_PPC_HPP diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp b/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp index afaefe50c97..2424df8da01 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.inline.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2012, 2022 SAP SE. All rights reserved. + * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,8 @@ inline VMReg FloatRegister::as_VMReg() const { } inline VMReg VectorSRegister::as_VMReg() const { - return VMRegImpl::as_VMReg((encoding()) + ConcreteRegisterImpl::max_fpr); + // Four halves, multiply by 4. + return VMRegImpl::as_VMReg((encoding() << 2) + ConcreteRegisterImpl::max_fpr); } inline VMReg ConditionRegister::as_VMReg() const {
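Editor's note on the VMReg mapping: a VMReg slot covers 32 bits, so a 128-bit VSX register now occupies four consecutive slots (hence max_vsr growing by a factor of four and set_regName emitting four names per VSR), and only the first slot of each group is concrete. The forward and backward conversions above in standalone form:

    // max_fpr is the first VSR slot index (ConcreteRegisterImpl::max_fpr).
    static int  vsr_to_first_slot(int vsr_encoding, int max_fpr) { return max_fpr + (vsr_encoding << 2); }
    static int  slot_to_vsr(int slot, int max_fpr)               { return (slot - max_fpr) >> 2; }
    static bool vsr_slot_is_concrete(int slot, int max_fpr)      { return ((slot - max_fpr) & 3) == 0; }
    // Round trip: slot_to_vsr(vsr_to_first_slot(n, max_fpr), max_fpr) == n for n in 0..63.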