jdk/src/hotspot/cpu/riscv/runtime_riscv.cpp
Fei Yang 8c87ea2bdf 8346478: RISC-V: Refactor add/sub assembler routines
Reviewed-by: fjiang, rehn, gcao
2025-01-09 11:36:40 +00:00

383 lines
14 KiB
C++

/*
* Copyright (c) 2024, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#ifdef COMPILER2
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/vmreg.hpp"
#include "interpreter/interpreter.hpp"
#include "opto/runtime.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vmreg_riscv.inline.hpp"
class SimpleRuntimeFrame {
public:
// Most of the runtime stubs have this simple frame layout.
// This class exists to make the layout shared in one place.
// Offsets are for compiler stack slots, which are jints.
enum layout {
// The frame sender code expects that fp will be in the "natural" place and
// will override any oopMap setting for it. We must therefore force the layout
// so that it agrees with the frame sender code.
// we don't expect any arg reg save area so riscv asserts that
// frame::arg_reg_save_area_bytes == 0
fp_off = 0, fp_off2,
return_off, return_off2,
framesize
};
};
#define __ masm->
//------------------------------generate_uncommon_trap_blob--------------------
void OptoRuntime::generate_uncommon_trap_blob() {
// Allocate space for the code
ResourceMark rm;
// Setup code generation tools
CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
assert_cond(masm != nullptr);
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
address start = __ pc();
// Push self-frame. We get here with a return address in RA
// and sp should be 16 byte aligned
// push fp and retaddr by hand
__ subi(sp, sp, 2 * wordSize);
__ sd(ra, Address(sp, wordSize));
__ sd(fp, Address(sp, 0));
// we don't expect an arg reg save area
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
// compiler left unloaded_class_index in j_rarg0 move to where the
// runtime expects it.
__ sext(c_rarg1, j_rarg0, 32);
// we need to set the past SP to the stack pointer of the stub frame
// and the pc to the address where this runtime call will return
// although actually any pc in this code blob will do).
Label retaddr;
__ set_last_Java_frame(sp, noreg, retaddr, t0);
// Call C code. Need thread but NOT official VM entry
// crud. We cannot block on this call, no GC can happen. Call should
// capture callee-saved registers as well as return values.
//
// UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode)
//
// n.b. 3 gp args, 0 fp args, integral return type
__ mv(c_rarg0, xthread);
__ mv(c_rarg2, Deoptimization::Unpack_uncommon_trap);
__ rt_call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
__ bind(retaddr);
// Set an oopmap for the call site
OopMapSet* oop_maps = new OopMapSet();
OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
assert_cond(oop_maps != nullptr && map != nullptr);
// location of fp is known implicitly by the frame sender code
oop_maps->add_gc_map(__ pc() - start, map);
__ reset_last_Java_frame(false);
// move UnrollBlock* into x14
__ mv(x14, x10);
#ifdef ASSERT
{ Label L;
__ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset()));
__ mv(t1, Deoptimization::Unpack_uncommon_trap);
__ beq(t0, t1, L);
__ stop("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
__ bind(L);
}
#endif
// Pop all the frames we must move/replace.
//
// Frame picture (youngest to oldest)
// 1: self-frame (no frame link)
// 2: deopting frame (no frame link)
// 3: caller of deopting frame (could be compiled/interpreted).
__ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
// Pop deoptimized frame (int)
__ lwu(x12, Address(x14,
Deoptimization::UnrollBlock::
size_of_deoptimized_frame_offset()));
__ subi(x12, x12, 2 * wordSize);
__ add(sp, sp, x12);
__ ld(fp, Address(sp, 0));
__ ld(ra, Address(sp, wordSize));
__ addi(sp, sp, 2 * wordSize);
// RA should now be the return address to the caller (3) frame
#ifdef ASSERT
// Compilers generate code that bang the stack by as much as the
// interpreter would need. So this stack banging should never
// trigger a fault. Verify that it does not on non product builds.
__ lwu(x11, Address(x14,
Deoptimization::UnrollBlock::
total_frame_sizes_offset()));
__ bang_stack_size(x11, x12);
#endif
// Load address of array of frame pcs into x12 (address*)
__ ld(x12, Address(x14,
Deoptimization::UnrollBlock::frame_pcs_offset()));
// Load address of array of frame sizes into x15 (intptr_t*)
__ ld(x15, Address(x14,
Deoptimization::UnrollBlock::
frame_sizes_offset()));
// Counter
__ lwu(x13, Address(x14,
Deoptimization::UnrollBlock::
number_of_frames_offset())); // (int)
// Now adjust the caller's stack to make up for the extra locals but
// record the original sp so that we can save it in the skeletal
// interpreter frame and the stack walking of interpreter_sender
// will get the unextended sp value and not the "real" sp value.
const Register sender_sp = t1; // temporary register
__ lwu(x11, Address(x14,
Deoptimization::UnrollBlock::
caller_adjustment_offset())); // (int)
__ mv(sender_sp, sp);
__ sub(sp, sp, x11);
// Push interpreter frames in a loop
Label loop;
__ bind(loop);
__ ld(x11, Address(x15, 0)); // Load frame size
__ subi(x11, x11, 2 * wordSize); // We'll push pc and fp by hand
__ ld(ra, Address(x12, 0)); // Save return address
__ enter(); // and old fp & set new fp
__ sub(sp, sp, x11); // Prolog
__ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
// This value is corrected by layout_activation_impl
__ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
__ mv(sender_sp, sp); // Pass sender_sp to next frame
__ addi(x15, x15, wordSize); // Bump array pointer (sizes)
__ addi(x12, x12, wordSize); // Bump array pointer (pcs)
__ subiw(x13, x13, 1); // Decrement counter
__ bgtz(x13, loop);
__ ld(ra, Address(x12, 0)); // save final return address
// Re-push self-frame
__ enter(); // & old fp & set new fp
// Use fp because the frames look interpreted now
// Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
// Don't need the precise return PC here, just precise enough to point into this code blob.
address the_pc = __ pc();
__ set_last_Java_frame(sp, fp, the_pc, t0);
// Call C code. Need thread but NOT official VM entry
// crud. We cannot block on this call, no GC can happen. Call should
// restore return values to their stack-slots with the new SP.
//
// BasicType unpack_frames(JavaThread* thread, int exec_mode)
//
// n.b. 2 gp args, 0 fp args, integral return type
// sp should already be aligned
__ mv(c_rarg0, xthread);
__ mv(c_rarg1, Deoptimization::Unpack_uncommon_trap);
__ rt_call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
// Set an oopmap for the call site
// Use the same PC we used for the last java frame
oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
// Clear fp AND pc
__ reset_last_Java_frame(true);
// Pop self-frame.
__ leave(); // Epilog
// Jump to interpreter
__ ret();
// Make sure all code is generated
masm->flush();
_uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
SimpleRuntimeFrame::framesize >> 1);
}
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
// Using exception blob, this code is jumped from a compiled method.
// (see emit_exception_handler in riscv.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers) unwind the frame and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
// x10: exception oop
// x13: exception pc
//
// Results:
// x10: exception oop
// x13: exception pc in caller
// destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
// Registers x10, x13, x12, x14, x15, t0 are not callee saved.
//
void OptoRuntime::generate_exception_blob() {
assert(!OptoRuntime::is_callee_saved_register(R13_num), "");
assert(!OptoRuntime::is_callee_saved_register(R10_num), "");
assert(!OptoRuntime::is_callee_saved_register(R12_num), "");
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
// Allocate space for the code
ResourceMark rm;
// Setup code generation tools
CodeBuffer buffer("exception_blob", 2048, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
assert_cond(masm != nullptr);
// TODO check various assumptions made here
//
// make sure we do so before running this
address start = __ pc();
// push fp and retaddr by hand
// Exception pc is 'return address' for stack walker
__ subi(sp, sp, 2 * wordSize);
__ sd(ra, Address(sp, wordSize));
__ sd(fp, Address(sp));
// there are no callee save registers and we don't expect an
// arg reg save area
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
// Store exception in Thread object. We cannot pass any arguments to the
// handle_exception call, since we do not want to make any assumption
// about the size of the frame where the exception happened in.
__ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
__ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
// This call does all the hard work. It checks if an exception handler
// exists in the method.
// If so, it returns the handler address.
// If not, it prepares for stack-unwinding, restoring the callee-save
// registers of the frame being removed.
//
// address OptoRuntime::handle_exception_C(JavaThread* thread)
//
// n.b. 1 gp arg, 0 fp args, integral return type
// the stack should always be aligned
address the_pc = __ pc();
__ set_last_Java_frame(sp, noreg, the_pc, t0);
__ mv(c_rarg0, xthread);
__ rt_call(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C));
// handle_exception_C is a special VM call which does not require an explicit
// instruction sync afterwards.
// Set an oopmap for the call site. This oopmap will only be used if we
// are unwinding the stack. Hence, all locations will be dead.
// Callee-saved registers will be the same as the frame above (i.e.,
// handle_exception_stub), since they were restored when we got the
// exception.
OopMapSet* oop_maps = new OopMapSet();
assert_cond(oop_maps != nullptr);
oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
__ reset_last_Java_frame(false);
// Restore callee-saved registers
// fp is an implicitly saved callee saved register (i.e. the calling
// convention will save restore it in prolog/epilog) Other than that
// there are no callee save registers now that adapter frames are gone.
// and we dont' expect an arg reg save area
__ ld(fp, Address(sp));
__ ld(x13, Address(sp, wordSize));
__ addi(sp, sp, 2 * wordSize);
// x10: exception handler
// We have a handler in x10 (could be deopt blob).
__ mv(t1, x10);
// Get the exception oop
__ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
// Get the exception pc in case we are deoptimized
__ ld(x14, Address(xthread, JavaThread::exception_pc_offset()));
#ifdef ASSERT
__ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset()));
__ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
#endif
// Clear the exception oop so GC no longer processes it as a root.
__ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
// x10: exception oop
// t1: exception handler
// x14: exception pc
// Jump to handler
__ jr(t1);
// Make sure all code is generated
masm->flush();
// Set exception blob
_exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2