// jdk/src/hotspot/cpu/riscv/assembler_riscv.hpp
/*
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_RISCV_ASSEMBLER_RISCV_HPP
#define CPU_RISCV_ASSEMBLER_RISCV_HPP
#include "asm/assembler.hpp"
#include "asm/register.hpp"
#include "code/codeCache.hpp"
#include "cppstdlib/type_traits.hpp"
#include "metaprogramming/enableIf.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#define XLEN 64
// definitions of various symbolic names for machine registers
// First the intercalls between C and Java, which use 8 general registers
// and 8 floating registers
class Argument {
public:
enum {
// for more info see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
n_vector_register_parameters_c = 16, // v8, v9, ... v23
n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...)
};
};
// function arguments (caller-save registers)
constexpr Register c_rarg0 = x10;
constexpr Register c_rarg1 = x11;
constexpr Register c_rarg2 = x12;
constexpr Register c_rarg3 = x13;
constexpr Register c_rarg4 = x14;
constexpr Register c_rarg5 = x15;
constexpr Register c_rarg6 = x16;
constexpr Register c_rarg7 = x17;
constexpr FloatRegister c_farg0 = f10;
constexpr FloatRegister c_farg1 = f11;
constexpr FloatRegister c_farg2 = f12;
constexpr FloatRegister c_farg3 = f13;
constexpr FloatRegister c_farg4 = f14;
constexpr FloatRegister c_farg5 = f15;
constexpr FloatRegister c_farg6 = f16;
constexpr FloatRegister c_farg7 = f17;
// Symbolically name the register arguments used by the Java calling convention.
// We have control over the convention for java so we can do what we please.
// What pleases us is to offset the java calling convention so that when
// we call a suitable jni method the arguments are lined up and we don't
// have to do much shuffling. A suitable jni method is non-static and a
// small number of arguments.
//
// |------------------------------------------------------------------------|
// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 |
// |------------------------------------------------------------------------|
// | x10 x11 x12 x13 x14 x15 x16 x17 |
// |------------------------------------------------------------------------|
// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 |
// |------------------------------------------------------------------------|
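// For example (illustrative): a call to a non-static JNI method
//   jint f(JNIEnv* env, jobject obj, jint x)
// receives env in c_rarg0, the receiver in c_rarg1 and x in c_rarg2.
// With the offsets below, j_rarg0 == c_rarg1 already holds the receiver
// and j_rarg1 == c_rarg2 already holds x, so only env has to be
// materialized before the call.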
constexpr Register j_rarg0 = c_rarg1;
constexpr Register j_rarg1 = c_rarg2;
constexpr Register j_rarg2 = c_rarg3;
constexpr Register j_rarg3 = c_rarg4;
constexpr Register j_rarg4 = c_rarg5;
constexpr Register j_rarg5 = c_rarg6;
constexpr Register j_rarg6 = c_rarg7;
constexpr Register j_rarg7 = c_rarg0;
// Java floating args are passed as per C
constexpr FloatRegister j_farg0 = f10;
constexpr FloatRegister j_farg1 = f11;
constexpr FloatRegister j_farg2 = f12;
constexpr FloatRegister j_farg3 = f13;
constexpr FloatRegister j_farg4 = f14;
constexpr FloatRegister j_farg5 = f15;
constexpr FloatRegister j_farg6 = f16;
constexpr FloatRegister j_farg7 = f17;
// zero register
constexpr Register zr = x0;
// global pointer
constexpr Register gp = x3;
// thread pointer
constexpr Register tp = x4;
// registers used to hold VM data either temporarily within a method
// or across method calls
// volatile (caller-save) registers
// current method -- must be in a call-clobbered register
constexpr Register xmethod = x31;
// return address
constexpr Register ra = x1;
// non-volatile (callee-save) registers
constexpr Register sp = x2; // stack pointer
constexpr Register fp = x8; // frame pointer
constexpr Register xheapbase = x27; // base of heap
constexpr Register xcpool = x26; // constant pool cache
constexpr Register xmonitors = x25; // monitors allocated on stack
constexpr Register xlocals = x24; // locals on stack
constexpr Register xthread = x23; // java thread pointer
constexpr Register xbcp = x22; // bytecode pointer
constexpr Register xdispatch = x21; // Dispatch table base
constexpr Register esp = x20; // Java expression stack pointer
constexpr Register x19_sender_sp = x19; // Sender's SP while in interpreter
// temporary registers (caller-save registers)
constexpr Register t0 = x5;
constexpr Register t1 = x6;
constexpr Register t2 = x7;
constexpr Register t3 = x28;
constexpr Register t4 = x29;
constexpr Register t5 = x30;
constexpr Register t6 = x31;
const Register g_INTArgReg[Argument::n_int_register_parameters_c] = {
c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
};
const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = {
c_farg0, c_farg1, c_farg2, c_farg3, c_farg4, c_farg5, c_farg6, c_farg7
};
#define assert_cond(ARG1) assert(ARG1, #ARG1)
// Addressing modes
class Address {
public:
enum mode { no_mode, base_plus_offset, literal };
private:
struct Nonliteral {
Nonliteral(Register base, Register index, int64_t offset)
: _base(base), _index(index), _offset(offset) {}
Register _base;
Register _index;
int64_t _offset;
};
struct Literal {
Literal(address target, const RelocationHolder& rspec)
: _target(target), _rspec(rspec) {}
// If the target is far we'll need to load the effective address of
// this literal into a register to reach it. Otherwise, if near, we can
// use PC-relative addressing.
address _target;
RelocationHolder _rspec;
};
void assert_is_nonliteral() const NOT_DEBUG_RETURN;
void assert_is_literal() const NOT_DEBUG_RETURN;
// Discriminated union, based on _mode.
// - no_mode: uses dummy _nonliteral, for ease of copying.
// - literal: only _literal is used.
// - others: only _nonliteral is used.
enum mode _mode;
union {
Nonliteral _nonliteral;
Literal _literal;
};
// Helper for copy constructor and assignment operator.
// Copy mode-relevant part of a into this.
void copy_data(const Address& a) {
assert(_mode == a._mode, "precondition");
if (_mode == literal) {
new (&_literal) Literal(a._literal);
} else {
// non-literal mode or no_mode.
new (&_nonliteral) Nonliteral(a._nonliteral);
}
}
public:
// no_mode initializes _nonliteral for ease of copying.
Address() :
_mode(no_mode),
_nonliteral(noreg, noreg, 0)
{}
Address(Register r) :
_mode(base_plus_offset),
_nonliteral(r, noreg, 0)
{}
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
Address(Register r, T o) :
_mode(base_plus_offset),
_nonliteral(r, noreg, o)
{}
Address(Register r, ByteSize disp) : Address(r, in_bytes(disp)) {}
Address(address target, const RelocationHolder& rspec) :
_mode(literal),
_literal(target, rspec)
{}
Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
Address(const Address& a) : _mode(a._mode) { copy_data(a); }
// Verify the value is trivially destructible regardless of mode, so our
// destructor can also be trivial, and so our assignment operator doesn't
// need to destruct the old value before copying over it.
static_assert(std::is_trivially_destructible<Literal>::value, "must be");
static_assert(std::is_trivially_destructible<Nonliteral>::value, "must be");
Address& operator=(const Address& a) {
_mode = a._mode;
copy_data(a);
return *this;
}
~Address() = default;
const Register base() const {
assert_is_nonliteral();
return _nonliteral._base;
}
long offset() const {
assert_is_nonliteral();
return _nonliteral._offset;
}
Register index() const {
assert_is_nonliteral();
return _nonliteral._index;
}
mode getMode() const {
return _mode;
}
bool uses(Register reg) const {
return _mode != literal && base() == reg;
}
address target() const {
assert_is_literal();
return _literal._target;
}
const RelocationHolder& rspec() const {
assert_is_literal();
return _literal._rspec;
}
};
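// Usage sketch (illustrative) of the addressing modes above:
//   Address()                        // no_mode
//   Address(x10)                     // base_plus_offset: x10 + 0
//   Address(x10, -16)                // base_plus_offset: x10 - 16
//   Address(target, relocInfo::internal_word_type)   // literal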
// Convenience classes
class RuntimeAddress: public Address {
public:
RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
~RuntimeAddress() {}
};
class OopAddress: public Address {
public:
OopAddress(address target) : Address(target, relocInfo::oop_type) {}
~OopAddress() {}
};
class ExternalAddress: public Address {
private:
static relocInfo::relocType reloc_for_target(address target) {
// Sometimes ExternalAddress is used for values which aren't
// exactly addresses, like the card table base.
// external_word_type can't be used for values in the first page
// so just skip the reloc in that case.
return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
}
public:
ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
~ExternalAddress() {}
};
class InternalAddress: public Address {
public:
InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
~InternalAddress() {}
};
class Assembler : public AbstractAssembler {
protected:
static int zfa_zli_lookup_double(uint64_t value) {
switch(value) {
case 0xbff0000000000000 : return 0;
case 0x0010000000000000 : return 1;
case 0x3ef0000000000000 : return 2;
case 0x3f00000000000000 : return 3;
case 0x3f70000000000000 : return 4;
case 0x3f80000000000000 : return 5;
case 0x3fb0000000000000 : return 6;
case 0x3fc0000000000000 : return 7;
case 0x3fd0000000000000 : return 8;
case 0x3fd4000000000000 : return 9;
case 0x3fd8000000000000 : return 10;
case 0x3fdc000000000000 : return 11;
case 0x3fe0000000000000 : return 12;
case 0x3fe4000000000000 : return 13;
case 0x3fe8000000000000 : return 14;
case 0x3fec000000000000 : return 15;
case 0x3ff0000000000000 : return 16;
case 0x3ff4000000000000 : return 17;
case 0x3ff8000000000000 : return 18;
case 0x3ffc000000000000 : return 19;
case 0x4000000000000000 : return 20;
case 0x4004000000000000 : return 21;
case 0x4008000000000000 : return 22;
case 0x4010000000000000 : return 23;
case 0x4020000000000000 : return 24;
case 0x4030000000000000 : return 25;
case 0x4060000000000000 : return 26;
case 0x4070000000000000 : return 27;
case 0x40e0000000000000 : return 28;
case 0x40f0000000000000 : return 29;
case 0x7ff0000000000000 : return 30;
case 0x7ff8000000000000 : return 31;
default: break;
}
return -1;
}
static int zfa_zli_lookup_float(uint32_t value) {
switch(value) {
case 0xbf800000 : return 0;
case 0x00800000 : return 1;
case 0x37800000 : return 2;
case 0x38000000 : return 3;
case 0x3b800000 : return 4;
case 0x3c000000 : return 5;
case 0x3d800000 : return 6;
case 0x3e000000 : return 7;
case 0x3e800000 : return 8;
case 0x3ea00000 : return 9;
case 0x3ec00000 : return 10;
case 0x3ee00000 : return 11;
case 0x3f000000 : return 12;
case 0x3f200000 : return 13;
case 0x3f400000 : return 14;
case 0x3f600000 : return 15;
case 0x3f800000 : return 16;
case 0x3fa00000 : return 17;
case 0x3fc00000 : return 18;
case 0x3fe00000 : return 19;
case 0x40000000 : return 20;
case 0x40200000 : return 21;
case 0x40400000 : return 22;
case 0x40800000 : return 23;
case 0x41000000 : return 24;
case 0x41800000 : return 25;
case 0x43000000 : return 26;
case 0x43800000 : return 27;
case 0x47000000 : return 28;
case 0x47800000 : return 29;
case 0x7f800000 : return 30;
case 0x7fc00000 : return 31;
default: break;
}
return -1;
}
static int zfa_zli_lookup_half_float(uint16_t value) {
switch(value) {
case 0xbc00 : return 0;
case 0x0400 : return 1;
case 0x0100 : return 2;
case 0x0200 : return 3;
case 0x1c00 : return 4;
case 0x2000 : return 5;
case 0x2c00 : return 6;
case 0x3000 : return 7;
case 0x3400 : return 8;
case 0x3500 : return 9;
case 0x3600 : return 10;
case 0x3700 : return 11;
case 0x3800 : return 12;
case 0x3900 : return 13;
case 0x3a00 : return 14;
case 0x3b00 : return 15;
case 0x3c00 : return 16;
case 0x3d00 : return 17;
case 0x3e00 : return 18;
case 0x3f00 : return 19;
case 0x4000 : return 20;
case 0x4100 : return 21;
case 0x4200 : return 22;
case 0x4400 : return 23;
case 0x4800 : return 24;
case 0x4c00 : return 25;
case 0x5800 : return 26;
case 0x5c00 : return 27;
case 0x7800 : return 28;
case 0x7c00 : return 29;
// case 0x7c00 : return 30; // redundant with 29
case 0x7e00 : return 31;
default: break;
}
return -1;
}
public:
static bool can_zfa_zli_half_float(jshort hf) {
if (!UseZfa || !UseZfh) {
return false;
}
uint16_t hf_bits = (uint16_t)hf;
return zfa_zli_lookup_half_float(hf_bits) != -1;
}
static bool can_zfa_zli_float(jfloat f) {
if (!UseZfa) {
return false;
}
uint32_t f_bits = (uint32_t)jint_cast(f);
return zfa_zli_lookup_float(f_bits) != -1;
}
static bool can_zfa_zli_double(jdouble d) {
if (!UseZfa) {
return false;
}
uint64_t d_bits = (uint64_t)julong_cast(d);
return zfa_zli_lookup_double(d_bits) != -1;
}
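// Usage sketch (illustrative): the lookups above map an exactly
// representable constant to its FLI immediate index, e.g. 1.0f
// (0x3f800000) maps to index 16, so after a successful
// can_zfa_zli_float(1.0f) check, _fli_s(f10, 16) materializes 1.0f
// without a load (f10 chosen arbitrarily).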
enum {
instruction_size = 4,
compressed_instruction_size = 2,
};
// instruction must start at passed address
static bool is_compressed_instr(address instr) {
// The RISC-V ISA Manual, Section 'Base Instruction-Length Encoding':
// Instructions are stored in memory as a sequence of 16-bit little-endian parcels, regardless of
// memory system endianness. Parcels forming one instruction are stored at increasing halfword
// addresses, with the lowest-addressed parcel holding the lowest-numbered bits in the instruction
// specification.
if (UseRVC && (((uint16_t *)instr)[0] & 0b11) != 0b11) {
// 16-bit instructions have their lowest two bits equal to 0b00, 0b01, or 0b10
return true;
}
// 32-bit instructions have their lowest two bits set to 0b11
return false;
}
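// For example (illustrative): c.nop encodes as the halfword 0x0001; its
// lowest two bits are 0b01, so with UseRVC enabled is_compressed_instr()
// reports a 16-bit instruction, while the 0b11 low bits of a 32-bit
// encoding such as 0x00000013 (nop) report a full-width instruction.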
//---< calculate length of instruction >---
// We just use the values set above.
// instruction must start at passed address
static unsigned int instr_len(address instr) {
return is_compressed_instr(instr) ? compressed_instruction_size : instruction_size;
}
//---< longest instructions >---
static unsigned int instr_maxlen() { return instruction_size; }
enum RoundingMode {
rne = 0b000, // round to Nearest, ties to Even
rtz = 0b001, // round towards Zero
rdn = 0b010, // round Down (towards negative infinity)
rup = 0b011, // round Up (towards infinity)
rmm = 0b100, // round to Nearest, ties to Max Magnitude
rdy = 0b111, // in an instruction's rm field, selects the dynamic rounding mode; invalid in the Rounding Mode (frm) register.
};
// handle unaligned access
static inline uint16_t ld_c_instr(address addr) {
return Bytes::get_native_u2(addr);
}
static inline void sd_c_instr(address addr, uint16_t c_instr) {
Bytes::put_native_u2(addr, c_instr);
}
// handle unaligned access
static inline uint32_t ld_instr(address addr) {
return Bytes::get_native_u4(addr);
}
static inline void sd_instr(address addr, uint32_t instr) {
Bytes::put_native_u4(addr, instr);
}
static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) {
assert_cond(msb >= lsb && msb <= 31);
unsigned nbits = msb - lsb + 1;
uint32_t mask = (1U << nbits) - 1;
uint32_t result = val >> lsb;
result &= mask;
return result;
}
static inline int32_t sextract(uint32_t val, unsigned msb, unsigned lsb) {
assert_cond(msb >= lsb && msb <= 31);
int32_t result = val << (31 - msb);
result >>= (31 - msb + lsb);
return result;
}
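// For example (illustrative): with insn = 0x00c58533 (add a0, a1, a2),
// extract(insn, 6, 0) yields the opcode 0b0110011 and
// extract(insn, 11, 7) yields rd = 10 (a0); sextract is the signed
// variant, e.g. for an I-type imm[11:0] field of 0xff8 it yields -8.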
static void patch(address a, unsigned msb, unsigned lsb, unsigned val) {
assert_cond(a != nullptr);
assert_cond(msb >= lsb && msb <= 31);
unsigned nbits = msb - lsb + 1;
guarantee(val < (1U << nbits), "Field too big for insn");
unsigned mask = (1U << nbits) - 1;
val <<= lsb;
mask <<= lsb;
unsigned target = ld_instr(a);
target &= ~mask;
target |= val;
sd_instr(a, target);
}
static void patch(address a, unsigned bit, unsigned val) {
patch(a, bit, bit, val);
}
static void patch_reg(address a, unsigned lsb, Register reg) {
patch(a, lsb + 4, lsb, reg->raw_encoding());
}
static void patch_reg(address a, unsigned lsb, FloatRegister reg) {
patch(a, lsb + 4, lsb, reg->raw_encoding());
}
static void patch_reg(address a, unsigned lsb, VectorRegister reg) {
patch(a, lsb + 4, lsb, reg->raw_encoding());
}
void emit(unsigned insn) {
emit_int32((jint)insn);
}
enum csr {
cycle = 0xc00,
time,
instret,
hpmcounter3,
hpmcounter4,
hpmcounter5,
hpmcounter6,
hpmcounter7,
hpmcounter8,
hpmcounter9,
hpmcounter10,
hpmcounter11,
hpmcounter12,
hpmcounter13,
hpmcounter14,
hpmcounter15,
hpmcounter16,
hpmcounter17,
hpmcounter18,
hpmcounter19,
hpmcounter20,
hpmcounter21,
hpmcounter22,
hpmcounter23,
hpmcounter24,
hpmcounter25,
hpmcounter26,
hpmcounter27,
hpmcounter28,
hpmcounter29,
hpmcounter30,
hpmcounter31 = 0xc1f
};
// Emit an illegal instruction that is known to trap, with any of the
// 32 read-only CSRs available as the input operand.
// According to the RISC-V Assembly Programmer's Manual, a de facto implementation
// of this instruction is the UNIMP pseudo-instruction, 'CSRRW x0, cycle, x0',
// attempting to write zero to the read-only CSR 'cycle' (0xC00).
// The RISC-V ISA provides a set of up to 32 read-only CSRs, 0xC00-0xC1F,
// and an attempt to write into any read-only CSR (whether it exists or not)
// will generate an illegal instruction exception.
void illegal_instruction(csr csr_reg) {
csrrw(x0, (unsigned)csr_reg, x0);
}
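// For example (illustrative): illegal_instruction(cycle) emits
// 'csrrw x0, cycle, x0': csr = 0xC00 in bits 31:20, funct3 = 0b001,
// rd = rs1 = x0, opcode = 0b1110011, i.e. the word 0xc0001073.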
// Register Instruction
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, Register Rs2) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 31, 25, funct7); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
patch_reg((address)&insn, 20, Rs2); \
emit(insn); \
}
INSN(_add, 0b0110011, 0b000, 0b0000000);
INSN(_sub, 0b0110011, 0b000, 0b0100000);
INSN(_andr, 0b0110011, 0b111, 0b0000000);
INSN(_orr, 0b0110011, 0b110, 0b0000000);
INSN(_xorr, 0b0110011, 0b100, 0b0000000);
INSN(sll, 0b0110011, 0b001, 0b0000000);
INSN(sra, 0b0110011, 0b101, 0b0100000);
INSN(srl, 0b0110011, 0b101, 0b0000000);
INSN(slt, 0b0110011, 0b010, 0b0000000);
INSN(sltu, 0b0110011, 0b011, 0b0000000);
INSN(_addw, 0b0111011, 0b000, 0b0000000);
INSN(_subw, 0b0111011, 0b000, 0b0100000);
INSN(sllw, 0b0111011, 0b001, 0b0000000);
INSN(sraw, 0b0111011, 0b101, 0b0100000);
INSN(srlw, 0b0111011, 0b101, 0b0000000);
INSN(_mul, 0b0110011, 0b000, 0b0000001);
INSN(mulh, 0b0110011, 0b001, 0b0000001);
INSN(mulhsu,0b0110011, 0b010, 0b0000001);
INSN(mulhu, 0b0110011, 0b011, 0b0000001);
INSN(mulw, 0b0111011, 0b000, 0b0000001);
INSN(div, 0b0110011, 0b100, 0b0000001);
INSN(divu, 0b0110011, 0b101, 0b0000001);
INSN(divw, 0b0111011, 0b100, 0b0000001);
INSN(divuw, 0b0111011, 0b101, 0b0000001);
INSN(rem, 0b0110011, 0b110, 0b0000001);
INSN(remu, 0b0110011, 0b111, 0b0000001);
INSN(remw, 0b0111011, 0b110, 0b0000001);
INSN(remuw, 0b0111011, 0b111, 0b0000001);
#undef INSN
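// For example (illustrative): _add(x10, x11, x12) patches rd = 10,
// rs1 = 11 and rs2 = 12 into the R-type template above and emits
// 0x00c58533 (add a0, a1, a2).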
private:
// Load
enum LoadWidthFunct3 : uint8_t {
LOAD_WIDTH_BYTE = 0b000,
LOAD_WIDTH_HALFWORD = 0b001,
LOAD_WIDTH_WORD = 0b010,
LOAD_WIDTH_DOUBLEWORD = 0b011,
LOAD_WIDTH_BYTE_UNSIGNED = 0b100,
LOAD_WIDTH_HALFWORD_UNSIGNED = 0b101,
LOAD_WIDTH_WORD_UNSIGNED = 0b110,
// 0b111 is reserved
};
static constexpr uint8_t OP_LOAD_MAJOR = 0b0000011;
static constexpr uint8_t OP_FP_LOAD_MAJOR = 0b0000111;
template <uint8_t op_major, LoadWidthFunct3 width>
void load_base(uint8_t Rd, Register Rs, const int32_t offset) {
guarantee(is_simm12(offset), "offset is invalid.");
unsigned insn = 0;
int32_t val = offset & 0xfff;
patch((address)&insn, 6, 0, op_major);
patch((address)&insn, 11, 7, Rd);
patch((address)&insn, 14, 12, width);
patch_reg((address)&insn, 15, Rs);
patch((address)&insn, 31, 20, val);
emit(insn);
}
template <LoadWidthFunct3 width>
void load_base(Register Rd, Register Rs, const int32_t offset) {
load_base<OP_LOAD_MAJOR, width>(Rd->raw_encoding(), Rs, offset);
}
template <LoadWidthFunct3 width>
void load_base(FloatRegister Rd, Register Rs, const int32_t offset) {
load_base<OP_FP_LOAD_MAJOR, width>(Rd->raw_encoding(), Rs, offset);
}
public:
void lb(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_BYTE>(Rd, Rs, offset);
}
void _lbu(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_BYTE_UNSIGNED>(Rd, Rs, offset);
}
void _lh(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_HALFWORD>(Rd, Rs, offset);
}
void _lhu(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_HALFWORD_UNSIGNED>(Rd, Rs, offset);
}
void _lw(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_WORD>(Rd, Rs, offset);
}
void lwu(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_WORD_UNSIGNED>(Rd, Rs, offset);
}
void _ld(Register Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_DOUBLEWORD>(Rd, Rs, offset);
}
void flh(FloatRegister Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_HALFWORD>(Rd, Rs, offset);
}
void flw(FloatRegister Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_WORD>(Rd, Rs, offset);
}
void _fld(FloatRegister Rd, Register Rs, const int32_t offset) {
load_base<LOAD_WIDTH_DOUBLEWORD>(Rd, Rs, offset);
}
#define INSN(NAME, op, funct3) \
void NAME(Register Rs1, Register Rs2, const int64_t offset) { \
guarantee(is_simm13(offset) && ((offset % 2) == 0), "offset is invalid."); \
unsigned insn = 0; \
uint32_t val = offset & 0x1fff; \
uint32_t val11 = (val >> 11) & 0x1; \
uint32_t val12 = (val >> 12) & 0x1; \
uint32_t low = (val >> 1) & 0xf; \
uint32_t high = (val >> 5) & 0x3f; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_reg((address)&insn, 15, Rs1); \
patch_reg((address)&insn, 20, Rs2); \
patch((address)&insn, 7, val11); \
patch((address)&insn, 11, 8, low); \
patch((address)&insn, 30, 25, high); \
patch((address)&insn, 31, val12); \
emit(insn); \
}
INSN(beq, 0b1100011, 0b000);
INSN(bne, 0b1100011, 0b001);
INSN(bge, 0b1100011, 0b101);
INSN(bgeu, 0b1100011, 0b111);
INSN(blt, 0b1100011, 0b100);
INSN(bltu, 0b1100011, 0b110);
#undef INSN
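// For example (illustrative): beq(x10, x11, 8) scatters offset 8 over
// the B-type immediate fields: imm[4:1] = 0b0100 lands in bits 11:8,
// while imm[11], imm[10:5] and imm[12] are all zero, giving the word
// 0x00b50463 (beq a0, a1, +8).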
private:
enum StoreWidthFunct3 : uint8_t {
STORE_WIDTH_BYTE = 0b000,
STORE_WIDTH_HALFWORD = 0b001,
STORE_WIDTH_WORD = 0b010,
STORE_WIDTH_DOUBLEWORD = 0b011,
// 0b100 to 0b111 are reserved for this opcode
};
static constexpr uint8_t OP_STORE_MAJOR = 0b0100011;
static constexpr uint8_t OP_FP_STORE_MAJOR = 0b0100111;
template <uint8_t op_code, StoreWidthFunct3 width>
void store_base(uint8_t Rs2, Register Rs1, const int32_t offset) {
guarantee(is_simm12(offset), "offset is invalid.");
unsigned insn = 0;
uint32_t val = offset & 0xfff;
uint32_t low = val & 0x1f;
uint32_t high = (val >> 5) & 0x7f;
patch((address)&insn, 6, 0, op_code);
patch((address)&insn, 11, 7, low);
patch((address)&insn, 14, 12, width);
patch_reg((address)&insn, 15, Rs1);
patch((address)&insn, 24, 20, Rs2);
patch((address)&insn, 31, 25, high);
emit(insn);
}
template <StoreWidthFunct3 width>
void store_base(Register Rs2, Register Rs1, const int32_t offset) {
store_base<OP_STORE_MAJOR, width>(Rs2->raw_encoding(), Rs1, offset);
}
template <StoreWidthFunct3 width>
void store_base(FloatRegister Rs2, Register Rs1, const int32_t offset) {
store_base<OP_FP_STORE_MAJOR, width>(Rs2->raw_encoding(), Rs1, offset);
}
public:
void _sb(Register Rs2, Register Rs1, const int32_t offset) {
store_base<STORE_WIDTH_BYTE>(Rs2, Rs1, offset);
}
void _sh(Register Rs2, Register Rs1, const int32_t offset) {
store_base<STORE_WIDTH_HALFWORD>(Rs2, Rs1, offset);
}
void _sw(Register Rs2, Register Rs1, const int32_t offset) {
store_base<STORE_WIDTH_WORD>(Rs2, Rs1, offset);
}
void _sd(Register Rs2, Register Rs1, const int32_t offset) {
store_base<STORE_WIDTH_DOUBLEWORD>(Rs2, Rs1, offset);
}
void fsw(FloatRegister Rs2, Register Rs1, const int32_t offset) {
store_base<STORE_WIDTH_WORD>(Rs2, Rs1, offset);
}
void _fsd(FloatRegister Rs2, Register Rs1, const int32_t offset) {
store_base<STORE_WIDTH_DOUBLEWORD>(Rs2, Rs1, offset);
}
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, const uint32_t csr, Register Rs1) { \
guarantee(is_uimm12(csr), "csr is invalid"); \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
patch((address)&insn, 31, 20, csr); \
emit(insn); \
}
INSN(csrrw, 0b1110011, 0b001);
INSN(csrrs, 0b1110011, 0b010);
INSN(csrrc, 0b1110011, 0b011);
#undef INSN
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, const uint32_t csr, const uint32_t uimm) { \
guarantee(is_uimm12(csr), "csr is invalid"); \
guarantee(is_uimm5(uimm), "uimm is invalid"); \
unsigned insn = 0; \
uint32_t val = uimm & 0x1f; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_reg((address)&insn, 7, Rd); \
patch((address)&insn, 19, 15, val); \
patch((address)&insn, 31, 20, csr); \
emit(insn); \
}
INSN(csrrwi, 0b1110011, 0b101);
INSN(csrrsi, 0b1110011, 0b110);
INSN(csrrci, 0b1110011, 0b111);
#undef INSN
private:
// All calls and jumps must go via MASM.
// Format J-type
void _jal(Register Rd, const int32_t offset) {
guarantee(is_simm21(offset) && ((offset % 2) == 0), "offset is invalid.");
unsigned insn = 0;
patch((address)&insn, 6, 0, 0b1101111);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff));
patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1));
patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff));
patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1));
emit(insn);
}
// Format I-type
void _jalr(Register Rd, Register Rs, const int32_t offset) {
guarantee(is_simm12(offset), "offset is invalid.");
unsigned insn = 0;
patch((address)&insn, 6, 0, 0b1100111);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 14, 12, 0b000);
patch_reg((address)&insn, 15, Rs);
int32_t val = offset & 0xfff;
patch((address)&insn, 31, 20, val);
emit(insn);
}
public:
static uint32_t encode_csrrw(Register Rd, const uint32_t csr, Register Rs1) {
guarantee(is_uimm12(csr), "csr is invalid");
uint32_t insn = 0;
patch((address)&insn, 6, 0, 0b1110011);
patch((address)&insn, 14, 12, 0b001);
patch_reg((address)&insn, 7, Rd);
patch_reg((address)&insn, 15, Rs1);
patch((address)&insn, 31, 20, csr);
return insn;
}
static uint32_t encode_jal(Register Rd, const int32_t offset) {
guarantee(is_simm21(offset) && ((offset % 2) == 0), "offset is invalid.");
uint32_t insn = 0;
patch((address)&insn, 6, 0, 0b1101111);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff));
patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1));
patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff));
patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1));
return insn;
}
static uint32_t encode_jalr(Register Rd, Register Rs, const int32_t offset) {
guarantee(is_simm12(offset), "offset is invalid.");
uint32_t insn = 0;
patch((address)&insn, 6, 0, 0b1100111);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 14, 12, 0b000);
patch_reg((address)&insn, 15, Rs);
int32_t val = offset & 0xfff;
patch((address)&insn, 31, 20, val);
return insn;
}
protected:
enum barrier {
i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001,
ir = i | r, ow = o | w, iorw = i | o | r | w
};
void fence(const uint32_t predecessor, const uint32_t successor) {
unsigned insn = 0;
guarantee(predecessor < 16, "predecessor is invalid");
guarantee(successor < 16, "successor is invalid");
patch((address)&insn, 6, 0, 0b0001111); // opcode
patch((address)&insn, 11, 7, 0b00000); // rd
patch((address)&insn, 14, 12, 0b000);
patch((address)&insn, 19, 15, 0b00000); // rs1
patch((address)&insn, 23, 20, successor); // succ
patch((address)&insn, 27, 24, predecessor); // pred
patch((address)&insn, 31, 28, 0b0000); // fm
emit(insn);
}
void fencei() {
unsigned insn = 0;
patch((address)&insn, 6, 0, 0b0001111); // opcode
patch((address)&insn, 11, 7, 0b00000); // rd
patch((address)&insn, 14, 12, 0b001); // funct3
patch((address)&insn, 19, 15, 0b00000); // rs1
patch((address)&insn, 31, 20, 0b000000000000); // imm[11:0]
emit(insn);
}
public:
#define INSN(NAME, op, funct3, funct7) \
void NAME() { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 11, 7, 0b00000); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 19, 15, 0b00000); \
patch((address)&insn, 31, 20, funct7); \
emit(insn); \
}
INSN(ecall, 0b1110011, 0b000, 0b000000000000);
INSN(_ebreak, 0b1110011, 0b000, 0b000000000001);
#undef INSN
enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11};
private:
enum AmoWidthFunct3 : uint8_t {
AMO_WIDTH_BYTE = 0b000, // Zabha extension
AMO_WIDTH_HALFWORD = 0b001, // Zabha extension
AMO_WIDTH_WORD = 0b010,
AMO_WIDTH_DOUBLEWORD = 0b011,
AMO_WIDTH_QUADWORD = 0b100,
// 0b101 to 0b111 are reserved
};
enum AmoOperationFunct5 : uint8_t {
AMO_ADD = 0b00000,
AMO_SWAP = 0b00001,
AMO_LR = 0b00010,
AMO_SC = 0b00011,
AMO_XOR = 0b00100,
AMO_OR = 0b01000,
AMO_AND = 0b01100,
AMO_MIN = 0b10000,
AMO_MAX = 0b10100,
AMO_MINU = 0b11000,
AMO_MAXU = 0b11100,
AMO_CAS = 0b00101 // Zacas
};
static constexpr uint32_t OP_AMO_MAJOR = 0b0101111;
template <AmoOperationFunct5 funct5, AmoWidthFunct3 width>
void amo_base(Register Rd, Register Rs1, uint8_t Rs2, Aqrl memory_order = aqrl) {
assert(width > AMO_WIDTH_HALFWORD || UseZabha, "Must be");
assert(funct5 != AMO_CAS || UseZacas, "Must be");
unsigned insn = 0;
patch((address)&insn, 6, 0, OP_AMO_MAJOR);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 14, 12, width);
patch_reg((address)&insn, 15, Rs1);
patch((address)&insn, 24, 20, Rs2);
patch((address)&insn, 26, 25, memory_order);
patch((address)&insn, 31, 27, funct5);
emit(insn);
}
template <AmoOperationFunct5 funct5, AmoWidthFunct3 width>
void amo_base(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<funct5, width>(Rd, Rs1, Rs2->raw_encoding(), memory_order);
}
public:
void amoadd_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoadd_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoadd_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoadd_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_ADD, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoswap_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SWAP, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoxor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_XOR, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_OR, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoand_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amoand_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amoand_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amoand_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_AND, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomin_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amomin_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomin_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amomin_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MIN, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amominu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amominu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amominu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amominu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MINU, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomax_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amomax_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomax_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amomax_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAX, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amomaxu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_MAXU, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
protected:
void lr_w(Register Rd, Register Rs1, Aqrl memory_order = aqrl) {
amo_base<AMO_LR, AMO_WIDTH_WORD>(Rd, Rs1, 0, memory_order);
}
void lr_d(Register Rd, Register Rs1, Aqrl memory_order = aqrl) {
amo_base<AMO_LR, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, 0, memory_order);
}
void sc_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SC, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void sc_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_SC, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
void amocas_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_BYTE>(Rd, Rs1, Rs2, memory_order);
}
void amocas_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_HALFWORD>(Rd, Rs1, Rs2, memory_order);
}
void amocas_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_WORD>(Rd, Rs1, Rs2, memory_order);
}
void amocas_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {
amo_base<AMO_CAS, AMO_WIDTH_DOUBLEWORD>(Rd, Rs1, Rs2, memory_order);
}
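// Usage sketch (illustrative): without Zacas, a word CAS is built from
// the LR/SC pair above in a retry loop (register names, labels and
// offsets are placeholders):
//   loop:
//     lr_w(result, addr, aqrl);        // result = *addr, make reservation
//     bne(result, expected, fail);     // value differs -> CAS fails
//     sc_w(tmp, addr, new_val, aqrl);  // tmp == 0 on successful store
//     bne(tmp, zr, loop);              // reservation lost -> retry
// With UseZacas, amocas_w(expected, addr, new_val) performs the
// compare-and-swap in one instruction, leaving the old value in its Rd.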
public:
enum operand_size { int8, int16, int32, uint32, int64 };
// Immediate Instruction
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, Register Rs1, int64_t imm) { \
guarantee(is_simm12(imm), "Immediate is invalid"); \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 31, 20, imm & 0x00000fff); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(_addi, 0b0010011, 0b000);
INSN(_addiw, 0b0011011, 0b000);
INSN(_andi, 0b0010011, 0b111);
INSN(ori, 0b0010011, 0b110);
INSN(xori, 0b0010011, 0b100);
INSN(slti, 0b0010011, 0b010);
#undef INSN
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, Register Rs1, uint64_t imm) { \
guarantee(is_uimm12(imm), "Immediate is invalid"); \
unsigned insn = 0; \
patch((address)&insn,6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 31, 20, imm & 0x00000fff); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(sltiu, 0b0010011, 0b011);
#undef INSN
// Shift Immediate Instruction
#define INSN(NAME, op, funct3, funct6) \
void NAME(Register Rd, Register Rs1, unsigned shamt) { \
guarantee(shamt <= 0x3f, "Shamt is invalid"); \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 25, 20, shamt); \
patch((address)&insn, 31, 26, funct6); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(_slli, 0b0010011, 0b001, 0b000000);
INSN(_srai, 0b0010011, 0b101, 0b010000);
INSN(_srli, 0b0010011, 0b101, 0b000000);
#undef INSN
// Shift Word Immediate Instruction
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, unsigned shamt) { \
guarantee(shamt <= 0x1f, "Shamt is invalid"); \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 24, 20, shamt); \
patch((address)&insn, 31, 25, funct7); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(slliw, 0b0011011, 0b001, 0b0000000);
INSN(sraiw, 0b0011011, 0b101, 0b0100000);
INSN(srliw, 0b0011011, 0b101, 0b0000000);
#undef INSN
// Upper Immediate Instruction
#define INSN(NAME, op) \
void NAME(Register Rd, int32_t imm) { \
int32_t upperImm = imm >> 12; \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch_reg((address)&insn, 7, Rd); \
upperImm &= 0x000fffff; \
patch((address)&insn, 31, 12, upperImm); \
emit(insn); \
}
INSN(_lui, 0b0110111);
INSN(auipc, 0b0010111);
#undef INSN
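// Usage sketch (illustrative): a 32-bit constant such as 0x12345678 is
// materialized with _lui plus _addiw:
//   _lui(x10, 0x12345678);     // keeps imm[31:12]: x10 = 0x12345000
//   _addiw(x10, x10, 0x678);   // adds imm[11:0]:   x10 = 0x12345678
// When imm[11] is set, the lui portion must be biased by +0x1000 to
// compensate for addiw sign-extending its immediate.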
// ==========================
// Floating Point Instructions
// ==========================
static constexpr uint32_t OP_FP_MAJOR = 0b1010011;
enum FmtPrecision : uint8_t {
S_32_sp = 0b00,
D_64_dp = 0b01,
H_16_hp = 0b10,
Q_128_qp = 0b11
};
private:
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(uint8_t Rd, uint8_t Rs1, uint8_t Rs2, RoundingMode rm) {
assert(Fmt != H_16_hp || UseZfh || UseZfhmin, "No half precision enabled");
assert_cond(Fmt != Q_128_qp);
guarantee(is_uimm3(rm), "Rounding mode is invalid");
guarantee(is_uimm2(Fmt), "Fmt is invalid");
guarantee(is_uimm5(funct5), "Funct5 is invalid");
uint32_t insn = 0;
patch((address)&insn, 6, 0, OP_FP_MAJOR);
patch((address)&insn, 11, 7, Rd);
patch((address)&insn, 14, 12, rm);
patch((address)&insn, 19, 15, Rs1);
patch((address)&insn, 24, 20, Rs2);
patch((address)&insn, 26, 25, Fmt);
patch((address)&insn, 31, 27, funct5);
emit(insn);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm) {
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2->raw_encoding(), rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, int8_t rm) {
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2->raw_encoding(), (RoundingMode)rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(Register Rd, FloatRegister Rs1, FloatRegister Rs2, int8_t rm) {
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2->raw_encoding(), (RoundingMode)rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, FloatRegister Rs1, int8_t Rs2, int8_t rm) {
guarantee(is_uimm5(Rs2), "Rs2 is invalid");
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2, (RoundingMode)rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, Register Rs1, FloatRegister Rs2, RoundingMode rm) {
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2->raw_encoding(), rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(Register Rd, FloatRegister Rs1, uint8_t Rs2, RoundingMode rm) {
guarantee(is_uimm5(Rs2), "Rs2 is invalid");
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2, rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(Register Rd, FloatRegister Rs1, uint8_t Rs2, uint8_t rm) {
guarantee(is_uimm5(Rs2), "Rs2 is invalid");
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2, (RoundingMode)rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, Register Rs1, uint8_t Rs2, RoundingMode rm) {
guarantee(is_uimm5(Rs2), "Rs2 is invalid");
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2, rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, Register Rs1, uint8_t Rs2, int8_t rm) {
guarantee(is_uimm5(Rs2), "Rs2 is invalid");
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1->raw_encoding(), Rs2, (RoundingMode)rm);
}
template <FmtPrecision Fmt, uint8_t funct5>
void fp_base(FloatRegister Rd, uint8_t Rs1, uint8_t Rs2, int8_t rm) {
guarantee(is_uimm5(Rs1), "Rs1 is invalid");
guarantee(is_uimm5(Rs2), "Rs2 is invalid");
fp_base<Fmt, funct5>(Rd->raw_encoding(), Rs1, Rs2, (RoundingMode)rm);
}
public:
enum FClassBits {
minf = 1 << 0, // negative infinity
mnorm = 1 << 1, // negative normal number
msubnorm = 1 << 2, // negative subnormal number
mzero = 1 << 3, // negative zero
pzero = 1 << 4, // positive zero
psubnorm = 1 << 5, // positive subnormal number
pnorm = 1 << 6, // positive normal number
pinf = 1 << 7, // positive infinity
snan = 1 << 8, // signaling NaN
qnan = 1 << 9, // quiet NaN
zero = mzero | pzero,
subnorm = msubnorm | psubnorm,
norm = mnorm | pnorm,
inf = minf | pinf,
nan = snan | qnan,
finite = zero | subnorm | norm,
};
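// Usage sketch (illustrative): testing f10 for NaN with the bits above
// (t0 and the branch offset are placeholders):
//   fclass_d(t0, f10);           // exactly one FClassBits bit set in t0
//   _andi(t0, t0, snan | qnan);  // t0 != 0 iff f10 holds a NaN
//   bne(t0, zr, is_nan_offset);  // take the NaN path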
void fsqrt_s(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b01011>(Rd, Rs1, 0b00000, rm);
}
void fsqrt_d(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b01011>(Rd, Rs1, 0b00000, rm);
}
void fcvt_s_d(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b01000>(Rd, Rs1, 0b00001, rm);
}
void fcvt_d_s(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b01000>(Rd, Rs1, 0b00000, rm);
}
void fsgnj_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b00100>(Rd, Rs1, Rs2, 0b000);
}
void fsgnjn_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b00100>(Rd, Rs1, Rs2, 0b001);
}
void fsgnjx_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b00100>(Rd, Rs1, Rs2, 0b010);
}
void fmin_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b00101>(Rd, Rs1, Rs2, 0b000);
}
void fmax_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b00101>(Rd, Rs1, Rs2, 0b001);
}
void fsgnj_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b00100>(Rd, Rs1, Rs2, 0b000);
}
void fsgnjn_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b00100>(Rd, Rs1, Rs2, 0b001);
}
void fsgnjx_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b00100>(Rd, Rs1, Rs2, 0b010);
}
void fmin_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b00101>(Rd, Rs1, Rs2, 0b000);
}
void fmax_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b00101>(Rd, Rs1, Rs2, 0b001);
}
void feq_s(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b10100>(Rd, Rs1, Rs2, 0b010);
}
void flt_s(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b10100>(Rd, Rs1, Rs2, 0b001);
}
void fle_s(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<S_32_sp, 0b10100>(Rd, Rs1, Rs2, 0b000);
}
void feq_d(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b10100>(Rd, Rs1, Rs2, 0b010);
}
void fle_d(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b10100>(Rd, Rs1, Rs2, 0b000);
}
void flt_d(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {
fp_base<D_64_dp, 0b10100>(Rd, Rs1, Rs2, 0b001);
}
void fadd_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b00000>(Rd, Rs1, Rs2, rm);
}
void fsub_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b00001>(Rd, Rs1, Rs2, rm);
}
void fmul_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b00010>(Rd, Rs1, Rs2, rm);
}
void fdiv_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b00011>(Rd, Rs1, Rs2, rm);
}
void fadd_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b00000>(Rd, Rs1, Rs2, rm);
}
void fsub_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b00001>(Rd, Rs1, Rs2, rm);
}
void fmul_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b00010>(Rd, Rs1, Rs2, rm);
}
void fdiv_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b00011>(Rd, Rs1, Rs2, rm);
}
void fcvt_s_w(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b11010>(Rd, Rs1, 0b00000, rm);
}
void fcvt_s_wu(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b11010>(Rd, Rs1, 0b00001, rm);
}
void fcvt_s_l(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b11010>(Rd, Rs1, 0b00010, rm);
}
void fcvt_s_lu(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<S_32_sp, 0b11010>(Rd, Rs1, 0b00011, rm);
}
void fcvt_d_w(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b11010>(Rd, Rs1, 0b00000, rm);
}
void fcvt_d_wu(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b11010>(Rd, Rs1, 0b00001, rm);
}
void fcvt_d_l(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b11010>(Rd, Rs1, 0b00010, rm);
}
void fcvt_d_lu(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {
fp_base<D_64_dp, 0b11010>(Rd, Rs1, 0b00011, rm);
}
void fcvt_w_s(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<S_32_sp, 0b11000>(Rd, Rs1, 0b00000, rm);
}
void fcvt_l_s(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<S_32_sp, 0b11000>(Rd, Rs1, 0b00010, rm);
}
void fcvt_wu_s(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<S_32_sp, 0b11000>(Rd, Rs1, 0b00001, rm);
}
void fcvt_lu_s(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<S_32_sp, 0b11000>(Rd, Rs1, 0b00011, rm);
}
void fcvt_w_d(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<D_64_dp, 0b11000>(Rd, Rs1, 0b00000, rm);
}
void fcvt_wu_d(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<D_64_dp, 0b11000>(Rd, Rs1, 0b00001, rm);
}
void fcvt_l_d(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<D_64_dp, 0b11000>(Rd, Rs1, 0b00010, rm);
}
void fcvt_lu_d(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {
fp_base<D_64_dp, 0b11000>(Rd, Rs1, 0b00011, rm);
}
void fmv_w_x(FloatRegister Rd, Register Rs1) {
fp_base<S_32_sp, 0b11110>(Rd, Rs1, 0b00000, 0b000);
}
void fmv_d_x(FloatRegister Rd, Register Rs1) {
fp_base<D_64_dp, 0b11110>(Rd, Rs1, 0b00000, 0b000);
}
void fclass_s(Register Rd, FloatRegister Rs1) {
fp_base<S_32_sp, 0b11100>(Rd, Rs1, 0b00000, 0b001);
}
void fclass_d(Register Rd, FloatRegister Rs1) {
fp_base<D_64_dp, 0b11100>(Rd, Rs1, 0b00000, 0b001);
}
void fmv_x_w(Register Rd, FloatRegister Rs1) {
fp_base<S_32_sp, 0b11100>(Rd, Rs1, 0b00000, 0b000);
}
void fmv_x_d(Register Rd, FloatRegister Rs1) {
fp_base<D_64_dp, 0b11100>(Rd, Rs1, 0b00000, 0b000);
}
private:
template <FmtPrecision Fmt, uint8_t OpVal>
void fp_fm(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm) {
assert_cond(Fmt != Q_128_qp);
guarantee(is_uimm3(rm), "Rounding mode is invalid");
guarantee(is_uimm2(Fmt), "Fmt is invalid");
unsigned insn = 0;
patch((address)&insn, 6, 0, OpVal);
patch_reg((address)&insn, 7, Rd);
patch((address)&insn, 14, 12, rm);
patch_reg((address)&insn, 15, Rs1);
patch_reg((address)&insn, 20, Rs2);
patch((address)&insn, 26, 25, Fmt);
patch_reg((address)&insn, 27, Rs3);
emit(insn);
}
public:
void fmadd_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<S_32_sp, 0b1000011>(Rd, Rs1, Rs2, Rs3, rm);
}
void fmsub_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<S_32_sp, 0b1000111>(Rd, Rs1, Rs2, Rs3, rm);
}
void fnmsub_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<S_32_sp, 0b1001011>(Rd, Rs1, Rs2, Rs3, rm);
}
void fnmadd_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<S_32_sp, 0b1001111>(Rd, Rs1, Rs2, Rs3, rm);
}
void fmadd_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<D_64_dp, 0b1000011>(Rd, Rs1, Rs2, Rs3, rm);
}
void fmsub_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<D_64_dp, 0b1000111>(Rd, Rs1, Rs2, Rs3, rm);
}
void fnmsub_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<D_64_dp, 0b1001011>(Rd, Rs1, Rs2, Rs3, rm);
}
void fnmadd_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
fp_fm<D_64_dp, 0b1001111>(Rd, Rs1, Rs2, Rs3, rm);
}
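// Semantics of the fused multiply-add forms above (per the RISC-V F/D
// extensions), each with a single rounding step:
//   fmadd:  Rd =  (Rs1 * Rs2) + Rs3     fmsub:  Rd =  (Rs1 * Rs2) - Rs3
//   fnmsub: Rd = -(Rs1 * Rs2) + Rs3     fnmadd: Rd = -(Rs1 * Rs2) - Rs3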
// -------------- ZFH Instruction Definitions --------------
// Zfh Standard Extensions for Half-Precision Floating-Point
void fclass_h(Register Rd, FloatRegister Rs1) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b11100>(Rd, Rs1, 0b00000, 0b001);
}
// Zfh and Zfhmin Half-Precision Floating-Point
void fcvt_s_h(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
assert_cond(UseZfh || UseZfhmin);
fp_base<S_32_sp, 0b01000>(Rd, Rs1, 0b00010, rm);
}
void fcvt_h_s(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
assert_cond(UseZfh || UseZfhmin);
fp_base<H_16_hp, 0b01000>(Rd, Rs1, 0b00000, rm);
}
void fmv_h_x(FloatRegister Rd, Register Rs1) {
assert_cond(UseZfh || UseZfhmin);
fp_base<H_16_hp, 0b11110>(Rd, Rs1, 0b00000, 0b000);
}
void fmv_x_h(Register Rd, FloatRegister Rs1) {
assert_cond(UseZfh || UseZfhmin);
fp_base<H_16_hp, 0b11100>(Rd, Rs1, 0b00000, 0b000);
}
void fadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00000>(Rd, Rs1, Rs2, rm);
}
void fsub_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00001>(Rd, Rs1, Rs2, rm);
}
void fmul_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00010>(Rd, Rs1, Rs2, rm);
}
void fdiv_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00011>(Rd, Rs1, Rs2, rm);
}
void fsqrt_h(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b01011>(Rd, Rs1, 0b00000, rm);
}
void fmin_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b000);
}
void fmax_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b001);
}
void fmadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_fm<H_16_hp, 0b1000011>(Rd, Rs1, Rs2, Rs3, rm);
}
// -------------- ZFA Instruction Definitions --------------
// Zfa Extension for Additional Floating-Point Instructions
void _fli_h(FloatRegister Rd, uint8_t Rs1) {
assert_cond(UseZfa && UseZfh);
fp_base<H_16_hp, 0b11110>(Rd, Rs1, 0b00001, 0b000);
}
void _fli_s(FloatRegister Rd, uint8_t Rs1) {
assert_cond(UseZfa);
fp_base<S_32_sp, 0b11110>(Rd, Rs1, 0b00001, 0b000);
}
void _fli_d(FloatRegister Rd, uint8_t Rs1) {
assert_cond(UseZfa);
fp_base<D_64_dp, 0b11110>(Rd, Rs1, 0b00001, 0b000);
}
void fminm_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfa && UseZfh);
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b010);
}
void fmaxm_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfa && UseZfh);
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b011);
}
void fminm_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfa);
fp_base<S_32_sp, 0b00101>(Rd, Rs1, Rs2, 0b010);
}
void fmaxm_s(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfa);
fp_base<S_32_sp, 0b00101>(Rd, Rs1, Rs2, 0b011);
}
void fminm_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfa);
fp_base<D_64_dp, 0b00101>(Rd, Rs1, Rs2, 0b010);
}
void fmaxm_d(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfa);
fp_base<D_64_dp, 0b00101>(Rd, Rs1, Rs2, 0b011);
}
// ==========================
// RISC-V Vector Extension
// ==========================
enum SEW {
e8,
e16,
e32,
e64,
RESERVED,
};
enum LMUL {
mf8 = 0b101,
mf4 = 0b110,
mf2 = 0b111,
m1 = 0b000,
m2 = 0b001,
m4 = 0b010,
m8 = 0b011,
};
enum VMA {
mu, // undisturbed
ma, // agnostic
};
enum VTA {
tu, // undisturbed
ta, // agnostic
};
static Assembler::SEW elembytes_to_sew(int ebytes) {
assert(ebytes > 0 && ebytes <= 8, "unsupported element size");
return (Assembler::SEW) exact_log2(ebytes);
}
static Assembler::SEW elemtype_to_sew(BasicType etype) {
return Assembler::elembytes_to_sew(type2aelembytes(etype));
}
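// For example (illustrative): elembytes_to_sew(4) == e32, since
// exact_log2(4) == 2 and the SEW enumerators ascend from e8 == 0 to
// e64 == 3; elemtype_to_sew(T_INT) likewise yields e32.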
#define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \
/* If vill then other bits of vtype must be zero. */ \
guarantee(!vill, "vill not supported"); \
patch((address)&insn, lsb + 2, lsb, vlmul); \
patch((address)&insn, lsb + 5, lsb + 3, vsew); \
patch((address)&insn, lsb + 6, vta); \
patch((address)&insn, lsb + 7, vma); \
patch((address)&insn, hsb - 1, lsb + 8, 0); \
patch((address)&insn, hsb, vill)
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, Register Rs1, SEW sew, LMUL lmul = m1, \
VMA vma = mu, VTA vta = tu, bool vill = false) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch_vtype(30, 20, lmul, sew, vta, vma, vill); \
patch((address)&insn, 31, 0); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(vsetvli, 0b1010111, 0b111);
#undef INSN
#define INSN(NAME, op, funct3) \
void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1, \
VMA vma = mu, VTA vta = tu, bool vill = false) { \
unsigned insn = 0; \
guarantee(is_uimm5(imm), "uimm is invalid"); \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 19, 15, imm); \
patch_vtype(29, 20, lmul, sew, vta, vma, vill); \
patch((address)&insn, 31, 30, 0b11); \
patch_reg((address)&insn, 7, Rd); \
emit(insn); \
}
INSN(vsetivli, 0b1010111, 0b111);
#undef INSN
#undef patch_vtype
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, Register Rs2) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 31, 25, funct7); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
patch_reg((address)&insn, 20, Rs2); \
emit(insn); \
}
// Vector Configuration Instruction
INSN(vsetvl, 0b1010111, 0b111, 0b1000000);
#undef INSN
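// For reference (RVV spec): vsetvli/vsetvl with rd != x0 and rs1 == x0 request VLMAX,
// i.e. vl is set to the maximum number of elements for the selected SEW/LMUL.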
enum VectorMask {
v0_t = 0b0,
unmasked = 0b1
};
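// v0_t encodes vm = 0 (the operation is masked by register v0); unmasked encodes vm = 1.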
#define patch_VArith(op, Reg, funct3, Reg_or_Imm5, Vs2, vm, funct6) \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 19, 15, Reg_or_Imm5); \
patch((address)&insn, 25, vm); \
patch((address)&insn, 31, 26, funct6); \
patch_reg((address)&insn, 7, Reg); \
patch_reg((address)&insn, 20, Vs2); \
emit(insn)
// r2_vm
#define INSN(NAME, op, funct3, Vs1, funct6) \
void NAME(Register Rd, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
}
// Vector Mask
INSN(vcpop_m, 0b1010111, 0b010, 0b10000, 0b010000);
INSN(vfirst_m, 0b1010111, 0b010, 0b10001, 0b010000);
#undef INSN
#define INSN(NAME, op, funct3, Vs1, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \
}
// Vector Integer Extension
INSN(vzext_vf2, 0b1010111, 0b010, 0b00110, 0b010010);
INSN(vzext_vf4, 0b1010111, 0b010, 0b00100, 0b010010);
INSN(vzext_vf8, 0b1010111, 0b010, 0b00010, 0b010010);
INSN(vsext_vf2, 0b1010111, 0b010, 0b00111, 0b010010);
INSN(vsext_vf4, 0b1010111, 0b010, 0b00101, 0b010010);
INSN(vsext_vf8, 0b1010111, 0b010, 0b00011, 0b010010);
// Vector Mask
INSN(vmsbf_m, 0b1010111, 0b010, 0b00001, 0b010100);
INSN(vmsif_m, 0b1010111, 0b010, 0b00011, 0b010100);
INSN(vmsof_m, 0b1010111, 0b010, 0b00010, 0b010100);
INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100);
// Vector Single-Width Floating-Point/Integer Type-Convert Instructions
INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010);
INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010);
// Vector Widening Floating-Point/Integer Type-Convert Instructions
INSN(vfwcvt_f_x_v, 0b1010111, 0b001, 0b01011, 0b010010);
INSN(vfwcvt_f_f_v, 0b1010111, 0b001, 0b01100, 0b010010);
INSN(vfwcvt_rtz_x_f_v, 0b1010111, 0b001, 0b01111, 0b010010);
// Vector Narrowing Floating-Point/Integer Type-Convert Instructions
INSN(vfncvt_f_x_w, 0b1010111, 0b001, 0b10011, 0b010010);
INSN(vfncvt_f_f_w, 0b1010111, 0b001, 0b10100, 0b010010);
INSN(vfncvt_rtz_x_f_w, 0b1010111, 0b001, 0b10111, 0b010010);
// Vector Floating-Point Instruction
INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011);
INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011);
#undef INSN
// r2rd
#define INSN(NAME, op, funct3, simm5, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2) { \
patch_VArith(op, Vd, funct3, simm5, Vs2, vm, funct6); \
}
// Vector Whole Vector Register Move
INSN(vmv1r_v, 0b1010111, 0b011, 0b00000, 0b1, 0b100111);
INSN(vmv2r_v, 0b1010111, 0b011, 0b00001, 0b1, 0b100111);
INSN(vmv4r_v, 0b1010111, 0b011, 0b00011, 0b1, 0b100111);
INSN(vmv8r_v, 0b1010111, 0b011, 0b00111, 0b1, 0b100111);
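// Note: the simm5 field holds NREG - 1, hence 0, 1, 3 and 7 for vmv1r/vmv2r/vmv4r/vmv8r.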
#undef INSN
#define INSN(NAME, op, funct3, Vs1, vm, funct6) \
void NAME(FloatRegister Rd, VectorRegister Vs2) { \
patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
}
// Vector Floating-Point Move Instruction
INSN(vfmv_f_s, 0b1010111, 0b001, 0b00000, 0b1, 0b010000);
#undef INSN
#define INSN(NAME, op, funct3, Vs1, vm, funct6) \
void NAME(Register Rd, VectorRegister Vs2) { \
patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
}
// Vector Integer Scalar Move Instructions
INSN(vmv_x_s, 0b1010111, 0b010, 0b00000, 0b1, 0b010000);
#undef INSN
// r_vm
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \
guarantee(is_uimm5(imm), "uimm is invalid"); \
patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
}
// Vector Single-Width Bit Shift Instructions
INSN(vsra_vi, 0b1010111, 0b011, 0b101001);
INSN(vsrl_vi, 0b1010111, 0b011, 0b101000);
INSN(vsll_vi, 0b1010111, 0b011, 0b100101);
// Vector Slide Instructions
INSN(vslideup_vi, 0b1010111, 0b011, 0b001110);
INSN(vslidedown_vi, 0b1010111, 0b011, 0b001111);
// Vector Narrowing Integer Right Shift Instructions
INSN(vnsra_wi, 0b1010111, 0b011, 0b101101);
INSN(vnsrl_wi, 0b1010111, 0b011, 0b101100);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
INSN(vfnmsub_vv, 0b1010111, 0b001, 0b101011);
INSN(vfmsub_vv, 0b1010111, 0b001, 0b101010);
INSN(vfnmadd_vv, 0b1010111, 0b001, 0b101001);
INSN(vfmadd_vv, 0b1010111, 0b001, 0b101000);
INSN(vfnmsac_vv, 0b1010111, 0b001, 0b101111);
INSN(vfmsac_vv, 0b1010111, 0b001, 0b101110);
INSN(vfmacc_vv, 0b1010111, 0b001, 0b101100);
INSN(vfnmacc_vv, 0b1010111, 0b001, 0b101101);
// Vector Single-Width Integer Multiply-Add Instructions
INSN(vnmsub_vv, 0b1010111, 0b010, 0b101011);
INSN(vmadd_vv, 0b1010111, 0b010, 0b101001);
INSN(vnmsac_vv, 0b1010111, 0b010, 0b101111);
INSN(vmacc_vv, 0b1010111, 0b010, 0b101101);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Single-Width Integer Multiply-Add Instructions
INSN(vnmsub_vx, 0b1010111, 0b110, 0b101011);
INSN(vmadd_vx, 0b1010111, 0b110, 0b101001);
INSN(vnmsac_vx, 0b1010111, 0b110, 0b101111);
INSN(vmacc_vx, 0b1010111, 0b110, 0b101101);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, FloatRegister Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
INSN(vfnmsub_vf, 0b1010111, 0b101, 0b101011);
INSN(vfmsub_vf, 0b1010111, 0b101, 0b101010);
INSN(vfnmadd_vf, 0b1010111, 0b101, 0b101001);
INSN(vfmadd_vf, 0b1010111, 0b101, 0b101000);
INSN(vfnmsac_vf, 0b1010111, 0b101, 0b101111);
INSN(vfmsac_vf, 0b1010111, 0b101, 0b101110);
INSN(vfmacc_vf, 0b1010111, 0b101, 0b101100);
INSN(vfnmacc_vf, 0b1010111, 0b101, 0b101101);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Single-Width Floating-Point Reduction Instructions
INSN(vfredusum_vs, 0b1010111, 0b001, 0b000001);
INSN(vfredosum_vs, 0b1010111, 0b001, 0b000011);
INSN(vfredmin_vs, 0b1010111, 0b001, 0b000101);
INSN(vfredmax_vs, 0b1010111, 0b001, 0b000111);
// Vector Single-Width Integer Reduction Instructions
INSN(vredsum_vs, 0b1010111, 0b010, 0b000000);
INSN(vredand_vs, 0b1010111, 0b010, 0b000001);
INSN(vredor_vs, 0b1010111, 0b010, 0b000010);
INSN(vredxor_vs, 0b1010111, 0b010, 0b000011);
INSN(vredminu_vs, 0b1010111, 0b010, 0b000100);
INSN(vredmin_vs, 0b1010111, 0b010, 0b000101);
INSN(vredmaxu_vs, 0b1010111, 0b010, 0b000110);
INSN(vredmax_vs, 0b1010111, 0b010, 0b000111);
// Vector Widening Integer Reduction Instructions
INSN(vwredsum_vs, 0b1010111, 0b000, 0b110001);
INSN(vwredsumu_vs, 0b1010111, 0b000, 0b110000);
// Vector Floating-Point Compare Instructions
INSN(vmfle_vv, 0b1010111, 0b001, 0b011001);
INSN(vmflt_vv, 0b1010111, 0b001, 0b011011);
INSN(vmfne_vv, 0b1010111, 0b001, 0b011100);
INSN(vmfeq_vv, 0b1010111, 0b001, 0b011000);
// Vector Floating-Point Sign-Injection Instructions
INSN(vfsgnj_vv, 0b1010111, 0b001, 0b001000);
INSN(vfsgnjx_vv, 0b1010111, 0b001, 0b001010);
INSN(vfsgnjn_vv, 0b1010111, 0b001, 0b001001);
// Vector Floating-Point MIN/MAX Instructions
INSN(vfmax_vv, 0b1010111, 0b001, 0b000110);
INSN(vfmin_vv, 0b1010111, 0b001, 0b000100);
// Vector Single-Width Floating-Point Multiply/Divide Instructions
INSN(vfdiv_vv, 0b1010111, 0b001, 0b100000);
INSN(vfmul_vv, 0b1010111, 0b001, 0b100100);
// Vector Single-Width Floating-Point Add/Subtract Instructions
INSN(vfsub_vv, 0b1010111, 0b001, 0b000010);
INSN(vfadd_vv, 0b1010111, 0b001, 0b000000);
// Vector Single-Width Fractional Multiply with Rounding and Saturation
INSN(vsmul_vv, 0b1010111, 0b000, 0b100111);
// Vector Integer Divide Instructions
INSN(vrem_vv, 0b1010111, 0b010, 0b100011);
INSN(vremu_vv, 0b1010111, 0b010, 0b100010);
INSN(vdiv_vv, 0b1010111, 0b010, 0b100001);
INSN(vdivu_vv, 0b1010111, 0b010, 0b100000);
// Vector Single-Width Integer Multiply Instructions
INSN(vmulhsu_vv, 0b1010111, 0b010, 0b100110);
INSN(vmulhu_vv, 0b1010111, 0b010, 0b100100);
INSN(vmulh_vv, 0b1010111, 0b010, 0b100111);
INSN(vmul_vv, 0b1010111, 0b010, 0b100101);
// Vector Widening Integer Multiply Instructions
INSN(vwmul_vv, 0b1010111, 0b010, 0b111011);
INSN(vwmulu_vv, 0b1010111, 0b010, 0b111000);
// Vector Integer Min/Max Instructions
INSN(vmax_vv, 0b1010111, 0b000, 0b000111);
INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110);
INSN(vmin_vv, 0b1010111, 0b000, 0b000101);
INSN(vminu_vv, 0b1010111, 0b000, 0b000100);
// Vector Integer Comparison Instructions
INSN(vmsle_vv, 0b1010111, 0b000, 0b011101);
INSN(vmsleu_vv, 0b1010111, 0b000, 0b011100);
INSN(vmslt_vv, 0b1010111, 0b000, 0b011011);
INSN(vmsltu_vv, 0b1010111, 0b000, 0b011010);
INSN(vmsne_vv, 0b1010111, 0b000, 0b011001);
INSN(vmseq_vv, 0b1010111, 0b000, 0b011000);
// Vector Single-Width Bit Shift Instructions
INSN(vsra_vv, 0b1010111, 0b000, 0b101001);
INSN(vsrl_vv, 0b1010111, 0b000, 0b101000);
INSN(vsll_vv, 0b1010111, 0b000, 0b100101);
// Vector Bitwise Logical Instructions
INSN(vxor_vv, 0b1010111, 0b000, 0b001011);
INSN(vor_vv, 0b1010111, 0b000, 0b001010);
INSN(vand_vv, 0b1010111, 0b000, 0b001001);
// Vector Single-Width Integer Add and Subtract
INSN(vadd_vv, 0b1010111, 0b000, 0b000000);
INSN(vsub_vv, 0b1010111, 0b000, 0b000010);
// Vector Saturating Integer Add and Subtract
INSN(vsadd_vv, 0b1010111, 0b000, 0b100001);
INSN(vsaddu_vv, 0b1010111, 0b000, 0b100000);
INSN(vssub_vv, 0b1010111, 0b000, 0b100011);
INSN(vssubu_vv, 0b1010111, 0b000, 0b100010);
// Vector Register Gather Instructions
INSN(vrgather_vv, 0b1010111, 0b000, 0b001100);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Integer Divide Instructions
INSN(vrem_vx, 0b1010111, 0b110, 0b100011);
INSN(vremu_vx, 0b1010111, 0b110, 0b100010);
INSN(vdiv_vx, 0b1010111, 0b110, 0b100001);
INSN(vdivu_vx, 0b1010111, 0b110, 0b100000);
// Vector Single-Width Integer Multiply Instructions
INSN(vmulhsu_vx, 0b1010111, 0b110, 0b100110);
INSN(vmulhu_vx, 0b1010111, 0b110, 0b100100);
INSN(vmulh_vx, 0b1010111, 0b110, 0b100111);
INSN(vmul_vx, 0b1010111, 0b110, 0b100101);
// Vector Widening Integer Add/Subtract
INSN(vwadd_vx, 0b1010111, 0b110, 0b110001);
// Vector Integer Min/Max Instructions
INSN(vmax_vx, 0b1010111, 0b100, 0b000111);
INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110);
INSN(vmin_vx, 0b1010111, 0b100, 0b000101);
INSN(vminu_vx, 0b1010111, 0b100, 0b000100);
// Vector Integer Comparison Instructions
INSN(vmsgt_vx, 0b1010111, 0b100, 0b011111);
INSN(vmsgtu_vx, 0b1010111, 0b100, 0b011110);
INSN(vmsle_vx, 0b1010111, 0b100, 0b011101);
INSN(vmsleu_vx, 0b1010111, 0b100, 0b011100);
INSN(vmslt_vx, 0b1010111, 0b100, 0b011011);
INSN(vmsltu_vx, 0b1010111, 0b100, 0b011010);
INSN(vmsne_vx, 0b1010111, 0b100, 0b011001);
INSN(vmseq_vx, 0b1010111, 0b100, 0b011000);
// Vector Narrowing Integer Right Shift Instructions
INSN(vnsra_wx, 0b1010111, 0b100, 0b101101);
INSN(vnsrl_wx, 0b1010111, 0b100, 0b101100);
// Vector Single-Width Bit Shift Instructions
INSN(vsra_vx, 0b1010111, 0b100, 0b101001);
INSN(vsrl_vx, 0b1010111, 0b100, 0b101000);
INSN(vsll_vx, 0b1010111, 0b100, 0b100101);
// Vector Bitwise Logical Instructions
INSN(vxor_vx, 0b1010111, 0b100, 0b001011);
INSN(vor_vx, 0b1010111, 0b100, 0b001010);
INSN(vand_vx, 0b1010111, 0b100, 0b001001);
// Vector Single-Width Integer Add and Subtract
INSN(vsub_vx, 0b1010111, 0b100, 0b000010);
INSN(vadd_vx, 0b1010111, 0b100, 0b000000);
// Vector Integer reverse subtract
INSN(vrsub_vx, 0b1010111, 0b100, 0b000011);
// Vector Slide Instructions
INSN(vslidedown_vx, 0b1010111, 0b100, 0b001111);
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Integer Merge Instructions
INSN(vmerge_vxm, 0b1010111, 0b100, 0b0, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Floating-Point Merge Instruction
INSN(vfmerge_vfm, 0b1010111, 0b101, 0b0, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Floating-Point Compare Instructions
INSN(vmfge_vf, 0b1010111, 0b101, 0b011111);
INSN(vmfgt_vf, 0b1010111, 0b101, 0b011101);
INSN(vmfle_vf, 0b1010111, 0b101, 0b011001);
INSN(vmflt_vf, 0b1010111, 0b101, 0b011011);
INSN(vmfne_vf, 0b1010111, 0b101, 0b011100);
INSN(vmfeq_vf, 0b1010111, 0b101, 0b011000);
// Vector Floating-Point MIN/MAX Instructions
INSN(vfmax_vf, 0b1010111, 0b101, 0b000110);
INSN(vfmin_vf, 0b1010111, 0b101, 0b000100);
// Vector Single-Width Floating-Point Multiply/Divide Instructions
INSN(vfdiv_vf, 0b1010111, 0b101, 0b100000);
INSN(vfmul_vf, 0b1010111, 0b101, 0b100100);
INSN(vfrdiv_vf, 0b1010111, 0b101, 0b100001);
// Vector Single-Width Floating-Point Add/Subtract Instructions
INSN(vfsub_vf, 0b1010111, 0b101, 0b000010);
INSN(vfadd_vf, 0b1010111, 0b101, 0b000000);
INSN(vfrsub_vf, 0b1010111, 0b101, 0b100111);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { \
guarantee(is_simm5(imm), "imm is invalid"); \
patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
}
// Vector Integer Comparison Instructions
INSN(vmsgt_vi, 0b1010111, 0b011, 0b011111);
INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110);
INSN(vmsle_vi, 0b1010111, 0b011, 0b011101);
INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100);
INSN(vmsne_vi, 0b1010111, 0b011, 0b011001);
INSN(vmseq_vi, 0b1010111, 0b011, 0b011000);
// Vector Bitwise Logical Instructions
INSN(vxor_vi, 0b1010111, 0b011, 0b001011);
INSN(vor_vi, 0b1010111, 0b011, 0b001010);
INSN(vand_vi, 0b1010111, 0b011, 0b001001);
// Vector Single-Width Integer Add and Subtract
INSN(vadd_vi, 0b1010111, 0b011, 0b000000);
// Vector Integer reverse subtract
INSN(vrsub_vi, 0b1010111, 0b011, 0b000011);
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm) { \
guarantee(is_simm5(imm), "imm is invalid"); \
patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
}
// Vector Integer Merge Instructions
INSN(vmerge_vim, 0b1010111, 0b011, 0b0, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \
patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Compress Instruction
INSN(vcompress_vm, 0b1010111, 0b010, 0b1, 0b010111);
// Vector Mask-Register Logical Instructions
INSN(vmxnor_mm, 0b1010111, 0b010, 0b1, 0b011111);
INSN(vmorn_mm, 0b1010111, 0b010, 0b1, 0b011100);
INSN(vmnor_mm, 0b1010111, 0b010, 0b1, 0b011110);
INSN(vmor_mm, 0b1010111, 0b010, 0b1, 0b011010);
INSN(vmxor_mm, 0b1010111, 0b010, 0b1, 0b011011);
INSN(vmandn_mm, 0b1010111, 0b010, 0b1, 0b011000);
INSN(vmnand_mm, 0b1010111, 0b010, 0b1, 0b011101);
INSN(vmand_mm, 0b1010111, 0b010, 0b1, 0b011001);
// Vector Integer Merge Instructions
INSN(vmerge_vvm, 0b1010111, 0b000, 0b0, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
void NAME(VectorRegister Vd, int32_t imm) { \
guarantee(is_simm5(imm), "imm is invalid"); \
patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
}
// Vector Integer Move Instructions
INSN(vmv_v_i, 0b1010111, 0b011, v0, 0b1, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
void NAME(VectorRegister Vd, FloatRegister Rs1) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Floating-Point Scalar Move Instructions
INSN(vfmv_s_f, 0b1010111, 0b101, v0, 0b1, 0b010000);
// Vector Floating-Point Move Instruction
INSN(vfmv_v_f, 0b1010111, 0b101, v0, 0b1, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs1) { \
patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Integer Move Instructions
INSN(vmv_v_v, 0b1010111, 0b000, v0, 0b1, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
void NAME(VectorRegister Vd, Register Rs1) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Integer Scalar Move Instructions
INSN(vmv_s_x, 0b1010111, 0b110, v0, 0b1, 0b010000);
// Vector Integer Move Instructions
INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111);
#undef INSN
#define INSN(NAME, op, funct13, funct6) \
void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 24, 12, funct13); \
patch((address)&insn, 25, vm); \
patch((address)&insn, 31, 26, funct6); \
patch_reg((address)&insn, 7, Vd); \
emit(insn); \
}
// Vector Element Index Instruction
INSN(vid_v, 0b1010111, 0b0000010001010, 0b010100);
#undef INSN
enum Nf {
g1 = 0b000,
g2 = 0b001,
g3 = 0b010,
g4 = 0b011,
g5 = 0b100,
g6 = 0b101,
g7 = 0b110,
g8 = 0b111
};
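// Nf supplies the nf field, which encodes the number of fields (or whole registers)
// minus one: g1 -> nf = 0, ..., g8 -> nf = 7.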
#define patch_VLdSt(op, VReg, width, Rs1, Reg_or_umop, vm, mop, mew, nf) \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, width); \
patch((address)&insn, 24, 20, Reg_or_umop); \
patch((address)&insn, 25, vm); \
patch((address)&insn, 27, 26, mop); \
patch((address)&insn, 28, mew); \
patch((address)&insn, 31, 29, nf); \
patch_reg((address)&insn, 7, VReg); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn)
#define INSN(NAME, op, width, lumop, vm, mop, mew, nf) \
void NAME(VectorRegister Vd, Register Rs1) { \
guarantee(is_uimm3(width), "width is invalid"); \
patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf); \
}
// Vector Load/Store Instructions
INSN(vl1re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1);
INSN(vl1re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g1);
INSN(vl1re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g1);
INSN(vl1re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g1);
INSN(vl2re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g2);
INSN(vl2re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g2);
INSN(vl2re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g2);
INSN(vl2re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g2);
INSN(vl4re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g4);
INSN(vl4re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g4);
INSN(vl4re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g4);
INSN(vl4re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g4);
INSN(vl8re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g8);
INSN(vl8re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g8);
INSN(vl8re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g8);
INSN(vl8re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g8);
#undef INSN
#define INSN(NAME, op, width, sumop, vm, mop, mew, nf) \
void NAME(VectorRegister Vs3, Register Rs1) { \
patch_VLdSt(op, Vs3, width, Rs1, sumop, vm, mop, mew, nf); \
}
// Vector Load/Store Instructions
INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1);
INSN(vs2r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g2);
INSN(vs4r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g4);
INSN(vs8r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g8);
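// Unlike the whole-register loads above, the RVV spec defines whole-register stores
// only with an EEW of 8, hence the single width encoding 0b000.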
#undef INSN
// r2_nfvm
#define INSN(NAME, op, width, umop, mop, mew) \
void NAME(VectorRegister Vd_or_Vs3, Register Rs1, Nf nf = g1) { \
patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, 1, mop, mew, nf); \
}
// Vector Unit-Stride Instructions
INSN(vlm_v, 0b0000111, 0b000, 0b01011, 0b00, 0b0);
INSN(vsm_v, 0b0100111, 0b000, 0b01011, 0b00, 0b0);
#undef INSN
#define INSN(NAME, op, width, umop, mop, mew) \
void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked, Nf nf = g1) { \
patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf); \
}
// Vector Unit-Stride Instructions
INSN(vle8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0);
INSN(vle16_v, 0b0000111, 0b101, 0b00000, 0b00, 0b0);
INSN(vle32_v, 0b0000111, 0b110, 0b00000, 0b00, 0b0);
INSN(vle64_v, 0b0000111, 0b111, 0b00000, 0b00, 0b0);
// Vector unit-stride fault-only-first Instructions
INSN(vle8ff_v, 0b0000111, 0b000, 0b10000, 0b00, 0b0);
INSN(vle16ff_v, 0b0000111, 0b101, 0b10000, 0b00, 0b0);
INSN(vle32ff_v, 0b0000111, 0b110, 0b10000, 0b00, 0b0);
INSN(vle64ff_v, 0b0000111, 0b111, 0b10000, 0b00, 0b0);
INSN(vse8_v, 0b0100111, 0b000, 0b00000, 0b00, 0b0);
INSN(vse16_v, 0b0100111, 0b101, 0b00000, 0b00, 0b0);
INSN(vse32_v, 0b0100111, 0b110, 0b00000, 0b00, 0b0);
INSN(vse64_v, 0b0100111, 0b111, 0b00000, 0b00, 0b0);
#undef INSN
#define INSN(NAME, op, width, umop, mop, mew, nf) \
void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked) { \
patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf); \
}
// Vector Unit-Stride Segment Load Instructions
INSN(vlseg3e8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0, g3);
INSN(vlseg4e8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0, g4);
// Vector Unit-Stride Segment Store Instructions
INSN(vsseg3e8_v, 0b0100111, 0b000, 0b00000, 0b00, 0b0, g3);
INSN(vsseg4e8_v, 0b0100111, 0b000, 0b00000, 0b00, 0b0, g4);
#undef INSN
#define INSN(NAME, op, width, mop, mew) \
void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked, Nf nf = g1) { \
patch_VLdSt(op, Vd, width, Rs1, Vs2->raw_encoding(), vm, mop, mew, nf); \
}
// Vector unordered indexed load instructions
INSN( vluxei8_v, 0b0000111, 0b000, 0b01, 0b0);
INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0);
INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0);
// Vector unordered indexed store instructions
INSN( vsuxei8_v, 0b0100111, 0b000, 0b01, 0b0);
INSN(vsuxei32_v, 0b0100111, 0b110, 0b01, 0b0);
INSN(vsuxei64_v, 0b0100111, 0b111, 0b01, 0b0);
#undef INSN
#define INSN(NAME, op, width, mop, mew) \
void NAME(VectorRegister Vd, Register Rs1, Register Rs2, VectorMask vm = unmasked, Nf nf = g1) { \
patch_VLdSt(op, Vd, width, Rs1, Rs2->raw_encoding(), vm, mop, mew, nf); \
}
// Vector Strided Instructions
INSN(vlse8_v, 0b0000111, 0b000, 0b10, 0b0);
INSN(vlse16_v, 0b0000111, 0b101, 0b10, 0b0);
INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0);
INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0);
INSN(vsse8_v, 0b0100111, 0b000, 0b10, 0b0);
INSN(vsse16_v, 0b0100111, 0b101, 0b10, 0b0);
INSN(vsse32_v, 0b0100111, 0b110, 0b10, 0b0);
INSN(vsse64_v, 0b0100111, 0b111, 0b10, 0b0);
#undef INSN
#undef patch_VLdSt
// ====================================
// RISC-V Vector Crypto Extension
// ====================================
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Bit-manipulation used in Cryptography (Zvbb) Extension
INSN(vandn_vv, 0b1010111, 0b000, 0b000001);
INSN(vror_vv, 0b1010111, 0b000, 0b010100);
INSN(vrol_vv, 0b1010111, 0b000, 0b010101);
// Vector Bit-manipulation used in Cryptography (Zvbc) Extension
INSN(vclmul_vv, 0b1010111, 0b010, 0b001100);
INSN(vclmulh_vv, 0b1010111, 0b010, 0b001101);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Bit-manipulation used in Cryptography (Zvbb) Extension
INSN(vandn_vx, 0b1010111, 0b100, 0b000001);
INSN(vrol_vx, 0b1010111, 0b100, 0b010101);
INSN(vror_vx, 0b1010111, 0b100, 0b010100);
#undef INSN
#define patch_VArith_imm6(op, Reg, funct3, Reg_or_Imm5, I5, Vs2, vm, funct6) \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 19, 15, Reg_or_Imm5); \
patch((address)&insn, 25, vm); \
patch((address)&insn, 26, I5); \
patch((address)&insn, 31, 27, funct6); \
patch_reg((address)&insn, 7, Reg); \
patch_reg((address)&insn, 20, Vs2); \
emit(insn)
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \
guarantee(is_uimm6(imm), "uimm is invalid"); \
patch_VArith_imm6(op, Vd, funct3, (uint32_t)(imm & 0x1f), (uint32_t)((imm >> 5) & 0x1), Vs2, vm, funct6); \
}
// Vector Bit-manipulation used in Cryptography (Zvbb) Extension
// NOTE: there is no corresponding vrol.vi supplied by the extension, but it can easily be emulated with vror.vi.
INSN(vror_vi, 0b1010111, 0b011, 0b01010);
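// For example, a rotate-left of each element by k bits can be emitted as
// vror_vi(Vd, Vs2, (w - k) & (w - 1)), where w is the current element width (SEW) in bits.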
#undef INSN
#undef patch_VArith_imm6
#define INSN(NAME, op, funct3, Vs1, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \
}
// Vector Bit-manipulation used in Cryptography (Zvkb) Extension
INSN(vbrev_v, 0b1010111, 0b010, 0b01010, 0b010010); // reverse bits in every element
INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element
INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every element
// Vector AES instructions (Zvkned extension)
INSN(vaesem_vv, 0b1110111, 0b010, 0b00010, 0b101000);
INSN(vaesef_vv, 0b1110111, 0b010, 0b00011, 0b101000);
INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000);
INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000);
INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \
patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector SHA-2 Secure Hash (Zvknh[ab]) Extension
INSN(vsha2ms_vv, 0b1110111, 0b010, 0b1, 0b101101);
INSN(vsha2ch_vv, 0b1110111, 0b010, 0b1, 0b101110);
INSN(vsha2cl_vv, 0b1110111, 0b010, 0b1, 0b101111);
// Vector GHASH (Zvkg) Extension
INSN(vghsh_vv, 0b1110111, 0b010, 0b1, 0b101100);
#undef INSN
#define INSN(NAME, op, funct3, Vs1, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \
}
// Vector Basic Bit-manipulation (Zvbb) Extension
INSN(vcpop_v, 0b1010111, 0b010, 0b01110, 0b010010);
#undef INSN
#undef patch_VArith
// ====================================
// RISC-V Bit-Manipulation Extension
// Currently only the Zba, Zbb and Zbs bitmanip extensions are supported.
// ====================================
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, Register Rs2) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 31, 25, funct7); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
patch_reg((address)&insn, 20, Rs2); \
emit(insn); \
}
INSN(add_uw, 0b0111011, 0b000, 0b0000100);
INSN(rolr, 0b0110011, 0b001, 0b0110000);
INSN(rolrw, 0b0111011, 0b001, 0b0110000);
INSN(rorr, 0b0110011, 0b101, 0b0110000);
INSN(rorrw, 0b0111011, 0b101, 0b0110000);
INSN(sh1add, 0b0110011, 0b010, 0b0010000);
INSN(sh2add, 0b0110011, 0b100, 0b0010000);
INSN(sh3add, 0b0110011, 0b110, 0b0010000);
INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000);
INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000);
INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000);
INSN(andn, 0b0110011, 0b111, 0b0100000);
INSN(orn, 0b0110011, 0b110, 0b0100000);
INSN(xnor, 0b0110011, 0b100, 0b0100000);
INSN(max, 0b0110011, 0b110, 0b0000101);
INSN(maxu, 0b0110011, 0b111, 0b0000101);
INSN(min, 0b0110011, 0b100, 0b0000101);
INSN(minu, 0b0110011, 0b101, 0b0000101);
#undef INSN
#define INSN(NAME, op, funct3, funct12) \
void NAME(Register Rd, Register Rs1) { \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 31, 20, funct12); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(brev8, 0b0010011, 0b101, 0b011010000111);
INSN(rev8, 0b0010011, 0b101, 0b011010111000);
INSN(_sext_b, 0b0010011, 0b001, 0b011000000100);
INSN(_sext_h, 0b0010011, 0b001, 0b011000000101);
INSN(_zext_h, 0b0111011, 0b100, 0b000010000000);
INSN(clz, 0b0010011, 0b001, 0b011000000000);
INSN(clzw, 0b0011011, 0b001, 0b011000000000);
INSN(ctz, 0b0010011, 0b001, 0b011000000001);
INSN(ctzw, 0b0011011, 0b001, 0b011000000001);
INSN(cpop, 0b0010011, 0b001, 0b011000000010);
INSN(cpopw, 0b0011011, 0b001, 0b011000000010);
INSN(orc_b, 0b0010011, 0b101, 0b001010000111);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(Register Rd, Register Rs1, unsigned shamt) {\
guarantee(shamt <= 0x3f, "Shamt is invalid"); \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 25, 20, shamt); \
patch((address)&insn, 31, 26, funct6); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(rori, 0b0010011, 0b101, 0b011000);
INSN(slli_uw, 0b0011011, 0b001, 0b000010);
INSN(bexti, 0b0010011, 0b101, 0b010010);
#undef INSN
#define INSN(NAME, op, funct3, funct7) \
void NAME(Register Rd, Register Rs1, unsigned shamt) {\
guarantee(shamt <= 0x1f, "Shamt is invalid"); \
unsigned insn = 0; \
patch((address)&insn, 6, 0, op); \
patch((address)&insn, 14, 12, funct3); \
patch((address)&insn, 24, 20, shamt); \
patch((address)&insn, 31, 25, funct7); \
patch_reg((address)&insn, 7, Rd); \
patch_reg((address)&insn, 15, Rs1); \
emit(insn); \
}
INSN(roriw, 0b0011011, 0b101, 0b0110000);
#undef INSN
// ========================================
// RISC-V Compressed Instructions Extension
// ========================================
// Note:
// 1. Assembler functions encoding 16-bit compressed instructions always begin with a 'c_'
// prefix, such as 'c_add'. Correspondingly, assembler functions encoding normal 32-bit
// instructions begin with a '_' prefix, such as '_add'. Most of the time users have no
// need to explicitly emit these compressed instructions. Instead, they still use unified
// wrappers such as 'add' which do the compressing work through 'c_add' depending on the
// operands of the instruction and the availability of the RVC hardware extension.
//
// 2. 'CompressibleScope' and 'IncompressibleScope' are introduced to mark assembler scopes
// within which instructions are qualified or unqualified to be compressed into their 16-bit
// versions. An example:
//
// CompressibleScope scope(_masm);
// __ add(...); // this instruction will be compressed into 'c.add' when possible
// {
// IncompressibleScope scope(_masm);
// __ add(...); // this instruction will not be compressed
// {
// CompressibleScope scope(_masm);
// __ add(...); // this instruction will be compressed into 'c.add' when possible
// }
// }
//
// 3. When printing JIT assembly code, using -XX:PrintAssemblyOptions=no-aliases could help
// distinguish compressed 16-bit instructions from normal 32-bit ones.
private:
bool _in_compressible_scope;
public:
bool in_compressible_scope() const { return _in_compressible_scope; }
void set_in_compressible_scope(bool b) { _in_compressible_scope = b; }
public:
// An abstract compressible scope
class AbstractCompressibleScope : public StackObj {
protected:
Assembler *_masm;
bool _saved_in_compressible_scope;
protected:
AbstractCompressibleScope(Assembler *_masm)
: _masm(_masm)
, _saved_in_compressible_scope(_masm->in_compressible_scope()) {}
};
// A compressible scope
class CompressibleScope : public AbstractCompressibleScope {
public:
CompressibleScope(Assembler *_masm) : AbstractCompressibleScope(_masm) {
_masm->set_in_compressible_scope(true);
}
~CompressibleScope() {
_masm->set_in_compressible_scope(_saved_in_compressible_scope);
}
};
// An incompressible scope
class IncompressibleScope : public AbstractCompressibleScope {
public:
IncompressibleScope(Assembler *_masm) : AbstractCompressibleScope(_masm) {
_masm->set_in_compressible_scope(false);
}
~IncompressibleScope() {
_masm->set_in_compressible_scope(_saved_in_compressible_scope);
}
};
public:
// Emit a relocation.
void relocate(RelocationHolder const& rspec, int format = 0) {
AbstractAssembler::relocate(rspec, format);
}
void relocate(relocInfo::relocType rtype, int format = 0) {
AbstractAssembler::relocate(rtype, format);
}
template <typename Callback>
void relocate(RelocationHolder const& rspec, Callback emit_insts, int format = 0) {
AbstractAssembler::relocate(rspec, format);
IncompressibleScope scope(this); // relocations
emit_insts();
}
template <typename Callback>
void relocate(relocInfo::relocType rtype, Callback emit_insts, int format = 0) {
AbstractAssembler::relocate(rtype, format);
IncompressibleScope scope(this); // relocations
emit_insts();
}
// patch a 16-bit instruction.
static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) {
assert_cond(a != nullptr);
assert_cond(msb >= lsb && msb <= 15);
unsigned nbits = msb - lsb + 1;
guarantee(val < (1U << nbits), "Field too big for insn");
uint16_t mask = (1U << nbits) - 1;
val <<= lsb;
mask <<= lsb;
uint16_t target = ld_c_instr(a);
target &= ~mask;
target |= val;
sd_c_instr(a, target);
}
static void c_patch(address a, unsigned bit, uint16_t val) {
c_patch(a, bit, bit, val);
}
// patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits)
static void c_patch_reg(address a, unsigned lsb, Register reg) {
c_patch(a, lsb + 4, lsb, reg->raw_encoding());
}
// patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits)
static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) {
c_patch(a, lsb + 2, lsb, reg->compressed_raw_encoding());
}
// patch a 16-bit instruction with a float register ranging [0, 31] (5 bits)
static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) {
c_patch(a, lsb + 4, lsb, reg->raw_encoding());
}
// patch a 16-bit instruction with a float register ranging [8, 15] (3 bits)
static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) {
c_patch(a, lsb + 2, lsb, reg->compressed_raw_encoding());
}
// -------------- RVC Instruction Definitions --------------
void c_nop() {
c_addi(x0, 0);
}
#define INSN(NAME, funct3, op) \
void NAME(Register Rd_Rs1, int64_t imm) { \
assert_cond(is_simm6(imm)); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \
c_patch_reg((address)&insn, 7, Rd_Rs1); \
c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_addi, 0b000, 0b01);
INSN(c_addiw, 0b001, 0b01);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(int64_t imm) { \
assert_cond(is_simm10(imm)); \
assert_cond((imm & 0b1111) == 0); \
assert_cond(imm != 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \
c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \
c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \
c_patch_reg((address)&insn, 7, sp); \
c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_addi16sp, 0b011, 0b01);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd, uint64_t uimm) { \
assert_cond(is_uimm10(uimm)); \
assert_cond((uimm & 0b11) == 0); \
assert_cond(uimm != 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_compressed_reg((address)&insn, 2, Rd); \
c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \
c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \
c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \
c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_addi4spn, 0b000, 0b00);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd_Rs1, uint32_t shamt) { \
assert_cond(is_uimm6(shamt)); \
assert_cond(shamt != 0); \
assert_cond(Rd_Rs1 != x0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \
c_patch_reg((address)&insn, 7, Rd_Rs1); \
c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_slli, 0b000, 0b10);
#undef INSN
#define INSN(NAME, funct3, funct2, op) \
void NAME(Register Rd_Rs1, uint32_t shamt) { \
assert_cond(is_uimm6(shamt)); \
assert_cond(shamt != 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \
c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \
c_patch((address)&insn, 11, 10, funct2); \
c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_srli, 0b100, 0b00, 0b01);
INSN(c_srai, 0b100, 0b01, 0b01);
#undef INSN
#define INSN(NAME, funct3, funct2, op) \
void NAME(Register Rd_Rs1, int64_t imm) { \
assert_cond(is_simm6(imm)); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \
c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \
c_patch((address)&insn, 11, 10, funct2); \
c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_andi, 0b100, 0b10, 0b01);
#undef INSN
#define INSN(NAME, funct6, funct2, op) \
void NAME(Register Rd_Rs1, Register Rs2) { \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_compressed_reg((address)&insn, 2, Rs2); \
c_patch((address)&insn, 6, 5, funct2); \
c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \
c_patch((address)&insn, 15, 10, funct6); \
emit_int16(insn); \
}
INSN(c_sub, 0b100011, 0b00, 0b01);
INSN(c_xor, 0b100011, 0b01, 0b01);
INSN(c_or, 0b100011, 0b10, 0b01);
INSN(c_and, 0b100011, 0b11, 0b01);
INSN(c_subw, 0b100111, 0b00, 0b01);
INSN(c_addw, 0b100111, 0b01, 0b01);
#undef INSN
#define INSN(NAME, funct4, op) \
void NAME(Register Rd_Rs1, Register Rs2) { \
assert_cond(Rd_Rs1 != x0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_reg((address)&insn, 2, Rs2); \
c_patch_reg((address)&insn, 7, Rd_Rs1); \
c_patch((address)&insn, 15, 12, funct4); \
emit_int16(insn); \
}
INSN(c_mv, 0b1000, 0b10);
INSN(c_add, 0b1001, 0b10);
#undef INSN
private:
// All calls and jumps must go via MASM.
// Format CR, c.jr/c.jalr
// Note: compressed (C) instructions can't be changed afterwards, i.e. no relocation patching.
template <uint8_t InstructionType, uint8_t FunctionType>
void c_cr_if(Register Rs1) {
assert_cond(Rs1 != x0);
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, FunctionType);
c_patch_reg((address)&insn, 2, x0);
c_patch_reg((address)&insn, 7, Rs1);
c_patch((address)&insn, 15, 12, InstructionType);
emit_int16(insn);
}
void c_jr(Register Rs1) { c_cr_if<0b1000, 0b10>(Rs1); }
void c_jalr(Register Rs1) { c_cr_if<0b1001, 0b10>(Rs1); }
typedef void (Assembler::* j_c_insn)(address dest);
typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest);
void wrap_label(Label &L, j_c_insn insn) {
if (L.is_bound()) {
(this->*insn)(target(L));
} else {
L.add_patch_at(code(), locator());
(this->*insn)(pc());
}
}
void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) {
if (L.is_bound()) {
(this->*insn)(r, target(L));
} else {
L.add_patch_at(code(), locator());
(this->*insn)(r, pc());
}
}
// Format CJ, c.j (c.jal)
// Note: compressed (C) instructions can't be changed afterwards, i.e. no relocation patching.
void c_j(int32_t offset) {
assert(is_simm12(offset) && ((offset % 2) == 0), "invalid encoding");
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b01);
c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5);
c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1);
c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7);
c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6);
c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10);
c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8);
c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4);
c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11);
c_patch((address)&insn, 15, 13, 0b101);
emit_int16(insn);
}
void c_j(address dest) {
assert_cond(dest != nullptr);
int64_t distance = dest - pc();
assert(is_simm12(distance) && ((distance % 2) == 0), "invalid encoding");
c_j(distance);
}
void c_j(Label &L) {
wrap_label(L, &Assembler::c_j);
}
public:
#define INSN(NAME, funct3, op) \
void NAME(Register Rs1, int32_t imm) { \
assert(is_simm9(imm) && ((imm % 2) == 0), "invalid encoding"); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \
c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \
c_patch_compressed_reg((address)&insn, 7, Rs1); \
c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \
c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
} \
void NAME(Register Rs1, address dest) { \
assert_cond(dest != nullptr); \
int64_t distance = dest - pc(); \
assert(is_simm9(distance) && ((distance % 2) == 0), "invalid encoding"); \
NAME(Rs1, distance); \
} \
void NAME(Register Rs1, Label &L) { \
wrap_label(L, Rs1, &Assembler::NAME); \
}
INSN(c_beqz, 0b110, 0b01);
INSN(c_bnez, 0b111, 0b01);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd, int32_t imm) { \
assert_cond(is_simm18(imm)); \
assert_cond((imm & 0xfff) == 0); \
assert_cond(imm != 0); \
assert_cond(Rd != x0 && Rd != x2); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \
c_patch_reg((address)&insn, 7, Rd); \
c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_lui, 0b011, 0b01);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd, int32_t imm) { \
assert_cond(is_simm6(imm)); \
assert_cond(Rd != x0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \
c_patch_reg((address)&insn, 7, Rd); \
c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_li, 0b010, 0b01);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd, uint32_t uimm) { \
assert_cond(is_uimm9(uimm)); \
assert_cond((uimm & 0b111) == 0); \
assert_cond(Rd != x0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \
c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \
c_patch_reg((address)&insn, 7, Rd); \
c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_ldsp, 0b011, 0b10);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(FloatRegister Rd, uint32_t uimm) { \
assert_cond(is_uimm9(uimm)); \
assert_cond((uimm & 0b111) == 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \
c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \
c_patch_reg((address)&insn, 7, Rd); \
c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_fldsp, 0b001, 0b10);
#undef INSN
#define INSN(NAME, funct3, op, REGISTER_TYPE) \
void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \
assert_cond(is_uimm8(uimm)); \
assert_cond((uimm & 0b111) == 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \
c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \
c_patch_compressed_reg((address)&insn, 7, Rs1); \
c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_ld, 0b011, 0b00, Register);
INSN(c_sd, 0b111, 0b00, Register);
INSN(c_fld, 0b001, 0b00, FloatRegister);
INSN(c_fsd, 0b101, 0b00, FloatRegister);
#undef INSN
#define INSN(NAME, funct3, op, REGISTER_TYPE) \
void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \
assert_cond(is_uimm9(uimm)); \
assert_cond((uimm & 0b111) == 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_reg((address)&insn, 2, Rs2); \
c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \
c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_sdsp, 0b111, 0b10, Register);
INSN(c_fsdsp, 0b101, 0b10, FloatRegister);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rs2, uint32_t uimm) { \
assert_cond(is_uimm8(uimm)); \
assert_cond((uimm & 0b11) == 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_reg((address)&insn, 2, Rs2); \
c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \
c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_swsp, 0b110, 0b10);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd, uint32_t uimm) { \
assert_cond(is_uimm8(uimm)); \
assert_cond((uimm & 0b11) == 0); \
assert_cond(Rd != x0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \
c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \
c_patch_reg((address)&insn, 7, Rd); \
c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_lwsp, 0b010, 0b10);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \
assert_cond(is_uimm7(uimm)); \
assert_cond((uimm & 0b11) == 0); \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \
c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \
c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \
c_patch_compressed_reg((address)&insn, 7, Rs1); \
c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_lw, 0b010, 0b00);
INSN(c_sw, 0b110, 0b00);
#undef INSN
#define INSN(NAME, funct3, op) \
void NAME() { \
uint16_t insn = 0; \
c_patch((address)&insn, 1, 0, op); \
c_patch((address)&insn, 11, 2, 0x0); \
c_patch((address)&insn, 12, 12, 0b1); \
c_patch((address)&insn, 15, 13, funct3); \
emit_int16(insn); \
}
INSN(c_ebreak, 0b100, 0b10);
#undef INSN
// -------------- RVC Transformation Functions --------------
// --------------------------
// Register instructions
// --------------------------
#define INSN(NAME) \
void NAME(Register Rd, Register Rs1, Register Rs2) { \
/* add -> c.add */ \
if (do_compress()) { \
Register src = noreg; \
if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \
c_add(Rd, src); \
return; \
} \
} \
_add(Rd, Rs1, Rs2); \
}
INSN(add);
#undef INSN
// --------------------------
#define INSN(NAME, C_NAME, NORMAL_NAME) \
void NAME(Register Rd, Register Rs1, Register Rs2) { \
/* sub/subw -> c.sub/c.subw */ \
if (do_compress() && \
(Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \
C_NAME(Rd, Rs2); \
return; \
} \
NORMAL_NAME(Rd, Rs1, Rs2); \
}
INSN(sub, c_sub, _sub);
INSN(subw, c_subw, _subw);
#undef INSN
// --------------------------
#define INSN(NAME, C_NAME, NORMAL_NAME) \
void NAME(Register Rd, Register Rs1, Register Rs2) { \
/* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \
if (do_compress()) { \
Register src = noreg; \
if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \
((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \
C_NAME(Rd, src); \
return; \
} \
} \
NORMAL_NAME(Rd, Rs1, Rs2); \
}
INSN(andr, c_and, _andr);
INSN(orr, c_or, _orr);
INSN(xorr, c_xor, _xorr);
INSN(addw, c_addw, _addw);
#undef INSN
private:
// some helper functions
#define FUNC(NAME, funct3, bits) \
bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \
return rs1 == sp && \
is_uimm(imm12, bits) && \
(intx(imm12) & funct3) == 0x0 && \
(!ld || rd_rs2 != x0); \
}
FUNC(is_c_ldsdsp, 0b111, 9);
FUNC(is_c_lwswsp, 0b011, 8);
#undef FUNC
#define FUNC(NAME, funct3, bits) \
bool NAME(Register rs1, int32_t imm12) { \
return rs1 == sp && \
is_uimm(imm12, bits) && \
(intx(imm12) & funct3) == 0x0; \
}
FUNC(is_c_fldsdsp, 0b111, 9);
#undef FUNC
#define FUNC(NAME, REG_TYPE, funct3, bits) \
bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \
return rs1->is_compressed_valid() && \
rd_rs2->is_compressed_valid() && \
is_uimm(imm12, bits) && \
(intx(imm12) & funct3) == 0x0; \
}
FUNC(is_c_ldsd, Register, 0b111, 8);
FUNC(is_c_lwsw, Register, 0b011, 7);
FUNC(is_c_fldsd, FloatRegister, 0b111, 8);
#undef FUNC
public:
bool do_compress() const {
return UseRVC && in_compressible_scope();
}
bool do_compress_zcb(Register reg1 = noreg, Register reg2 = noreg) const {
return do_compress() && UseZcb &&
(reg1 == noreg || reg1->is_compressed_valid()) && (reg2 == noreg || reg2->is_compressed_valid());
}
bool do_compress_zcb_zbb(Register reg1 = noreg, Register reg2 = noreg) const {
return do_compress_zcb(reg1, reg2) && UseZbb;
}
// --------------------------
// Load/store register
// --------------------------
void lw(Register Rd, Register Rs, const int32_t offset) {
/* lw -> c.lwsp/c.lw */
if (do_compress()) {
if (is_c_lwswsp(Rs, Rd, offset, true)) {
c_lwsp(Rd, offset);
return;
} else if (is_c_lwsw(Rs, Rd, offset)) {
c_lw(Rd, Rs, offset);
return;
}
}
_lw(Rd, Rs, offset);
}
// --------------------------
void ld(Register Rd, Register Rs, const int32_t offset) {
/* ld -> c.ldsp/c.ld */
if (do_compress()) {
if (is_c_ldsdsp(Rs, Rd, offset, true)) {
c_ldsp(Rd, offset);
return;
} else if (is_c_ldsd(Rs, Rd, offset)) {
c_ld(Rd, Rs, offset);
return;
}
}
_ld(Rd, Rs, offset);
}
// --------------------------
void fld(FloatRegister Rd, Register Rs, const int32_t offset) {
/* fld -> c.fldsp/c.fld */
if (do_compress()) {
if (is_c_fldsdsp(Rs, offset)) {
c_fldsp(Rd, offset);
return;
} else if (is_c_fldsd(Rs, Rd, offset)) {
c_fld(Rd, Rs, offset);
return;
}
}
_fld(Rd, Rs, offset);
}
// --------------------------
void sd(Register Rs2, Register Rs1, const int32_t offset) {
/* sd -> c.sdsp/c.sd */
if (do_compress()) {
if (is_c_ldsdsp(Rs1, Rs2, offset, false)) {
c_sdsp(Rs2, offset);
return;
} else if (is_c_ldsd(Rs1, Rs2, offset)) {
c_sd(Rs2, Rs1, offset);
return;
}
}
_sd(Rs2, Rs1, offset);
}
// --------------------------
void sw(Register Rs2, Register Rs1, const int32_t offset) {
/* sw -> c.swsp/c.sw */
if (do_compress()) {
if (is_c_lwswsp(Rs1, Rs2, offset, false)) {
c_swsp(Rs2, offset);
return;
} else if (is_c_lwsw(Rs1, Rs2, offset)) {
c_sw(Rs2, Rs1, offset);
return;
}
}
_sw(Rs2, Rs1, offset);
}
// --------------------------
void fsd(FloatRegister Rs2, Register Rs1, const int32_t offset) {
/* fsd -> c.fsdsp/c.fsd */
if (do_compress()) {
if (is_c_fldsdsp(Rs1, offset)) {
c_fsdsp(Rs2, offset);
return;
} else if (is_c_fldsd(Rs1, Rs2, offset)) {
c_fsd(Rs2, Rs1, offset);
return;
}
}
_fsd(Rs2, Rs1, offset);
}
// --------------------------
// Unconditional branch instructions
// --------------------------
protected:
// All calls and jumps must go via MASM. Only use x1 (aka ra) as link register for now.
void jalr(Register Rd, Register Rs, const int32_t offset) {
assert(Rd != x5 && Rs != x5, "Register x5 must not be used for calls/jumps.");
/* jalr -> c.jr/c.jalr */
if (do_compress() && (offset == 0 && Rs != x0)) {
if (Rd == x1) {
c_jalr(Rs);
return;
} else if (Rd == x0) {
c_jr(Rs);
return;
}
}
_jalr(Rd, Rs, offset);
}
void jal(Register Rd, const int32_t offset) {
assert(Rd != x5, "Register x5 must not be used for calls/jumps.");
/* jal -> c.j, note c.jal is RV32C only */
if (do_compress() &&
Rd == x0 &&
is_simm12(offset) && ((offset % 2) == 0)) {
c_j(offset);
return;
}
_jal(Rd, offset);
}
public:
// --------------------------
// Miscellaneous Instructions
// --------------------------
#define INSN(NAME) \
void NAME() { \
/* ebreak -> c.ebreak */ \
if (do_compress()) { \
c_ebreak(); \
return; \
} \
_ebreak(); \
}
INSN(ebreak);
#undef INSN
// --------------------------
// Immediate Instructions
// --------------------------
#define INSN(NAME) \
void NAME(Register Rd, Register Rs1, int64_t imm) { \
/* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \
if (do_compress()) { \
if (Rd == Rs1 && is_simm6(imm)) { \
c_addi(Rd, imm); \
return; \
} else if (imm == 0 && Rd != x0 && Rs1 != x0) { \
c_mv(Rd, Rs1); \
return; \
} else if (Rs1 == sp && imm != 0) { \
if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_simm10(imm)) { \
c_addi16sp(imm); \
return; \
} else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_uimm10(imm)) { \
c_addi4spn(Rd, imm); \
return; \
} \
} \
} \
_addi(Rd, Rs1, imm); \
}
INSN(addi);
#undef INSN
// --------------------------
#define INSN(NAME) \
void NAME(Register Rd, Register Rs1, int64_t imm) { \
/* addiw -> c.addiw */ \
if (do_compress() && (Rd == Rs1 && Rd != x0 && is_simm6(imm))) { \
c_addiw(Rd, imm); \
return; \
} \
_addiw(Rd, Rs1, imm); \
}
INSN(addiw);
#undef INSN
// --------------------------
#define INSN(NAME) \
void NAME(Register Rd, Register Rs1, int64_t imm) { \
/* andi -> c.andi */ \
if (do_compress() && \
(Rd == Rs1 && Rd->is_compressed_valid() && is_simm6(imm))) { \
c_andi(Rd, imm); \
return; \
} \
_andi(Rd, Rs1, imm); \
}
INSN(andi);
#undef INSN
// --------------------------
// Shift Immediate Instructions
// --------------------------
#define INSN(NAME) \
void NAME(Register Rd, Register Rs1, unsigned shamt) { \
/* slli -> c.slli */ \
if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \
c_slli(Rd, shamt); \
return; \
} \
if (shamt != 0) { \
_slli(Rd, Rs1, shamt); \
} else { \
if (Rd != Rs1) { \
addi(Rd, Rs1, 0); \
} \
} \
}
INSN(slli);
#undef INSN
// --------------------------
#define INSN(NAME, C_NAME, NORMAL_NAME) \
void NAME(Register Rd, Register Rs1, unsigned shamt) { \
/* srai/srli -> c.srai/c.srli */ \
if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \
C_NAME(Rd, shamt); \
return; \
} \
if (shamt != 0) { \
NORMAL_NAME(Rd, Rs1, shamt); \
} else { \
if (Rd != Rs1) { \
addi(Rd, Rs1, 0); \
} \
} \
}
INSN(srai, c_srai, _srai);
INSN(srli, c_srli, _srli);
#undef INSN
// --------------------------
// Upper Immediate Instruction
// --------------------------
void lui(Register Rd, int32_t imm) {
/* lui -> c.lui */
if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_simm18(imm))) {
c_lui(Rd, imm);
return;
}
_lui(Rd, imm);
}
// Cache Management Operations
// These instructions may be turned off for user space.
private:
enum CBO_FUNCT : unsigned int {
CBO_INVAL = 0b0000000000000,
CBO_CLEAN = 0b0000000000001,
CBO_FLUSH = 0b0000000000010,
CBO_ZERO = 0b0000000000100
};
template <CBO_FUNCT FUNCT>
void cbo_base(Register Rs1) {
assert((UseZicbom && FUNCT != CBO_ZERO) || UseZicboz, "sanity");
unsigned insn = 0;
patch((address)&insn, 6, 0, 0b0001111);
patch((address)&insn, 14, 12, 0b010);
patch_reg((address)&insn, 15, Rs1);
patch((address)&insn, 31, 20, FUNCT);
emit(insn);
}
// This instruction has security implications.
// At this time it's not likely to be enabled for user mode.
void cbo_inval(Register Rs1) { cbo_base<CBO_INVAL>(Rs1); }
public:
// Zicbom
void cbo_clean(Register Rs1) { cbo_base<CBO_CLEAN>(Rs1); }
void cbo_flush(Register Rs1) { cbo_base<CBO_FLUSH>(Rs1); }
// Zicboz
void cbo_zero(Register Rs1) { cbo_base<CBO_ZERO>(Rs1); }
private:
enum PREFETCH_FUNCT : unsigned int {
PREFETCH_I = 0b0000000000000,
PREFETCH_R = 0b0000000000001,
PREFETCH_W = 0b0000000000011
};
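// Zicbop prefetches are encoded as ORI with rd = x0 (opcode OP-IMM,
// funct3 = 0b110): imm[4:0] selects the hint and imm[11:5] carries
// offset[11:5], which is why the low 5 bits of the offset must be zero.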
template <PREFETCH_FUNCT FUNCT>
void prefetch_base(Register Rs1, int32_t offset) {
assert_cond(UseZicbop);
guarantee((offset & 0x1f) == 0, "lowest 5 bits of offset must be zero");
int32_t upperOffset = offset >> 5;
unsigned insn = 0;
patch((address)&insn, 6, 0, 0b0010011);
patch((address)&insn, 14, 12, 0b110);
patch_reg((address)&insn, 15, Rs1);
patch((address)&insn, 24, 20, FUNCT);
upperOffset &= 0x7f;
patch((address)&insn, 31, 25, upperOffset);
emit(insn);
}
public:
// Zicbop
void prefetch_i(Register Rs1, int32_t offset) { prefetch_base<PREFETCH_I>(Rs1, offset); }
void prefetch_r(Register Rs1, int32_t offset) { prefetch_base<PREFETCH_R>(Rs1, offset); }
void prefetch_w(Register Rs1, int32_t offset) { prefetch_base<PREFETCH_W>(Rs1, offset); }
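// For example, prefetch_w(t0, 64) hints that the cache block 64 bytes past
// the address in t0 is likely to be written soon; like all Zicbop hints it
// may be ignored by the hardware.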
// -------------- Zicond Instruction Definitions --------------
// Zicond conditional operations extension
private:
enum CZERO_OP : unsigned int {
CZERO_NEZ = 0b111,
CZERO_EQZ = 0b101
};
template <CZERO_OP OP_VALUE>
void czero(Register Rd, Register Rs1, Register Rs2) {
assert_cond(UseZicond);
uint32_t insn = 0;
patch ((address)&insn, 6, 0, 0b0110011); // bits: 7, name: 0x33, attr: ['OP']
patch_reg((address)&insn, 7, Rd); // bits: 5, name: 'rd'
patch ((address)&insn, 14, 12, OP_VALUE); // bits: 3, name: 0x7, attr: ['CZERO.NEZ'] / 0x5, attr: ['CZERO.EQZ']
patch_reg((address)&insn, 15, Rs1); // bits: 5, name: 'rs1', attr: ['value']
patch_reg((address)&insn, 20, Rs2); // bits: 5, name: 'rs2', attr: ['condition']
patch ((address)&insn, 31, 25, 0b0000111); // bits: 7, name: 0x7, attr: ['CZERO']
emit_int32(insn);
}
public:
// Moves zero into rd if the condition rs2 is equal to zero, otherwise moves rs1 into rd.
void czero_eqz(Register rd, Register rs1_value, Register rs2_condition) {
czero<CZERO_EQZ>(rd, rs1_value, rs2_condition);
}
// Moves zero into rd if the condition rs2 is nonzero, otherwise moves rs1 into rd.
void czero_nez(Register rd, Register rs1_value, Register rs2_condition) {
czero<CZERO_NEZ>(rd, rs1_value, rs2_condition);
}
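// Together the two forms give a branchless conditional select,
// rd = (cond != 0) ? a : b (a sketch; t0/t1 are caller-chosen scratch
// registers, not helpers defined here):
//   czero.nez t0, b, cond   // t0 = (cond != 0) ? 0 : b
//   czero.eqz t1, a, cond   // t1 = (cond == 0) ? 0 : a
//   or        rd, t0, t1    // exactly one of t0/t1 is non-zero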
// -------------- ZCB Instruction Definitions --------------
// Zcb additional C instructions
private:
// Format CLH, c.lh/c.lhu
template <bool Unsigned>
void c_lh_if(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
assert_cond(uimm == 0 || uimm == 2);
assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b00);
c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
c_patch((address)&insn, 6, 6, Unsigned ? 0 : 1);
c_patch_compressed_reg((address)&insn, 7, Rs1);
c_patch((address)&insn, 12, 10, 0b001);
c_patch((address)&insn, 15, 13, 0b100);
emit_int16(insn);
}
template <bool Unsigned>
void lh_c_mux(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
if (do_compress_zcb(Rd_Rs2, Rs1) &&
(uimm == 0 || uimm == 2)) {
c_lh_if<Unsigned>(Rd_Rs2, Rs1, uimm);
} else {
if (Unsigned) {
_lhu(Rd_Rs2, Rs1, uimm);
} else {
_lh(Rd_Rs2, Rs1, uimm);
}
}
}
// Format CU, c.[sz]ext.*, c.not
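// InstructionType selects the operation: 0b000 c.zext.b, 0b001 c.sext.b,
// 0b010 c.zext.h, 0b011 c.sext.h, 0b100 c.zext.w, 0b101 c.not
// (see the wrappers below).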
template <uint8_t InstructionType>
void c_u_if(Register Rs1) {
assert_cond(do_compress_zcb(Rs1));
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b01);
c_patch((address)&insn, 4, 2, InstructionType);
c_patch((address)&insn, 6, 5, 0b11);
c_patch_compressed_reg((address)&insn, 7, Rs1);
c_patch((address)&insn, 12, 10, 0b111);
c_patch((address)&insn, 15, 13, 0b100);
emit_int16(insn);
}
public:
// Prerequisites: Zcb
void c_lh(Register Rd_Rs2, Register Rs1, const int32_t uimm) { c_lh_if<false>(Rd_Rs2, Rs1, uimm); }
void lh(Register Rd_Rs2, Register Rs1, const int32_t uimm) { lh_c_mux<false>(Rd_Rs2, Rs1, uimm); }
// Prerequisites: Zcb
void c_lhu(Register Rd_Rs2, Register Rs1, const int32_t uimm) { c_lh_if<true>(Rd_Rs2, Rs1, uimm); }
void lhu(Register Rd_Rs2, Register Rs1, const int32_t uimm) { lh_c_mux<true>(Rd_Rs2, Rs1, uimm); }
// Prerequisites: Zcb
// Format CLB, single instruction
void c_lbu(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
assert_cond(uimm <= 3);
assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b00);
c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
c_patch((address)&insn, 6, 6, (uimm & nth_bit(0)) >> 0);
c_patch_compressed_reg((address)&insn, 7, Rs1);
c_patch((address)&insn, 12, 10, 0b000);
c_patch((address)&insn, 15, 13, 0b100);
emit_int16(insn);
}
void lbu(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
if (do_compress_zcb(Rd_Rs2, Rs1) &&
uimm >= 0 && uimm <= 3) {
c_lbu(Rd_Rs2, Rs1, uimm);
} else {
_lbu(Rd_Rs2, Rs1, uimm);
}
}
// Prerequisites: Zcb
// Format CSB, single instruction
void c_sb(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
assert_cond(uimm <= 3);
assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b00);
c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
c_patch((address)&insn, 6, 6, (uimm & nth_bit(0)) >> 0);
c_patch_compressed_reg((address)&insn, 7, Rs1);
c_patch((address)&insn, 12, 10, 0b010);
c_patch((address)&insn, 15, 13, 0b100);
emit_int16(insn);
}
void sb(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
if (do_compress_zcb(Rd_Rs2, Rs1) &&
uimm >= 0 && uimm <= 3) {
c_sb(Rd_Rs2, Rs1, uimm);
} else {
_sb(Rd_Rs2, Rs1, uimm);
}
}
// Prerequisites: Zcb
// Format CSH, single instruction
void c_sh(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
assert_cond(uimm == 0 || uimm == 2);
assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b00);
c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
c_patch((address)&insn, 6, 6, 0);
c_patch_compressed_reg((address)&insn, 7, Rs1);
c_patch((address)&insn, 12, 10, 0b011);
c_patch((address)&insn, 15, 13, 0b100);
emit_int16(insn);
}
void sh(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
if (do_compress_zcb(Rd_Rs2, Rs1) &&
(uimm == 0 || uimm == 2)) {
c_sh(Rd_Rs2, Rs1, uimm);
} else {
_sh(Rd_Rs2, Rs1, uimm);
}
}
// Prerequisites: Zcb
// Format CU
void c_zext_b(Register Rs1) {
assert_cond(do_compress_zcb(Rs1));
c_u_if<0b000>(Rs1);
}
// Prerequisites: Zbb
void sext_b(Register Rd_Rs2, Register Rs1) {
assert_cond(UseZbb);
if (do_compress_zcb_zbb(Rd_Rs2, Rs1) && (Rd_Rs2 == Rs1)) {
c_sext_b(Rd_Rs2);
} else {
_sext_b(Rd_Rs2, Rs1);
}
}
// Prerequisites: Zcb, Zbb
// Format CU
void c_sext_b(Register Rs1) {
c_u_if<0b001>(Rs1);
}
// Prerequisites: Zbb
void zext_h(Register Rd_Rs2, Register Rs1) {
assert_cond(UseZbb);
if (do_compress_zcb_zbb(Rd_Rs2, Rs1) && (Rd_Rs2 == Rs1)) {
c_zext_h(Rd_Rs2);
} else {
_zext_h(Rd_Rs2, Rs1);
}
}
// Prerequisites: Zcb, Zbb
// Format CU
void c_zext_h(Register Rs1) {
c_u_if<0b010>(Rs1);
}
// Prerequisites: Zbb
void sext_h(Register Rd_Rs2, Register Rs1) {
assert_cond(UseZbb);
if (do_compress_zcb_zbb(Rd_Rs2, Rs1) && (Rd_Rs2 == Rs1)) {
c_sext_h(Rd_Rs2);
} else {
_sext_h(Rd_Rs2, Rs1);
}
}
// Prerequisites: Zcb, Zbb
// Format CU
void c_sext_h(Register Rs1) {
c_u_if<0b011>(Rs1);
}
// Prerequisites: Zcb, Zba
// Format CU
void c_zext_w(Register Rs1) {
c_u_if<0b100>(Rs1);
}
// Prerequisites: Zcb
// Format CU
void c_not(Register Rs1) {
c_u_if<0b101>(Rs1);
}
// Prerequisites: Zcb (M or Zmmul)
// Format CA, c.mul
void c_mul(Register Rd_Rs1, Register Rs2) {
uint16_t insn = 0;
c_patch((address)&insn, 1, 0, 0b01);
c_patch_compressed_reg((address)&insn, 2, Rs2);
c_patch((address)&insn, 6, 5, 0b10);
c_patch_compressed_reg((address)&insn, 7, Rd_Rs1);
c_patch((address)&insn, 12, 10, 0b111);
c_patch((address)&insn, 15, 13, 0b100);
emit_int16(insn);
}
void mul(Register Rd, Register Rs1, Register Rs2) {
if (Rd != Rs1 && Rd != Rs2) {
// Three distinct registers: compressing would require an extra mv, so emit uncompressed
_mul(Rd, Rs1, Rs2);
return;
}
// Rd is either Rs1 or Rs2
if (!do_compress_zcb(Rs2, Rs1)) {
_mul(Rd, Rs1, Rs2);
} else {
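// mul is commutative, so when Rd == Rs2 the sources can be swapped to
// fit c.mul's two-operand "Rd_Rs1 *= Rs2" form.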
if (Rd == Rs2) {
Rs2 = Rs1;
} else {
assert(Rd == Rs1, "must be");
}
c_mul(Rd, Rs2);
}
}
// Stack overflow checking
virtual void bang_stack_with_offset(int offset) { Unimplemented(); }
static bool is_simm5(int64_t x);
static bool is_simm6(int64_t x);
static bool is_simm12(int64_t x);
static bool is_simm13(int64_t x);
static bool is_simm18(int64_t x);
static bool is_simm21(int64_t x);
static bool is_uimm2(uint64_t x);
static bool is_uimm3(uint64_t x);
static bool is_uimm5(uint64_t x);
static bool is_uimm6(uint64_t x);
static bool is_uimm7(uint64_t x);
static bool is_uimm8(uint64_t x);
static bool is_uimm9(uint64_t x);
static bool is_uimm10(uint64_t x);
// The maximum range of a branch is fixed for the RISC-V architecture.
static const unsigned long branch_range = 1 * M;
static bool reachable_from_branch_at(address branch, address target) {
return g_uabs(target - branch) < branch_range;
}
// Decode the given instruction, checking whether it is a 16-bit compressed
// instruction, and return the address of the next instruction.
static address locate_next_instruction(address inst) {
// Instructions wider than 16 bits have the two least-significant bits set.
if ((0x3 & *inst) == 0x3) {
return inst + instruction_size;
} else {
return inst + compressed_instruction_size;
}
}
Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_scope(true) {}
};
#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP