mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-12 14:39:49 +00:00
8345298: RISC-V: Add riscv backend for Float16 operations - scalar
Reviewed-by: rehn, fyang
This commit is contained in:
parent
6241d09657
commit
a33b1f7f64
@ -1316,6 +1316,7 @@ enum operand_size { int8, int16, int32, uint32, int64 };
|
||||
|
||||
public:
|
||||
|
||||
void flh(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b001>(Rd, Rs, offset); }
|
||||
void flw(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b010>(Rd, Rs, offset); }
|
||||
void _fld(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b011>(Rd, Rs, offset); }
|
||||
|
||||
@ -1397,6 +1398,46 @@ enum operand_size { int8, int16, int32, uint32, int64 };
|
||||
fp_base<H_16_hp, 0b11100>(Rd, Rs1, 0b00000, 0b000);
|
||||
}
|
||||
|
||||
void fadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b00000>(Rd, Rs1, Rs2, rm);
|
||||
}
|
||||
|
||||
void fsub_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b00001>(Rd, Rs1, Rs2, rm);
|
||||
}
|
||||
|
||||
void fmul_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b00010>(Rd, Rs1, Rs2, rm);
|
||||
}
|
||||
|
||||
void fdiv_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b00011>(Rd, Rs1, Rs2, rm);
|
||||
}
|
||||
|
||||
void fsqrt_h(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b01011>(Rd, Rs1, 0b00000, rm);
|
||||
}
|
||||
|
||||
void fmin_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b000);
|
||||
}
|
||||
|
||||
void fmax_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
|
||||
assert_cond(UseZfh);
|
||||
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b001);
|
||||
}
|
||||
|
||||
void fmadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
|
||||
assert_cond(UseZfh);
|
||||
fp_fm<H_16_hp, 0b1000011>(Rd, Rs1, Rs2, Rs3, rm);
|
||||
}
|
||||
|
||||
// -------------- ZFA Instruction Definitions --------------
|
||||
// Zfa Extension for Additional Floating-Point Instructions
|
||||
void _fli_s(FloatRegister Rd, uint8_t Rs1) {
|
||||
|
||||
@ -2159,27 +2159,68 @@ void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis
|
||||
|
||||
// Set dst to NaN if any NaN input.
|
||||
void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
|
||||
bool is_double, bool is_min) {
|
||||
FLOAT_TYPE ft, bool is_min) {
|
||||
assert_cond((ft != FLOAT_TYPE::half_precision) || UseZfh);
|
||||
|
||||
Label Done, Compare;
|
||||
|
||||
is_double ? fclass_d(t0, src1)
|
||||
: fclass_s(t0, src1);
|
||||
is_double ? fclass_d(t1, src2)
|
||||
: fclass_s(t1, src2);
|
||||
orr(t0, t0, t1);
|
||||
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
|
||||
beqz(t0, Compare);
|
||||
is_double ? fadd_d(dst, src1, src2)
|
||||
: fadd_s(dst, src1, src2);
|
||||
j(Done);
|
||||
switch (ft) {
|
||||
case FLOAT_TYPE::half_precision:
|
||||
fclass_h(t0, src1);
|
||||
fclass_h(t1, src2);
|
||||
|
||||
bind(Compare);
|
||||
if (is_double) {
|
||||
is_min ? fmin_d(dst, src1, src2)
|
||||
: fmax_d(dst, src1, src2);
|
||||
} else {
|
||||
is_min ? fmin_s(dst, src1, src2)
|
||||
: fmax_s(dst, src1, src2);
|
||||
orr(t0, t0, t1);
|
||||
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
|
||||
beqz(t0, Compare);
|
||||
|
||||
fadd_h(dst, src1, src2);
|
||||
j(Done);
|
||||
|
||||
bind(Compare);
|
||||
if (is_min) {
|
||||
fmin_h(dst, src1, src2);
|
||||
} else {
|
||||
fmax_h(dst, src1, src2);
|
||||
}
|
||||
break;
|
||||
case FLOAT_TYPE::single_precision:
|
||||
fclass_s(t0, src1);
|
||||
fclass_s(t1, src2);
|
||||
|
||||
orr(t0, t0, t1);
|
||||
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
|
||||
beqz(t0, Compare);
|
||||
|
||||
fadd_s(dst, src1, src2);
|
||||
j(Done);
|
||||
|
||||
bind(Compare);
|
||||
if (is_min) {
|
||||
fmin_s(dst, src1, src2);
|
||||
} else {
|
||||
fmax_s(dst, src1, src2);
|
||||
}
|
||||
break;
|
||||
case FLOAT_TYPE::double_precision:
|
||||
fclass_d(t0, src1);
|
||||
fclass_d(t1, src2);
|
||||
|
||||
orr(t0, t0, t1);
|
||||
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
|
||||
beqz(t0, Compare);
|
||||
|
||||
fadd_d(dst, src1, src2);
|
||||
j(Done);
|
||||
|
||||
bind(Compare);
|
||||
if (is_min) {
|
||||
fmin_d(dst, src1, src2);
|
||||
} else {
|
||||
fmax_d(dst, src1, src2);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
bind(Done);
|
||||
|
||||
@ -172,9 +172,15 @@
|
||||
}
|
||||
}
|
||||
|
||||
enum class FLOAT_TYPE {
|
||||
half_precision,
|
||||
single_precision,
|
||||
double_precision
|
||||
};
|
||||
|
||||
void minmax_fp(FloatRegister dst,
|
||||
FloatRegister src1, FloatRegister src2,
|
||||
bool is_double, bool is_min);
|
||||
FLOAT_TYPE ft, bool is_min);
|
||||
|
||||
void round_double_mode(FloatRegister dst, FloatRegister src, int round_mode,
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
@ -1080,6 +1080,7 @@ public:
|
||||
} \
|
||||
}
|
||||
|
||||
INSN(flh);
|
||||
INSN(flw);
|
||||
INSN(fld);
|
||||
|
||||
|
||||
@ -1916,7 +1916,19 @@ bool Matcher::match_rule_supported(int opcode) {
|
||||
|
||||
case Op_ConvHF2F:
|
||||
case Op_ConvF2HF:
|
||||
return VM_Version::supports_float16_float_conversion();
|
||||
case Op_ReinterpretS2HF:
|
||||
case Op_ReinterpretHF2S:
|
||||
return UseZfh || UseZfhmin;
|
||||
case Op_AddHF:
|
||||
case Op_DivHF:
|
||||
case Op_FmaHF:
|
||||
case Op_MaxHF:
|
||||
case Op_MinHF:
|
||||
case Op_MulHF:
|
||||
case Op_SubHF:
|
||||
case Op_SqrtHF:
|
||||
return UseZfh;
|
||||
}
|
||||
|
||||
return true; // Per default match rules are supported.
|
||||
@ -3056,6 +3068,27 @@ operand immF0()
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// Half Float Immediate
|
||||
operand immH()
|
||||
%{
|
||||
match(ConH);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// Half Float Immediate: +0.0f.
|
||||
operand immH0()
|
||||
%{
|
||||
predicate(jint_cast(n->geth()) == 0);
|
||||
match(ConH);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
operand immIOffset()
|
||||
%{
|
||||
predicate(Assembler::is_simm12(n->get_int()));
|
||||
@ -4907,6 +4940,39 @@ instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
|
||||
ins_pipe(ialu_imm);
|
||||
%}
|
||||
|
||||
// Load Half Float Constant
|
||||
instruct loadConH(fRegF dst, immH con) %{
|
||||
match(Set dst con);
|
||||
|
||||
ins_cost(LOAD_COST);
|
||||
format %{
|
||||
"flh $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConH"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
assert(UseZfh || UseZfhmin, "must");
|
||||
__ flh(as_FloatRegister($dst$$reg), $constantaddress($con));
|
||||
// TODO: add zfa instructions for half float, and optimize here.
|
||||
%}
|
||||
|
||||
ins_pipe(fp_load_constant_s);
|
||||
%}
|
||||
|
||||
instruct loadConH0(fRegF dst, immH0 con) %{
|
||||
match(Set dst con);
|
||||
|
||||
ins_cost(XFER_COST);
|
||||
|
||||
format %{ "fmv.h.x $dst, zr\t# float, #@loadConH0" %}
|
||||
|
||||
ins_encode %{
|
||||
assert(UseZfh || UseZfhmin, "must");
|
||||
__ fmv_h_x(as_FloatRegister($dst$$reg), zr);
|
||||
%}
|
||||
|
||||
ins_pipe(fp_load_constant_s);
|
||||
%}
|
||||
|
||||
// Load Float Constant
|
||||
instruct loadConF(fRegF dst, immF con) %{
|
||||
match(Set dst con);
|
||||
@ -7291,7 +7357,7 @@ instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
|
||||
ins_encode %{
|
||||
__ minmax_fp(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
|
||||
false /* is_double */, false /* is_min */);
|
||||
__ FLOAT_TYPE::single_precision, false /* is_min */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_default);
|
||||
@ -7307,7 +7373,7 @@ instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
|
||||
ins_encode %{
|
||||
__ minmax_fp(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
|
||||
false /* is_double */, true /* is_min */);
|
||||
__ FLOAT_TYPE::single_precision, true /* is_min */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_default);
|
||||
@ -7323,7 +7389,7 @@ instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
|
||||
ins_encode %{
|
||||
__ minmax_fp(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
|
||||
true /* is_double */, false /* is_min */);
|
||||
__ FLOAT_TYPE::double_precision, false /* is_min */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_default);
|
||||
@ -7339,7 +7405,7 @@ instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
|
||||
ins_encode %{
|
||||
__ minmax_fp(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
|
||||
true /* is_double */, true /* is_min */);
|
||||
__ FLOAT_TYPE::double_precision, true /* is_min */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_default);
|
||||
@ -8009,6 +8075,17 @@ instruct checkCastPP(iRegPNoSp dst)
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
instruct castHH(fRegF dst)
|
||||
%{
|
||||
match(Set dst (CastHH dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castHH of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
instruct castFF(fRegF dst)
|
||||
%{
|
||||
match(Set dst (CastFF dst));
|
||||
@ -8171,6 +8248,108 @@ instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF ftmp, iRegINoSp xtmp)
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// half precision operations
|
||||
|
||||
instruct reinterpretS2HF(fRegF dst, iRegI src)
|
||||
%{
|
||||
match(Set dst (ReinterpretS2HF src));
|
||||
format %{ "fmv.h.x $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ fmv_h_x($dst$$FloatRegister, $src$$Register);
|
||||
%}
|
||||
ins_pipe(fp_i2f);
|
||||
%}
|
||||
|
||||
instruct convF2HFAndS2HF(fRegF dst, fRegF src)
|
||||
%{
|
||||
match(Set dst (ReinterpretS2HF (ConvF2HF src)));
|
||||
format %{ "convF2HFAndS2HF $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ fcvt_h_s($dst$$FloatRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(fp_uop_s);
|
||||
%}
|
||||
|
||||
instruct reinterpretHF2S(iRegINoSp dst, fRegF src)
|
||||
%{
|
||||
match(Set dst (ReinterpretHF2S src));
|
||||
format %{ "fmv.x.h $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ fmv_x_h($dst$$Register, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(fp_f2i);
|
||||
%}
|
||||
|
||||
instruct convHF2SAndHF2F(fRegF dst, fRegF src)
|
||||
%{
|
||||
match(Set dst (ConvHF2F (ReinterpretHF2S src)));
|
||||
format %{ "convHF2SAndHF2F $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ fcvt_s_h($dst$$FloatRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(fp_uop_s);
|
||||
%}
|
||||
|
||||
instruct sqrt_HF_reg(fRegF dst, fRegF src)
|
||||
%{
|
||||
match(Set dst (SqrtHF src));
|
||||
format %{ "fsqrt.h $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ fsqrt_h($dst$$FloatRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(fp_sqrt_s);
|
||||
%}
|
||||
|
||||
instruct binOps_HF_reg(fRegF dst, fRegF src1, fRegF src2)
|
||||
%{
|
||||
match(Set dst (AddHF src1 src2));
|
||||
match(Set dst (SubHF src1 src2));
|
||||
match(Set dst (MulHF src1 src2));
|
||||
match(Set dst (DivHF src1 src2));
|
||||
format %{ "binop_hf $dst, $src1, $src2" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
switch(opcode) {
|
||||
case Op_AddHF: __ fadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
|
||||
case Op_SubHF: __ fsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
|
||||
case Op_MulHF: __ fmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
|
||||
case Op_DivHF: __ fdiv_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
|
||||
default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break;
|
||||
}
|
||||
%}
|
||||
ins_pipe(fp_dop_reg_reg_s);
|
||||
%}
|
||||
|
||||
instruct min_max_HF_reg(fRegF dst, fRegF src1, fRegF src2)
|
||||
%{
|
||||
match(Set dst (MinHF src1 src2));
|
||||
match(Set dst (MaxHF src1 src2));
|
||||
format %{ "min_max_hf $dst, $src1, $src2" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
switch(opcode) {
|
||||
case Op_MinHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
|
||||
__ FLOAT_TYPE::half_precision, true);
|
||||
break;
|
||||
case Op_MaxHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
|
||||
__ FLOAT_TYPE::half_precision, false);
|
||||
break;
|
||||
default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break;
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
instruct fma_HF_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3)
|
||||
%{
|
||||
match(Set dst (FmaHF src3 (Binary src1 src2)));
|
||||
format %{ "fmadd.h $dst, $src1, $src2, $src3\t# $dst = $src1 * $src2 + $src3 fma packedH" %}
|
||||
ins_encode %{
|
||||
__ fmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// float <-> int
|
||||
|
||||
instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
|
||||
|
||||
@ -6156,6 +6156,104 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// x10 = input (float16)
|
||||
// f10 = result (float)
|
||||
// t0, t1 = temporary registers
|
||||
address generate_float16ToFloat() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubGenStubId stub_id = StubGenStubId::hf2f_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
address entry = __ pc();
|
||||
BLOCK_COMMENT("float16ToFloat:");
|
||||
|
||||
FloatRegister dst = f10;
|
||||
Register src = x10;
|
||||
Label NaN_SLOW;
|
||||
|
||||
assert(VM_Version::supports_float16_float_conversion(), "must");
|
||||
|
||||
// On RISC-V, NaN needs special handling because fcvt does not work in that case.
|
||||
// Inf does not need special handling, as fcvt can handle it correctly,
|
||||
// but we let the slow path process both NaN and Inf,
|
||||
// as both of them are rare cases; making the slow path handle
|
||||
// only the NaN case would sacrifice performance for the normal cases,
|
||||
// i.e. non-NaN and non-Inf cases.
|
||||
|
||||
// check whether it's a NaN or +/- Inf.
|
||||
__ mv(t0, 0x7c00);
|
||||
__ andr(t1, src, t0);
|
||||
// jump to stub processing NaN and Inf cases.
|
||||
__ beq(t0, t1, NaN_SLOW);
|
||||
|
||||
// neither NaN nor Inf: just use the built-in instructions.
|
||||
__ fmv_h_x(dst, src);
|
||||
__ fcvt_s_h(dst, dst);
|
||||
__ ret();
|
||||
|
||||
__ bind(NaN_SLOW);
|
||||
// The following instructions mainly target NaN, as fcvt on RISC-V does not
|
||||
// handle NaN well, but the code also works for Inf.
|
||||
|
||||
// construct a NaN in 32 bits from the NaN in 16 bits,
|
||||
// we need the payloads of non-canonical NaNs to be preserved.
|
||||
__ mv(t1, 0x7f800000);
|
||||
// sign-bit was already set via sign-extension if necessary.
|
||||
__ slli(t0, src, 13);
|
||||
__ orr(t1, t0, t1);
|
||||
__ fmv_w_x(dst, t1);
|
||||
|
||||
__ ret();
|
||||
return entry;
|
||||
}
|
||||
|
||||
// f10 = input (float)
|
||||
// x10 = result (float16)
|
||||
// f11 = temporary float register
|
||||
// t0, t1 = temporary registers
|
||||
address generate_floatToFloat16() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubGenStubId stub_id = StubGenStubId::f2hf_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
address entry = __ pc();
|
||||
BLOCK_COMMENT("floatToFloat16:");
|
||||
|
||||
Register dst = x10;
|
||||
FloatRegister src = f10, ftmp = f11;
|
||||
Label NaN_SLOW;
|
||||
|
||||
assert(VM_Version::supports_float16_float_conversion(), "must");
|
||||
|
||||
// On RISC-V, NaN needs special handling because fcvt does not work in that case.
|
||||
|
||||
// check whether it's a NaN.
|
||||
// feq is used instead of fclass as a performance optimization.
|
||||
__ feq_s(t0, src, src);
|
||||
// jump to stub processing NaN cases.
|
||||
__ beqz(t0, NaN_SLOW);
|
||||
|
||||
// non-NaN cases, just use built-in instructions.
|
||||
__ fcvt_h_s(ftmp, src);
|
||||
__ fmv_x_h(dst, ftmp);
|
||||
__ ret();
|
||||
|
||||
__ bind(NaN_SLOW);
|
||||
__ fmv_x_w(dst, src);
|
||||
|
||||
// preserve the payloads of non-canonical NaNs.
|
||||
__ srai(dst, dst, 13);
|
||||
// preserve the sign bit.
|
||||
__ srai(t1, dst, 13);
|
||||
__ slli(t1, t1, 10);
|
||||
__ mv(t0, 0x3ff);
|
||||
__ orr(t1, t1, t0);
|
||||
|
||||
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
|
||||
__ andr(dst, dst, t1);
|
||||
|
||||
__ ret();
|
||||
return entry;
|
||||
}
|
||||
|
||||
#endif // COMPILER2_OR_JVMCI
|
||||
|
||||
#ifdef COMPILER2
|
||||
@ -6525,6 +6623,12 @@ static const int64_t right_3_bits = right_n_bits(3);
|
||||
StubRoutines::_crc_table_adr = (address)StubRoutines::riscv::_crc_table;
|
||||
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
|
||||
}
|
||||
|
||||
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_float16ToFloat) &&
|
||||
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_floatToFloat16)) {
|
||||
StubRoutines::_hf2f = generate_float16ToFloat();
|
||||
StubRoutines::_f2hf = generate_floatToFloat16();
|
||||
}
|
||||
}
|
||||
|
||||
void generate_continuation_stubs() {
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "classfile/vmIntrinsics.hpp"
|
||||
#include "runtime/java.hpp"
|
||||
#include "runtime/os.inline.hpp"
|
||||
#include "runtime/vm_version.hpp"
|
||||
@ -464,3 +465,18 @@ void VM_Version::initialize_cpu_information(void) {
|
||||
snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", features_string());
|
||||
_initialized = true;
|
||||
}
|
||||
|
||||
bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
|
||||
assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
|
||||
switch (id) {
|
||||
case vmIntrinsics::_floatToFloat16:
|
||||
case vmIntrinsics::_float16ToFloat:
|
||||
if (!supports_float16_float_conversion()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -297,6 +297,13 @@ class VM_Version : public Abstract_VM_Version {
|
||||
// RISCV64 supports fast class initialization checks
|
||||
static bool supports_fast_class_init_checks() { return true; }
|
||||
static bool supports_fencei_barrier() { return ext_Zifencei.enabled(); }
|
||||
|
||||
static bool supports_float16_float_conversion() {
|
||||
return UseZfh || UseZfhmin;
|
||||
}
|
||||
|
||||
// Check intrinsic support
|
||||
static bool is_intrinsic_supported(vmIntrinsicID id);
|
||||
};
|
||||
|
||||
#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
|
||||
|
||||
@ -54,7 +54,7 @@ public class ConvF2HFIdealizationTests {
|
||||
@Test
|
||||
@IR(counts = {IRNode.REINTERPRET_S2HF, ">=1", IRNode.REINTERPRET_HF2S, ">=1", IRNode.ADD_HF, ">=1" },
|
||||
failOn = {IRNode.ADD_F, IRNode.CONV_HF2F, IRNode.CONV_F2HF},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
// Test pattern - ConvHF2F -> AddF -> ConvF2HF is optimized to ReinterpretS2HF -> AddHF -> ReinterpretHF2S
|
||||
public void test1() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
|
||||
@ -54,7 +54,7 @@ public class MulHFNodeIdealizationTests {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_HF, "1"},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"},
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"},
|
||||
failOn = {IRNode.MUL_HF})
|
||||
public void test1() {
|
||||
dst = multiply(src, valueOf(2.0f));
|
||||
|
||||
@ -101,7 +101,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {"convHF2SAndHF2F", " >0 "}, phase = {CompilePhase.FINAL_CODE},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testEliminateIntermediateHF2S() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -114,7 +114,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testAdd1() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -125,7 +125,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.ADD_HF, IRNode.REINTERPRET_S2HF, IRNode.REINTERPRET_HF2S},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testAdd2() {
|
||||
Float16 hf0 = shortBitsToFloat16((short)0);
|
||||
Float16 hf1 = shortBitsToFloat16((short)15360);
|
||||
@ -137,7 +137,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SUB_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testSub() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -148,7 +148,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMul() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -159,7 +159,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.DIV_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testDiv() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -170,7 +170,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testDivByOne() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -181,7 +181,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MAX_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMax() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -192,7 +192,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MIN_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMin() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -203,7 +203,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SQRT_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testSqrt() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -214,7 +214,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.FMA_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testFma() {
|
||||
Float16 res = shortBitsToFloat16((short)0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
@ -226,7 +226,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testDivByPOT() {
|
||||
Float16 res = valueOf(0.0f);
|
||||
for (int i = 0; i < 50; i++) {
|
||||
@ -243,7 +243,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMulByTWO() {
|
||||
Float16 res = valueOf(0.0f);
|
||||
Float16 multiplier = valueOf(2.0f);
|
||||
@ -280,7 +280,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testAddConstantFolding() {
|
||||
// If either value is NaN, then the result is NaN.
|
||||
assertResult(add(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding");
|
||||
@ -323,7 +323,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SUB_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testSubConstantFolding() {
|
||||
// If either value is NaN, then the result is NaN.
|
||||
assertResult(subtract(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding");
|
||||
@ -356,7 +356,7 @@ public class TestFloat16ScalarOperations {
|
||||
@Test
|
||||
@Warmup(value = 10000)
|
||||
@IR(counts = {IRNode.MAX_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMaxConstantFolding() {
|
||||
// If either value is NaN, then the result is NaN.
|
||||
assertResult(max(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMaxConstantFolding");
|
||||
@ -374,7 +374,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MIN_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMinConstantFolding() {
|
||||
// If either value is NaN, then the result is NaN.
|
||||
assertResult(min(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMinConstantFolding");
|
||||
@ -391,7 +391,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testDivConstantFolding() {
|
||||
// If either value is NaN, then the result is NaN.
|
||||
assertResult(divide(Float16.NaN, POSITIVE_ZERO).floatValue(), Float.NaN, "testDivConstantFolding");
|
||||
@ -431,7 +431,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testMulConstantFolding() {
|
||||
// If any operand is NaN, the result is NaN.
|
||||
assertResult(multiply(Float16.NaN, valueOf(4.0f)).floatValue(), Float.NaN, "testMulConstantFolding");
|
||||
@ -454,7 +454,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SQRT_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testSqrtConstantFolding() {
|
||||
// If the argument is NaN or less than zero, then the result is NaN.
|
||||
assertResult(sqrt(Float16.NaN).floatValue(), Float.NaN, "testSqrtConstantFolding");
|
||||
@ -473,7 +473,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testFMAConstantFolding() {
|
||||
// If any argument is NaN, the result is NaN.
|
||||
assertResult(fma(Float16.NaN, valueOf(2.0f), valueOf(3.0f)).floatValue(), Float.NaN, "testFMAConstantFolding");
|
||||
@ -508,7 +508,7 @@ public class TestFloat16ScalarOperations {
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testRounding1() {
|
||||
dst[0] = float16ToRawShortBits(add(RANDOM1, RANDOM2));
|
||||
dst[1] = float16ToRawShortBits(subtract(RANDOM2, RANDOM3));
|
||||
@ -547,7 +547,7 @@ public class TestFloat16ScalarOperations {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.SUB_HF, " >0 ", IRNode.MUL_HF, " >0 ",
|
||||
IRNode.DIV_HF, " >0 ", IRNode.SQRT_HF, " >0 ", IRNode.FMA_HF, " >0 "},
|
||||
applyIfCPUFeature = {"avx512_fp16", "true"})
|
||||
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
|
||||
public void testRounding2() {
|
||||
dst[0] = float16ToRawShortBits(add(RANDOM1_VAR, RANDOM2_VAR));
|
||||
dst[1] = float16ToRawShortBits(subtract(RANDOM2_VAR, RANDOM3_VAR));
|
||||
|
||||
@ -110,6 +110,7 @@ public class IREncodingPrinter {
|
||||
"sve",
|
||||
// Riscv64
|
||||
"rvv",
|
||||
"zfh",
|
||||
"zvbb",
|
||||
"zvfh"
|
||||
));
|
||||
|
||||
@ -45,8 +45,6 @@ public class TestFloat16VectorConvChain {
|
||||
counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
|
||||
@IR(applyIfCPUFeatureAnd = {"avx512_fp16", "false", "f16c", "true"},
|
||||
counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
|
||||
@IR(applyIfCPUFeature = {"zvfh", "true"},
|
||||
counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
|
||||
public static void test(short [] res, short [] src1, short [] src2) {
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = (short)Float.float16ToFloat(Float.floatToFloat16(Float.float16ToFloat(src1[i]) + Float.float16ToFloat(src2[i])));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user