mirror of
https://github.com/openjdk/jdk.git
synced 2026-06-06 18:53:37 +00:00
8376285: AArch64: Support fusion optimization for SVE destructive instructions
Reviewed-by: aph, dlong, adinn
This commit is contained in:
parent
9c244ec182
commit
e83e79460b
@ -1671,24 +1671,42 @@ instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
|
||||
|
||||
// vector not - predicated
|
||||
|
||||
instruct vnotI_masked(vReg dst_src, immI_M1 m1, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnotI_masked(vReg dst, vReg src, immI_M1 m1, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
|
||||
format %{ "vnotI_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (XorV (Binary src (Replicate m1)) pg));
|
||||
format %{ "vnotI_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_not($dst$$FloatRegister, get_reg_variant(this),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vnotL_masked(vReg dst_src, immL_M1 m1, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnotL_masked(vReg dst, vReg src, immL_M1 m1, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
|
||||
format %{ "vnotL_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (XorV (Binary src (Replicate m1)) pg));
|
||||
format %{ "vnotL_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_not($dst$$FloatRegister, get_reg_variant(this),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -1985,62 +2003,116 @@ instruct vabsD(vReg dst, vReg src) %{
|
||||
|
||||
// vector abs - predicated
|
||||
|
||||
instruct vabsB_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vabsB_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (AbsVB dst_src pg));
|
||||
format %{ "vabsB_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (AbsVB src pg));
|
||||
format %{ "vabsB_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_abs($dst_src$$FloatRegister, __ B, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_abs($dst$$FloatRegister, __ B, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vabsS_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vabsS_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (AbsVS dst_src pg));
|
||||
format %{ "vabsS_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (AbsVS src pg));
|
||||
format %{ "vabsS_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_abs($dst_src$$FloatRegister, __ H, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_abs($dst$$FloatRegister, __ H, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vabsI_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vabsI_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (AbsVI dst_src pg));
|
||||
format %{ "vabsI_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (AbsVI src pg));
|
||||
format %{ "vabsI_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_abs($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_abs($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vabsL_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vabsL_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (AbsVL dst_src pg));
|
||||
format %{ "vabsL_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (AbsVL src pg));
|
||||
format %{ "vabsL_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_abs($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_abs($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vabsF_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vabsF_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (AbsVF dst_src pg));
|
||||
format %{ "vabsF_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (AbsVF src pg));
|
||||
format %{ "vabsF_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_fabs($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_fabs($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vabsD_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vabsD_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (AbsVD dst_src pg));
|
||||
format %{ "vabsD_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (AbsVD src pg));
|
||||
format %{ "vabsD_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_fabs($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_fabs($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -2158,44 +2230,80 @@ instruct vnegD(vReg dst, vReg src) %{
|
||||
|
||||
// vector neg - predicated
|
||||
|
||||
instruct vnegI_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnegI_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (NegVI dst_src pg));
|
||||
format %{ "vnegI_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (NegVI src pg));
|
||||
format %{ "vnegI_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_neg($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_neg($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vnegL_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnegL_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (NegVL dst_src pg));
|
||||
format %{ "vnegL_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (NegVL src pg));
|
||||
format %{ "vnegL_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_neg($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_neg($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vnegF_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnegF_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (NegVF dst_src pg));
|
||||
format %{ "vnegF_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (NegVF src pg));
|
||||
format %{ "vnegF_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_fneg($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_fneg($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vnegD_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnegD_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (NegVD dst_src pg));
|
||||
format %{ "vnegD_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (NegVD src pg));
|
||||
format %{ "vnegD_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_fneg($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_fneg($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -2251,22 +2359,40 @@ instruct vsqrtD(vReg dst, vReg src) %{
|
||||
|
||||
// vector sqrt - predicated
|
||||
|
||||
instruct vsqrtF_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vsqrtF_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (SqrtVF dst_src pg));
|
||||
format %{ "vsqrtF_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (SqrtVF src pg));
|
||||
format %{ "vsqrtF_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_fsqrt($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_fsqrt($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vsqrtD_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vsqrtD_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (SqrtVD dst_src pg));
|
||||
format %{ "vsqrtD_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (SqrtVD src pg));
|
||||
format %{ "vsqrtD_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_fsqrt($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_fsqrt($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5331,9 +5457,7 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx,
|
||||
__ sve_index($tmp$$FloatRegister, size, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
|
||||
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5356,9 +5480,7 @@ instruct insertI_index_ge32(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg t
|
||||
__ sve_dup($tmp2$$FloatRegister, size, (int)($idx$$constant));
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5392,9 +5514,7 @@ instruct insertL_gt128b(vReg dst, vReg src, iRegL val, immI idx,
|
||||
__ sve_index($tmp$$FloatRegister, __ D, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
|
||||
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5432,7 +5552,7 @@ instruct insertF_index_lt32(vReg dst, vReg src, vRegF val, immI idx,
|
||||
__ sve_index($dst$$FloatRegister, __ S, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
|
||||
$dst$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5451,7 +5571,7 @@ instruct insertF_index_ge32(vReg dst, vReg src, vRegF val, immI idx, vReg tmp,
|
||||
__ sve_dup($dst$$FloatRegister, __ S, (int)($idx$$constant));
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
|
||||
$tmp$$FloatRegister, $dst$$FloatRegister);
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5486,7 +5606,7 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
|
||||
__ sve_index($dst$$FloatRegister, __ D, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
|
||||
$dst$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5656,8 +5776,12 @@ instruct extractF(vRegF dst, vReg src, immI idx) %{
|
||||
__ ins($dst$$FloatRegister, __ S, $src$$FloatRegister, 0, index);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << 2);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the second source of ext. The movprfx destination register
|
||||
// must not appear in any source operand of the following instruction
|
||||
// except as the destructive operand.
|
||||
__ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << 2);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5677,8 +5801,12 @@ instruct extractD(vRegD dst, vReg src, immI idx) %{
|
||||
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, index);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << 3);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the second source of ext. The movprfx destination register
|
||||
// must not appear in any source operand of the following instruction
|
||||
// except as the destructive operand.
|
||||
__ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << 3);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -6855,25 +6983,43 @@ instruct vpopcountL(vReg dst, vReg src) %{
|
||||
|
||||
// vector popcount - predicated
|
||||
|
||||
instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vpopcountI_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (PopCountVI dst_src pg));
|
||||
format %{ "vpopcountI_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (PopCountVI src pg));
|
||||
format %{ "vpopcountI_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_cnt($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vpopcountL_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (PopCountVL dst_src pg));
|
||||
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (PopCountVL src pg));
|
||||
format %{ "vpopcountL_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt($dst_src$$FloatRegister, __ D,
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_cnt($dst$$FloatRegister, __ D,
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -7240,14 +7386,23 @@ instruct vcountLeadingZeros(vReg dst, vReg src) %{
|
||||
// The inactive lanes in dst must hold the same elements as src. dst and src
|
||||
// need not be the same register: src is copied into dst before the operation.
|
||||
|
||||
instruct vcountLeadingZeros_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vcountLeadingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (CountLeadingZerosV dst_src pg));
|
||||
format %{ "vcountLeadingZeros_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (CountLeadingZerosV src pg));
|
||||
format %{ "vcountLeadingZeros_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_clz($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_clz($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -7296,19 +7451,26 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// The dst and src should use the same register to make sure the
|
||||
// inactive lanes in dst save the same elements as src.
|
||||
instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vcountTrailingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (CountTrailingZerosV dst_src pg));
|
||||
format %{ "vcountTrailingZeros_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (CountTrailingZerosV src pg));
|
||||
format %{ "vcountTrailingZeros_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_rbit($dst_src$$FloatRegister, size,
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ sve_clz($dst_src$$FloatRegister, size,
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_rbit($dst$$FloatRegister, size,
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
__ sve_clz($dst$$FloatRegister, size,
|
||||
$pg$$PRegister, $dst$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -7347,14 +7509,23 @@ instruct vreverse(vReg dst, vReg src) %{
|
||||
// The inactive lanes in dst must hold the same elements as src. dst and src
|
||||
// need not be the same register: src is copied into dst before the operation.
|
||||
|
||||
instruct vreverse_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vreverse_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (ReverseV dst_src pg));
|
||||
format %{ "vreverse_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (ReverseV src pg));
|
||||
format %{ "vreverse_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_rbit($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_rbit($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -7393,19 +7564,28 @@ instruct vreverseBytes(vReg dst, vReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// The dst and src should use the same register to make sure the
|
||||
// inactive lanes in dst save the same elements as src.
|
||||
instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vreverseBytes_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (ReverseBytesV dst_src pg));
|
||||
format %{ "vreverseBytes_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (ReverseBytesV src pg));
|
||||
format %{ "vreverseBytes_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
if (bt == T_BYTE) {
|
||||
// do nothing
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
} else {
|
||||
__ sve_revb($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_revb($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
|
||||
@ -899,13 +899,22 @@ dnl
|
||||
dnl VECTOR_NOT_PREDICATE($1 )
|
||||
dnl VECTOR_NOT_PREDICATE(type)
|
||||
define(`VECTOR_NOT_PREDICATE', `
|
||||
instruct vnot$1_masked`'(vReg dst_src, imm$1_M1 m1, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vnot$1_masked`'(vReg dst, vReg src, imm$1_M1 m1, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
|
||||
format %{ "vnot$1_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (XorV (Binary src (Replicate m1)) pg));
|
||||
format %{ "vnot$1_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_not($dst$$FloatRegister, get_reg_variant(this),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
@ -1042,14 +1051,23 @@ dnl
|
||||
dnl UNARY_OP_PREDICATE($1, $2, $3 )
|
||||
dnl UNARY_OP_PREDICATE(rule_name, op_name, insn)
|
||||
define(`UNARY_OP_PREDICATE', `
|
||||
instruct $1_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct $1_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src ($2 dst_src pg));
|
||||
format %{ "$1_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst ($2 src pg));
|
||||
format %{ "$1_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ $3($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ $3($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
@ -1057,12 +1075,21 @@ dnl
|
||||
dnl UNARY_OP_PREDICATE_WITH_SIZE($1, $2, $3, $4 )
|
||||
dnl UNARY_OP_PREDICATE_WITH_SIZE(rule_name, op_name, insn, size)
|
||||
define(`UNARY_OP_PREDICATE_WITH_SIZE', `
|
||||
instruct $1_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct $1_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src ($2 dst_src pg));
|
||||
format %{ "$1_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst ($2 src pg));
|
||||
format %{ "$1_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ $3($dst_src$$FloatRegister, __ $4, $pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ $3($dst$$FloatRegister, __ $4, $pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
@ -3368,9 +3395,7 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx,
|
||||
__ sve_index($tmp$$FloatRegister, size, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
|
||||
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3393,9 +3418,7 @@ instruct insertI_index_ge32(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg t
|
||||
__ sve_dup($tmp2$$FloatRegister, size, (int)($idx$$constant));
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3429,9 +3452,7 @@ instruct insertL_gt128b(vReg dst, vReg src, iRegL val, immI idx,
|
||||
__ sve_index($tmp$$FloatRegister, __ D, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
|
||||
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3469,7 +3490,7 @@ instruct insertF_index_lt32(vReg dst, vReg src, vRegF val, immI idx,
|
||||
__ sve_index($dst$$FloatRegister, __ S, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
|
||||
$dst$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3488,7 +3509,7 @@ instruct insertF_index_ge32(vReg dst, vReg src, vRegF val, immI idx, vReg tmp,
|
||||
__ sve_dup($dst$$FloatRegister, __ S, (int)($idx$$constant));
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
|
||||
$tmp$$FloatRegister, $dst$$FloatRegister);
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3523,7 +3544,7 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
|
||||
__ sve_index($dst$$FloatRegister, __ D, -16, 1);
|
||||
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
|
||||
$dst$$FloatRegister, (int)($idx$$constant) - 16);
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3621,8 +3642,12 @@ instruct extract$1(vReg$1 dst, vReg src, immI idx) %{
|
||||
__ ins($dst$$FloatRegister, __ $4, $src$$FloatRegister, 0, index);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
__ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << $5);
|
||||
__ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the second source of ext. The movprfx destination register
|
||||
// must not appear in any source operand of the following instruction
|
||||
// except as the destructive operand.
|
||||
__ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << $5);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -4682,13 +4707,22 @@ instruct vpopcountL(vReg dst, vReg src) %{
|
||||
// vector popcount - predicated
|
||||
UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
|
||||
|
||||
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vpopcountL_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (PopCountVL dst_src pg));
|
||||
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (PopCountVL src pg));
|
||||
format %{ "vpopcountL_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt($dst_src$$FloatRegister, __ D,
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_cnt($dst$$FloatRegister, __ D,
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5100,19 +5134,26 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// The dst and src should use the same register to make sure the
|
||||
// inactive lanes in dst save the same elements as src.
|
||||
instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vcountTrailingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (CountTrailingZerosV dst_src pg));
|
||||
format %{ "vcountTrailingZeros_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (CountTrailingZerosV src pg));
|
||||
format %{ "vcountTrailingZeros_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_rbit($dst_src$$FloatRegister, size,
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ sve_clz($dst_src$$FloatRegister, size,
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_rbit($dst$$FloatRegister, size,
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
__ sve_clz($dst$$FloatRegister, size,
|
||||
$pg$$PRegister, $dst$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5186,19 +5227,28 @@ instruct vreverseBytes(vReg dst, vReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// The dst and src should use the same register to make sure the
|
||||
// inactive lanes in dst save the same elements as src.
|
||||
instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
|
||||
// The Java Vector API specification requires that for masked unary operations,
|
||||
// suppressed lanes are filled from the first vector operand (see "Masked
|
||||
// Operations" in Vector.java around line 568). So we use movprfx to copy src
|
||||
// into dst before emitting the predicated instruction.
|
||||
instruct vreverseBytes_masked(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (ReverseBytesV dst_src pg));
|
||||
format %{ "vreverseBytes_masked $dst_src, $pg, $dst_src" %}
|
||||
match(Set dst (ReverseBytesV src pg));
|
||||
format %{ "vreverseBytes_masked $dst, $pg, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
if (bt == T_BYTE) {
|
||||
// do nothing
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
|
||||
}
|
||||
} else {
|
||||
__ sve_revb($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $dst_src$$FloatRegister);
|
||||
__ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
|
||||
// Although dst and src hold the same value after movprfx, we must use src
|
||||
// (not dst) as the source of the following instruction. The movprfx
|
||||
// destination register must not appear in any source operand of the
|
||||
// following instruction except as the destructive operand.
|
||||
__ sve_revb($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pg$$PRegister, $src$$FloatRegister);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
|
||||
@ -2494,8 +2494,12 @@ void C2_MacroAssembler::sve_extract_integral(Register dst, BasicType bt, FloatRe
|
||||
smov(dst, src, size, idx);
|
||||
}
|
||||
} else {
|
||||
sve_orr(vtmp, src, src);
|
||||
sve_ext(vtmp, vtmp, idx << size);
|
||||
sve_movprfx(vtmp, src);
|
||||
// Although vtmp and src hold the same value after movprfx, we must use src
|
||||
// (not vtmp) as the second source of ext. The movprfx destination register
|
||||
// must not appear in any source operand of the following instruction except
|
||||
// as the destructive operand.
|
||||
sve_ext(vtmp, src, idx << size);
|
||||
if (bt == T_INT || bt == T_LONG) {
|
||||
umov(dst, vtmp, size, 0);
|
||||
} else {
|
||||
|
||||
@ -7278,3 +7278,26 @@ void MacroAssembler::neon_vector_rotate(FloatRegister dst, SIMD_Arrangement T,
|
||||
sli(dst, T, src, lshift);
|
||||
}
|
||||
}
|
||||
|
||||
// Peephole helper called by the SVE destructive-instruction wrappers before
// they emit their instruction.
//
// If the 32-bit instruction word located immediately before the current pc()
// is recognised by NativeInstruction::is_neon_vector_mov_alias() or
// NativeInstruction::is_sve_vector_mov_alias(), and its destination register
// field (bits 4:0) equals dst, that word is overwritten in place with
// NativeInstruction::encode_sve_movprfx(rd, rn), where rn is the register
// field taken from bits 9:5 of the matched instruction. In every other case
// the already-emitted code is left untouched.
//
// dst: the register the caller is about to use as the destructive operand.
//
// NOTE(review): this reads raw bytes at pc() - instruction_size and assumes
// they are an instruction previously emitted into this code section; confirm
// that no caller can get here right after emitting non-instruction data.
void MacroAssembler::try_to_replace_prev_vector_copy_with_movprfx(FloatRegister dst) {
|
||||
// Nothing to inspect if the current code section is empty.
if (code_section()->is_empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Fetch the encoding of the single instruction slot just before pc().
address prev = pc() - NativeInstruction::instruction_size;
|
||||
uint32_t insn = nativeInstruction_at(prev)->encoding();
|
||||
// Only a vector register-to-register move (NEON or SVE form) is a candidate.
if (!NativeInstruction::is_neon_vector_mov_alias(insn) &&
|
||||
!NativeInstruction::is_sve_vector_mov_alias(insn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The destructive instruction must reuse the mov alias destination,
// i.e. the move's destination field (bits 4:0) has to be dst itself.
uint32_t rd = Instruction_aarch64::extract(insn, 4, 0);
|
||||
if (rd != (uint32_t)dst->encoding()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Reuse the move's source field (bits 9:5) and rewrite all 32 bits of the
// previous instruction with the movprfx encoding for (rd, rn).
uint32_t rn = Instruction_aarch64::extract(insn, 9, 5);
|
||||
Instruction_aarch64::patch(prev, 31, 0,
|
||||
NativeInstruction::encode_sve_movprfx(rd, rn));
|
||||
}
|
||||
|
||||
@ -1734,7 +1734,103 @@ public:
|
||||
private:
|
||||
// Check the current thread doesn't need a cross modify fence.
|
||||
void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
|
||||
void try_to_replace_prev_vector_copy_with_movprfx(FloatRegister dst);
|
||||
|
||||
public:
|
||||
// Emits sve_movprfx(dst, src) when dst and src are different registers and
// emits nothing when they are the same register. Intended to be called
// directly before the instruction that then overwrites dst.
void maybe_movprfx(FloatRegister dst, FloatRegister src) {
|
||||
if (dst != src) {
|
||||
sve_movprfx(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
// Wrappers for SVE explicit destructive instructions, overriding the
|
||||
// same-signature Assembler entry points to enable movprfx fusion optimization.
|
||||
//
|
||||
// Implicit destructive instructions (e.g. predicated unary ops like sve_abs/
|
||||
// sve_neg/sve_not, whose ISA encoding allows Zd != Zn but whose use as a Java
|
||||
// Vector API masked operation requires pass-through of the first source) are
|
||||
// not covered here. For those, the .ad file is responsible for emitting
|
||||
// movprfx explicitly via maybe_movprfx() before the destructive op.
|
||||
//
// SVE_DESTRUCTIVE_BINARY_INS(NAME) defines NAME(Zd, T, Pg, Zm) for the
// predicated two-operand form and re-exports the remaining Assembler::NAME
// overloads with a using-declaration. Before forwarding to Assembler::NAME it
// calls try_to_replace_prev_vector_copy_with_movprfx(Zd), but only when
// Zd != Zm: the rewrite is skipped if the destination is also the other source
// operand, since a movprfx destination must not appear as a non-destructive
// source of the instruction that follows it.
#define SVE_DESTRUCTIVE_BINARY_INS(NAME) \
|
||||
using Assembler::NAME; \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, \
|
||||
FloatRegister Zm) { \
|
||||
if (Zd != Zm) { \
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd); \
|
||||
} \
|
||||
Assembler::NAME(Zd, T, Pg, Zm); \
|
||||
}
|
||||
|
||||
// Convenience macro: instantiates SVE_DESTRUCTIVE_BINARY_INS for five
// instruction names at once.
#define SVE_DESTRUCTIVE_BINARY_5(I1, I2, I3, I4, I5) \
|
||||
SVE_DESTRUCTIVE_BINARY_INS(I1); SVE_DESTRUCTIVE_BINARY_INS(I2); \
|
||||
SVE_DESTRUCTIVE_BINARY_INS(I3); SVE_DESTRUCTIVE_BINARY_INS(I4); \
|
||||
SVE_DESTRUCTIVE_BINARY_INS(I5);
|
||||
|
||||
// The 25 predicated binary instructions that get a fusing wrapper.
SVE_DESTRUCTIVE_BINARY_5(sve_add, sve_and, sve_asr, sve_bic, sve_eor)
|
||||
SVE_DESTRUCTIVE_BINARY_5(sve_fabd, sve_fadd, sve_fdiv, sve_fmax, sve_fmin)
|
||||
SVE_DESTRUCTIVE_BINARY_5(sve_fmul, sve_fsub, sve_lsl, sve_lsr, sve_mul)
|
||||
SVE_DESTRUCTIVE_BINARY_5(sve_orr, sve_smax, sve_smin, sve_sqadd, sve_sqsub)
|
||||
SVE_DESTRUCTIVE_BINARY_5(sve_sub, sve_uqadd, sve_uqsub, sve_umax, sve_umin)
|
||||
|
||||
#undef SVE_DESTRUCTIVE_BINARY_INS
|
||||
#undef SVE_DESTRUCTIVE_BINARY_5
|
||||
|
||||
// Defines NAME(Zd, T, Pg, shift), the predicated shift-by-immediate form.
// Zd is the only vector register operand, so the wrapper always calls
// try_to_replace_prev_vector_copy_with_movprfx(Zd) before forwarding to
// Assembler::NAME. No using-declaration is emitted by this macro; the other
// overloads of these three names are presumably already re-exported by the
// binary wrappers for the same names - verify if the order of the macro
// sections is ever changed.
#define SVE_DESTRUCTIVE_SHIFT_IMM_INS(NAME) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int shift) { \
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd); \
|
||||
Assembler::NAME(Zd, T, Pg, shift); \
|
||||
}
|
||||
|
||||
SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_asr);
|
||||
SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_lsl);
|
||||
SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_lsr);
|
||||
|
||||
#undef SVE_DESTRUCTIVE_SHIFT_IMM_INS
|
||||
|
||||
// Defines NAME(Zd, T, imm), the unpredicated vector-with-immediate form, with
// IMM_TYPE as the C++ type of the immediate parameter. Zd is the only vector
// register operand, so the wrapper always calls
// try_to_replace_prev_vector_copy_with_movprfx(Zd) before forwarding to
// Assembler::NAME.
#define SVE_DESTRUCTIVE_UNPRED_IMM_INS(NAME, IMM_TYPE) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, IMM_TYPE imm) { \
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd); \
|
||||
Assembler::NAME(Zd, T, imm); \
|
||||
}
|
||||
|
||||
SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_add, unsigned);
|
||||
SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_sub, unsigned);
|
||||
SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_and, uint64_t);
|
||||
SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_eor, uint64_t);
|
||||
SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_orr, uint64_t);
|
||||
|
||||
#undef SVE_DESTRUCTIVE_UNPRED_IMM_INS
|
||||
|
||||
// Defines NAME(Zd, T, Pg, Zn, Zm) for predicated three-register instructions
// and re-exports the remaining Assembler::NAME overloads with a
// using-declaration. The preceding vector move is only considered for
// replacement when Zd differs from both Zn and Zm; if Zd is also one of the
// two other source operands the wrapper forwards to Assembler::NAME without
// touching previously emitted code.
#define SVE_DESTRUCTIVE_TERNARY_INS(NAME) \
|
||||
using Assembler::NAME; \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, \
|
||||
FloatRegister Zn, FloatRegister Zm) { \
|
||||
if (Zd != Zn && Zd != Zm) { \
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd); \
|
||||
} \
|
||||
Assembler::NAME(Zd, T, Pg, Zn, Zm); \
|
||||
}
|
||||
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fmad);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fmla);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fmls);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fmsb);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmad);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmla);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmls);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmsb);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_mla);
|
||||
SVE_DESTRUCTIVE_TERNARY_INS(sve_mls);
|
||||
|
||||
#undef SVE_DESTRUCTIVE_TERNARY_INS
|
||||
|
||||
// Wrapper for the three-operand sve_eor3(Zd, Zm, Zk) form; other
// Assembler::sve_eor3 overloads stay visible through the using-declaration.
// The preceding vector move is only considered for replacement by movprfx
// when Zd differs from both Zm and Zk.
using Assembler::sve_eor3;
|
||||
void sve_eor3(FloatRegister Zd, FloatRegister Zm, FloatRegister Zk) {
|
||||
if (Zd != Zm && Zd != Zk) {
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd);
|
||||
}
|
||||
Assembler::sve_eor3(Zd, Zm, Zk);
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef ASSERT
|
||||
|
||||
@ -140,6 +140,29 @@ public:
|
||||
Instruction_aarch64::extract(insn, 23, 23) == 0b0 &&
|
||||
Instruction_aarch64::extract(insn, 26, 25) == 0b00;
|
||||
}
|
||||
|
||||
// Returns true when insn has bit 31 clear, bits 29:21 equal to 0b001110101,
// bits 15:10 equal to 0b000111, and identical register fields in bits 9:5 and
// bits 20:16. Presumably this is the NEON "ORR Vd, Vn, Vn" encoding, i.e. the
// vector MOV alias - confirm against the Arm ARM.
// NOTE(review): bit 30 is not inspected, so encodings that differ only in that
// bit all match; confirm that this is intended for every caller.
static bool is_neon_vector_mov_alias(uint32_t insn) {
|
||||
if (Instruction_aarch64::extract(insn, 31, 31) != 0 ||
|
||||
Instruction_aarch64::extract(insn, 29, 21) != 0b001110101 ||
|
||||
Instruction_aarch64::extract(insn, 15, 10) != 0b000111) {
|
||||
return false;
|
||||
}
|
||||
// Fixed opcode bits match; it is a plain move only if both source register
// fields name the same register.
return Instruction_aarch64::extract(insn, 9, 5) ==
|
||||
Instruction_aarch64::extract(insn, 20, 16);
|
||||
}
|
||||
|
||||
// Returns true when insn has bits 31:21 equal to 0b00000100011, bits 15:10
// equal to 0b001100, and identical register fields in bits 9:5 and bits 20:16.
// Presumably this is the unpredicated SVE "ORR Zd.D, Zn.D, Zn.D" encoding,
// i.e. the SVE vector MOV alias - confirm against the Arm ARM.
static bool is_sve_vector_mov_alias(uint32_t insn) {
|
||||
if (Instruction_aarch64::extract(insn, 31, 21) != 0b00000100011 ||
|
||||
Instruction_aarch64::extract(insn, 15, 10) != 0b001100) {
|
||||
return false;
|
||||
}
|
||||
// Fixed opcode bits match; it is a plain move only if both source register
// fields name the same register.
return Instruction_aarch64::extract(insn, 9, 5) ==
|
||||
Instruction_aarch64::extract(insn, 20, 16);
|
||||
}
|
||||
|
||||
// Builds a 32-bit instruction word from the constant 0x1082f shifted left by
// 10 (0x0420bc00), src placed at bits 9:5 and dst at bits 4:0. Presumably this
// is the unpredicated SVE "MOVPRFX Zd, Zn" encoding - confirm against the
// Arm ARM.
// dst and src are not masked here, so callers must pass register numbers that
// fit in 5 bits (0..31).
static uint32_t encode_sve_movprfx(uint32_t dst, uint32_t src) {
|
||||
return 0x1082f << 10 | (src << 5) | dst;
|
||||
}
|
||||
};
|
||||
|
||||
inline NativeInstruction* nativeInstruction_at(address address) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user