mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-06 00:18:34 +00:00
8268363: AArch64: Implement string_indexof_char intrinsic in SVE
Reviewed-by: aph, njian
This commit is contained in:
parent
270fbcb3f5
commit
e54585be57
@ -5495,6 +5495,15 @@ operand vRegD_V31()
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register operand allocated from the pr_reg register class; it matches
// RegVectMask values and has zero operand cost.
// NOTE(review): presumably this is the unrestricted SVE predicate register
// operand, as opposed to pRegGov which allocates from gov_pr -- confirm
// against the register class definitions.
operand pReg()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(pr_reg));
|
||||
match(RegVectMask);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
operand pRegGov()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(gov_pr));
|
||||
@ -16660,11 +16669,11 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
%}
|
||||
|
||||
instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
||||
iRegINoSp tmp3, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
||||
iRegINoSp tmp3, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
||||
predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
|
||||
predicate((UseSVE == 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
|
||||
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
||||
|
||||
@ -16683,7 +16692,7 @@ instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
||||
iRegINoSp tmp3, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
||||
predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
|
||||
predicate((UseSVE == 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
|
||||
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
||||
|
||||
@ -16691,8 +16700,8 @@ instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
||||
|
||||
ins_encode %{
|
||||
__ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
|
||||
$result$$Register, $tmp1$$Register, $tmp2$$Register,
|
||||
$tmp3$$Register);
|
||||
$result$$Register, $tmp1$$Register, $tmp2$$Register,
|
||||
$tmp3$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
@ -1775,3 +1775,42 @@ instruct vmaskcast(vReg dst) %{
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Intrinsics for String.indexOf(char)
|
||||
|
||||
|
||||
// SVE rule for StrIndexOfChar on Latin1 strings. It is selected only when
// UseSVE > 0 and the node's encoding is StrIntrinsicNode::L. The rule needs
// two vector temporaries and two predicate temporaries (pgtmp comes from the
// pRegGov operand) and kills the flags register. The code itself is emitted
// by string_indexof_char_sve() with isL = true.
instruct stringL_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
||||
iRegI_R0 result, vReg ztmp1, vReg ztmp2,
|
||||
pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
||||
predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
|
||||
effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
|
||||
|
||||
format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
|
||||
|
||||
ins_encode %{
|
||||
__ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
||||
as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
|
||||
as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), true /* isL */);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// SVE rule for StrIndexOfChar on UTF16 strings. It is selected only when
// UseSVE > 0 and the node's encoding is StrIntrinsicNode::U. The rule needs
// two vector temporaries and two predicate temporaries (pgtmp comes from the
// pRegGov operand) and kills the flags register. The code itself is emitted
// by string_indexof_char_sve() with isL = false.
instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
||||
iRegI_R0 result, vReg ztmp1, vReg ztmp2,
|
||||
pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
||||
predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
|
||||
effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
|
||||
|
||||
format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
|
||||
|
||||
ins_encode %{
|
||||
__ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
||||
as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
|
||||
as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), false /* isL */);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
|
||||
@ -919,3 +919,29 @@ instruct vmaskcast(vReg dst) %{
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Intrinsics for String.indexOf(char)
|
||||
|
||||
dnl
|
||||
define(`STRING_INDEXOF_CHAR', `
|
||||
instruct string$1_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
||||
iRegI_R0 result, vReg ztmp1, vReg ztmp2,
|
||||
pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
||||
predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::$1));
|
||||
effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
|
||||
|
||||
format %{ "String$2 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
|
||||
|
||||
ins_encode %{
|
||||
__ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
||||
as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
|
||||
as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), $3 /* isL */);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}')dnl
|
||||
dnl $1 $2 $3
|
||||
STRING_INDEXOF_CHAR(L, Latin1, true)
|
||||
STRING_INDEXOF_CHAR(U, UTF16, false)
|
||||
dnl
|
||||
|
||||
|
||||
@ -3214,6 +3214,70 @@ public:
|
||||
f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
|
||||
}
|
||||
|
||||
// Integer comparisons (SVE)
|
||||
// Generates one encoder per compare instruction. `cond` is a 4-bit selector:
// its upper three bits are emitted into instruction bits 15..13 and its
// lowest bit into bit 4. The element size T goes into bits 23..22 and must
// not be Q. Pg is encoded with pgrf(), Pd with prf().
#define INSN(NAME, cond) \
|
||||
void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
|
||||
starti; \
|
||||
assert(T != Q, "invalid size"); \
|
||||
f(0b00100100, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond >> 1) & 7, 15, 13); \
|
||||
pgrf(Pg, 10), rf(Zn, 5), f(cond & 1, 4), prf(Pd, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_cmpeq, 0b1010); // Compare signed equal to vector
|
||||
INSN(sve_cmpne, 0b1011); // Compare not equal to vector
|
||||
INSN(sve_cmpge, 0b1000); // Compare signed greater than or equal to vector
|
||||
INSN(sve_cmpgt, 0b1001); // Compare signed greater than vector
|
||||
#undef INSN
|
||||
|
||||
// Predicate counted loop (SVE) (32-bit variants are not included)
|
||||
// Generates one encoder per "while" instruction. `decode` is a 3-bit
// selector: its upper two bits are emitted into instruction bits 11..10 and
// its lowest bit into bit 4. Bit 12 is always set here, which together with
// the section comment above corresponds to the non-32-bit scalar variants.
// Rn and Rm are encoded with zrf(); the element size T must not be Q.
#define INSN(NAME, decode) \
|
||||
void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) { \
|
||||
starti; \
|
||||
assert(T != Q, "invalid register variant"); \
|
||||
f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21), \
|
||||
zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10), \
|
||||
zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_whilelt, 0b010); // While incrementing signed scalar less than scalar
|
||||
INSN(sve_whilele, 0b011); // While incrementing signed scalar less than or equal to scalar
|
||||
INSN(sve_whilelo, 0b110); // While incrementing unsigned scalar lower than scalar
|
||||
INSN(sve_whilels, 0b111); // While incrementing unsigned scalar lower than or the same as scalar
|
||||
#undef INSN
|
||||
|
||||
// Predicate scan (SVE)
|
||||
|
||||
// Break after the first true condition
|
||||
// pd: destination predicate, pg: governing predicate, pn: source predicate.
// isMerge sets instruction bit 4: true gives the merging form (pg/m), false
// gives the zeroing form (pg/z).
void sve_brka(PRegister pd, PRegister pg, PRegister pn, bool isMerge) {
|
||||
starti;
|
||||
f(0b00100101, 31, 24), f(0b00, 23, 22), f(0b01000001, 21, 14),
|
||||
prf(pg, 10), f(0b0, 9), prf(pn, 5), f(isMerge ? 1 : 0, 4), prf(pd, 0);
|
||||
}
|
||||
|
||||
// Element count and increment scalar (SVE)
|
||||
// Generates the cntb/cnth/cntw/cntd encoders. TYPE (B/H/S/D) goes into bits
// 23..22. The multiplier imm4 is stored biased by one (imm4 - 1) in bits
// 19..16, so the default of 1 encodes as 0. `pattern` goes into bits 9..5.
// NOTE(review): the default pattern 0b11111 is presumably the "all elements"
// constraint -- confirm against the SVE specification. imm4 is unsigned, so
// passing 0 would wrap in `imm4 - 1`; callers are assumed to pass 1..16.
#define INSN(NAME, TYPE) \
|
||||
void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \
|
||||
starti; \
|
||||
f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20); \
|
||||
f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_cntb, B); // Set scalar to multiple of 8-bit predicate constraint element count
|
||||
INSN(sve_cnth, H); // Set scalar to multiple of 16-bit predicate constraint element count
|
||||
INSN(sve_cntw, S); // Set scalar to multiple of 32-bit predicate constraint element count
|
||||
INSN(sve_cntd, D); // Set scalar to multiple of 64-bit predicate constraint element count
|
||||
#undef INSN
|
||||
|
||||
// Predicate count and increment scalar (SVE)
|
||||
|
||||
// Increment the scalar register rd by the number of predicate elements in pg
// that are TRUE, counted at element size T (T must not be Q). rd is both
// source and destination.
|
||||
void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
f(0b00100101, 31, 24), f(T, 23, 22), f(0b1011001000100, 21, 9),
|
||||
prf(pg, 5), rf(rd, 0);
|
||||
}
|
||||
|
||||
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
|
||||
}
|
||||
|
||||
|
||||
@ -540,6 +540,75 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
// Emits the SVE code for String.indexOf(char).
//
//   str1    - base address of the string data (only read)
//   cnt1    - number of elements to search (only read); zero yields -1
//   ch      - the char to look for (only read)
//   result  - receives the index of the matching element, or -1 if none
//   ztmp1   - vector temporary holding the string data loaded each iteration
//   ztmp2   - vector temporary holding `ch` duplicated into every element
//   tmp_pg  - predicate temporary; must be a governing predicate (asserted)
//   tmp_pdn - predicate temporary receiving the compare/brka results
//   isL     - true for Latin1 (8-bit elements), false for UTF16 (16-bit)
//
// Clobbers rscratch1 (elements per vector), rscratch2 (current index) and the
// condition flags (the branches below consume flags set by the SVE compare
// and while instructions).
void C2_MacroAssembler::string_indexof_char_sve(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
FloatRegister ztmp1,
|
||||
FloatRegister ztmp2,
|
||||
PRegister tmp_pg,
|
||||
PRegister tmp_pdn, bool isL)
|
||||
{
|
||||
// Note that `tmp_pdn` should *NOT* be used as governing predicate register.
|
||||
assert(tmp_pg->is_governing(),
|
||||
"this register has to be a governing predicate register");
|
||||
|
||||
Label LOOP, MATCH, DONE, NOMATCH;
|
||||
Register vec_len = rscratch1;
|
||||
Register idx = rscratch2;
|
||||
|
||||
// Element size: bytes for Latin1, halfwords for UTF16.
SIMD_RegVariant T = (isL == true) ? B : H;
|
||||
|
||||
// An empty string can never match.
cbz(cnt1, NOMATCH);
|
||||
|
||||
// Assign the particular char throughout the vector.
|
||||
sve_dup(ztmp2, T, ch);
|
||||
// vec_len = number of elements of size T processed per iteration.
if (isL) {
|
||||
sve_cntb(vec_len);
|
||||
} else {
|
||||
sve_cnth(vec_len);
|
||||
}
|
||||
mov(idx, 0);
|
||||
|
||||
// Generate a predicate to control the reading of input string.
|
||||
sve_whilelt(tmp_pg, T, idx, cnt1);
|
||||
|
||||
BIND(LOOP);
|
||||
// Read a vector of 8- or 16-bit data depending on the string type. Note
|
||||
// that inactive elements indicated by the predicate register won't cause
|
||||
// a data read from memory to the destination vector.
|
||||
if (isL) {
|
||||
sve_ld1b(ztmp1, T, tmp_pg, Address(str1, idx));
|
||||
} else {
|
||||
sve_ld1h(ztmp1, T, tmp_pg, Address(str1, idx, Address::lsl(1)));
|
||||
}
|
||||
// Advance the index up front; the MATCH path undoes this.
add(idx, idx, vec_len);
|
||||
|
||||
// Perform the comparison. An element of the destination predicate is set
|
||||
// to active if the particular char is matched.
|
||||
sve_cmpeq(tmp_pdn, T, tmp_pg, ztmp1, ztmp2);
|
||||
|
||||
// Branch if the particular char is found.
|
||||
br(NE, MATCH);
|
||||
|
||||
// Recompute the load predicate for the next chunk starting at idx.
sve_whilelt(tmp_pg, T, idx, cnt1);
|
||||
|
||||
// Loop back while the whilelt above reports (via the MI condition) that
// there are still elements left to read; otherwise fall through to NOMATCH.
|
||||
br(MI, LOOP);
|
||||
|
||||
BIND(NOMATCH);
|
||||
mov(result, -1);
|
||||
b(DONE);
|
||||
|
||||
BIND(MATCH);
|
||||
// Undo the index increment.
|
||||
sub(idx, idx, vec_len);
|
||||
|
||||
// Break after the first match so that tmp_pdn can be counted to locate it
// within the vector (zeroing form, governed by tmp_pg).
|
||||
sve_brka(tmp_pdn, tmp_pg, tmp_pdn, false /* isMerge */);
|
||||
// result = idx - 1 + (number of active elements left in tmp_pdn).
add(result, idx, -1);
|
||||
sve_incp(result, T, tmp_pdn);
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3)
|
||||
|
||||
@ -46,8 +46,13 @@
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
void stringL_indexof_char(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
Register ch, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
void string_indexof_char_sve(Register str1, Register cnt1,
|
||||
Register ch, Register result,
|
||||
FloatRegister ztmp1, FloatRegister ztmp2,
|
||||
PRegister pgtmp, PRegister ptmp, bool isL);
|
||||
|
||||
// SIMD&FP comparison
|
||||
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -242,6 +242,7 @@ class PRegisterImpl: public AbstractRegisterImpl {
|
||||
public:
|
||||
enum {
|
||||
number_of_registers = 16,
|
||||
number_of_governing_registers = 8,
|
||||
max_slots_per_register = 1
|
||||
};
|
||||
|
||||
@ -257,6 +258,7 @@ class PRegisterImpl: public AbstractRegisterImpl {
|
||||
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
|
||||
int encoding_nocheck() const { return (intptr_t)this; }
|
||||
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
|
||||
bool is_governing() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_governing_registers; }
|
||||
const char* name() const;
|
||||
};
|
||||
|
||||
|
||||
@ -908,10 +908,17 @@ class SVEVectorOp(Instruction):
|
||||
self.numRegs = len(regs)
|
||||
if regTypes[0] != "p" and regTypes[1] == 'P':
|
||||
self._isPredicated = True
|
||||
self._merge = "/m"
|
||||
assert len(args) > 2, "Must specify predicate type"
|
||||
for arg in args[2:]:
|
||||
if arg == 'm':
|
||||
self._merge = "/m"
|
||||
elif arg == 'z':
|
||||
self._merge = "/z"
|
||||
else:
|
||||
assert arg == "dn", "Unknown predicate type"
|
||||
else:
|
||||
self._isPredicated = False
|
||||
self._merge =""
|
||||
self._merge = ""
|
||||
|
||||
self._bitwiseop = False
|
||||
if name[0] == 'f':
|
||||
@ -921,10 +928,13 @@ class SVEVectorOp(Instruction):
|
||||
self._bitwiseop = True
|
||||
else:
|
||||
self._width = RegVariant(0, 3)
|
||||
|
||||
self._dnm = None
|
||||
if len(args) > 2:
|
||||
self._dnm = args[2]
|
||||
else:
|
||||
self._dnm = None
|
||||
for arg in args[2:]:
|
||||
if arg == "dn":
|
||||
self._dnm = arg
|
||||
|
||||
Instruction.__init__(self, name)
|
||||
|
||||
def cstr(self):
|
||||
@ -1543,6 +1553,8 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"],
|
||||
["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"],
|
||||
["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"],
|
||||
["dup", "__ sve_dup(z4, __ B, r3);", "dup\tz4.b, w3"],
|
||||
["dup", "__ sve_dup(z14, __ H, r22);", "dup\tz14.h, w22"],
|
||||
["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"],
|
||||
["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
|
||||
["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
|
||||
@ -1558,6 +1570,17 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"],
|
||||
["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"],
|
||||
["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"],
|
||||
["cntb", "__ sve_cntb(r9);", "cntb\tx9"],
|
||||
["cnth", "__ sve_cnth(r10);", "cnth\tx10"],
|
||||
["cntw", "__ sve_cntw(r11);", "cntw\tx11"],
|
||||
["cntd", "__ sve_cntd(r12);", "cntd\tx12"],
|
||||
["brka", "__ sve_brka(p2, p0, p2, false);", "brka\tp2.b, p0/z, p2.b"],
|
||||
["brka", "__ sve_brka(p1, p2, p3, true);", "brka\tp1.b, p2/m, p3.b"],
|
||||
["incp", "__ sve_incp(r0, __ B, p2);", "incp\tx0, p2.b"],
|
||||
["whilelt", "__ sve_whilelt(p0, __ B, r1, r28);", "whilelt\tp0.b, x1, x28"],
|
||||
["whilele", "__ sve_whilele(p2, __ H, r11, r8);", "whilele\tp2.h, x11, x8"],
|
||||
["whilelo", "__ sve_whilelo(p3, __ S, r7, r2);", "whilelo\tp3.s, x7, x2"],
|
||||
["whilels", "__ sve_whilels(p4, __ D, r17, r10);", "whilels\tp4.d, x17, x10"],
|
||||
])
|
||||
|
||||
print "\n// FloatImmediateOp"
|
||||
@ -1593,40 +1616,44 @@ generate(SVEVectorOp, [["add", "ZZZ"],
|
||||
["fadd", "ZZZ"],
|
||||
["fmul", "ZZZ"],
|
||||
["fsub", "ZZZ"],
|
||||
["abs", "ZPZ"],
|
||||
["add", "ZPZ", "dn"],
|
||||
["asr", "ZPZ", "dn"],
|
||||
["cnt", "ZPZ"],
|
||||
["lsl", "ZPZ", "dn"],
|
||||
["lsr", "ZPZ", "dn"],
|
||||
["mul", "ZPZ", "dn"],
|
||||
["neg", "ZPZ"],
|
||||
["not", "ZPZ"],
|
||||
["smax", "ZPZ", "dn"],
|
||||
["smin", "ZPZ", "dn"],
|
||||
["sub", "ZPZ", "dn"],
|
||||
["fabs", "ZPZ"],
|
||||
["fadd", "ZPZ", "dn"],
|
||||
["fdiv", "ZPZ", "dn"],
|
||||
["fmax", "ZPZ", "dn"],
|
||||
["fmin", "ZPZ", "dn"],
|
||||
["fmul", "ZPZ", "dn"],
|
||||
["fneg", "ZPZ"],
|
||||
["frintm", "ZPZ"],
|
||||
["frintn", "ZPZ"],
|
||||
["frintp", "ZPZ"],
|
||||
["fsqrt", "ZPZ"],
|
||||
["fsub", "ZPZ", "dn"],
|
||||
["fmla", "ZPZZ"],
|
||||
["fmls", "ZPZZ"],
|
||||
["fnmla", "ZPZZ"],
|
||||
["fnmls", "ZPZZ"],
|
||||
["mla", "ZPZZ"],
|
||||
["mls", "ZPZZ"],
|
||||
["abs", "ZPZ", "m"],
|
||||
["add", "ZPZ", "m", "dn"],
|
||||
["asr", "ZPZ", "m", "dn"],
|
||||
["cnt", "ZPZ", "m"],
|
||||
["lsl", "ZPZ", "m", "dn"],
|
||||
["lsr", "ZPZ", "m", "dn"],
|
||||
["mul", "ZPZ", "m", "dn"],
|
||||
["neg", "ZPZ", "m"],
|
||||
["not", "ZPZ", "m"],
|
||||
["smax", "ZPZ", "m", "dn"],
|
||||
["smin", "ZPZ", "m", "dn"],
|
||||
["sub", "ZPZ", "m", "dn"],
|
||||
["fabs", "ZPZ", "m"],
|
||||
["fadd", "ZPZ", "m", "dn"],
|
||||
["fdiv", "ZPZ", "m", "dn"],
|
||||
["fmax", "ZPZ", "m", "dn"],
|
||||
["fmin", "ZPZ", "m", "dn"],
|
||||
["fmul", "ZPZ", "m", "dn"],
|
||||
["fneg", "ZPZ", "m"],
|
||||
["frintm", "ZPZ", "m"],
|
||||
["frintn", "ZPZ", "m"],
|
||||
["frintp", "ZPZ", "m"],
|
||||
["fsqrt", "ZPZ", "m"],
|
||||
["fsub", "ZPZ", "m", "dn"],
|
||||
["fmla", "ZPZZ", "m"],
|
||||
["fmls", "ZPZZ", "m"],
|
||||
["fnmla", "ZPZZ", "m"],
|
||||
["fnmls", "ZPZZ", "m"],
|
||||
["mla", "ZPZZ", "m"],
|
||||
["mls", "ZPZZ", "m"],
|
||||
["and", "ZZZ"],
|
||||
["eor", "ZZZ"],
|
||||
["orr", "ZZZ"],
|
||||
["bic", "ZZZ"],
|
||||
["cmpeq", "PPZZ", "z"],
|
||||
["cmpge", "PPZZ", "z"],
|
||||
["cmpgt", "PPZZ", "z"],
|
||||
["cmpne", "PPZZ", "z"],
|
||||
])
|
||||
|
||||
generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
|
||||
|
||||
@ -742,6 +742,8 @@
|
||||
__ sve_dup(z1, __ H, -128); // dup z1.h, -128
|
||||
__ sve_dup(z2, __ S, 32512); // dup z2.s, 32512
|
||||
__ sve_dup(z7, __ D, -32768); // dup z7.d, -32768
|
||||
__ sve_dup(z4, __ B, r3); // dup z4.b, w3
|
||||
__ sve_dup(z14, __ H, r22); // dup z14.h, w22
|
||||
__ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp]
|
||||
__ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
|
||||
__ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
|
||||
@ -757,6 +759,17 @@
|
||||
__ sve_ldr(z0, Address(sp)); // ldr z0, [sp]
|
||||
__ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL]
|
||||
__ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL]
|
||||
__ sve_cntb(r9); // cntb x9
|
||||
__ sve_cnth(r10); // cnth x10
|
||||
__ sve_cntw(r11); // cntw x11
|
||||
__ sve_cntd(r12); // cntd x12
|
||||
__ sve_brka(p2, p0, p2, false); // brka p2.b, p0/z, p2.b
|
||||
__ sve_brka(p1, p2, p3, true); // brka p1.b, p2/m, p3.b
|
||||
__ sve_incp(r0, __ B, p2); // incp x0, p2.b
|
||||
__ sve_whilelt(p0, __ B, r1, r28); // whilelt p0.b, x1, x28
|
||||
__ sve_whilele(p2, __ H, r11, r8); // whilele p2.h, x11, x8
|
||||
__ sve_whilelo(p3, __ S, r7, r2); // whilelo p3.s, x7, x2
|
||||
__ sve_whilels(p4, __ D, r17, r10); // whilels p4.d, x17, x10
|
||||
|
||||
// FloatImmediateOp
|
||||
__ fmovd(v0, 2.0); // fmov d0, #2.0
|
||||
@ -932,17 +945,21 @@
|
||||
__ sve_eor(z2, z11, z28); // eor z2.d, z11.d, z28.d
|
||||
__ sve_orr(z7, z1, z26); // orr z7.d, z1.d, z26.d
|
||||
__ sve_bic(z17, z14, z8); // bic z17.d, z14.d, z8.d
|
||||
__ sve_cmpeq(p5, __ S, p6, z5, z19); // cmpeq p5.s, p6/z, z5.s, z19.s
|
||||
__ sve_cmpge(p4, __ S, p5, z16, z29); // cmpge p4.s, p5/z, z16.s, z29.s
|
||||
__ sve_cmpgt(p5, __ D, p0, z4, z17); // cmpgt p5.d, p0/z, z4.d, z17.d
|
||||
__ sve_cmpne(p1, __ D, p5, z4, z23); // cmpne p1.d, p5/z, z4.d, z23.d
|
||||
|
||||
// SVEReductionOp
|
||||
__ sve_andv(v21, __ S, p6, z5); // andv s21, p6, z5.s
|
||||
__ sve_orv(v21, __ S, p4, z22); // orv s21, p4, z22.s
|
||||
__ sve_eorv(v29, __ B, p5, z19); // eorv b29, p5, z19.b
|
||||
__ sve_smaxv(v4, __ B, p4, z23); // smaxv b4, p4, z23.b
|
||||
__ sve_sminv(v19, __ D, p1, z23); // sminv d19, p1, z23.d
|
||||
__ sve_fminv(v19, __ S, p0, z8); // fminv s19, p0, z8.s
|
||||
__ sve_fmaxv(v14, __ D, p6, z17); // fmaxv d14, p6, z17.d
|
||||
__ sve_fadda(v21, __ S, p1, z30); // fadda s21, p1, s21, z30.s
|
||||
__ sve_uaddv(v10, __ B, p5, z12); // uaddv d10, p5, z12.b
|
||||
__ sve_andv(v19, __ H, p0, z8); // andv h19, p0, z8.h
|
||||
__ sve_orv(v14, __ D, p6, z17); // orv d14, p6, z17.d
|
||||
__ sve_eorv(v21, __ B, p1, z30); // eorv b21, p1, z30.b
|
||||
__ sve_smaxv(v10, __ B, p5, z12); // smaxv b10, p5, z12.b
|
||||
__ sve_sminv(v9, __ S, p1, z24); // sminv s9, p1, z24.s
|
||||
__ sve_fminv(v4, __ S, p6, z6); // fminv s4, p6, z6.s
|
||||
__ sve_fmaxv(v27, __ D, p6, z13); // fmaxv d27, p6, z13.d
|
||||
__ sve_fadda(v30, __ D, p5, z22); // fadda d30, p5, d30, z22.d
|
||||
__ sve_uaddv(v30, __ H, p7, z9); // uaddv d30, p7, z9.h
|
||||
|
||||
__ bind(forth);
|
||||
|
||||
@ -961,30 +978,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140002fb, 0x94000000,
|
||||
0x97ffffd4, 0x940002f8, 0x3400000a, 0x34fffa2a,
|
||||
0x34005eaa, 0x35000008, 0x35fff9c8, 0x35005e48,
|
||||
0xb400000b, 0xb4fff96b, 0xb4005deb, 0xb500001d,
|
||||
0xb5fff91d, 0xb5005d9d, 0x10000013, 0x10fff8b3,
|
||||
0x10005d33, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36305cb6, 0x3758000c, 0x375ff7cc, 0x37585c4c,
|
||||
0x14000000, 0x17ffffd7, 0x1400030c, 0x94000000,
|
||||
0x97ffffd4, 0x94000309, 0x3400000a, 0x34fffa2a,
|
||||
0x340060ca, 0x35000008, 0x35fff9c8, 0x35006068,
|
||||
0xb400000b, 0xb4fff96b, 0xb400600b, 0xb500001d,
|
||||
0xb5fff91d, 0xb5005fbd, 0x10000013, 0x10fff8b3,
|
||||
0x10005f53, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36305ed6, 0x3758000c, 0x375ff7cc, 0x37585e6c,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54005a20, 0x54000001, 0x54fff541, 0x540059c1,
|
||||
0x54000002, 0x54fff4e2, 0x54005962, 0x54000002,
|
||||
0x54fff482, 0x54005902, 0x54000003, 0x54fff423,
|
||||
0x540058a3, 0x54000003, 0x54fff3c3, 0x54005843,
|
||||
0x54000004, 0x54fff364, 0x540057e4, 0x54000005,
|
||||
0x54fff305, 0x54005785, 0x54000006, 0x54fff2a6,
|
||||
0x54005726, 0x54000007, 0x54fff247, 0x540056c7,
|
||||
0x54000008, 0x54fff1e8, 0x54005668, 0x54000009,
|
||||
0x54fff189, 0x54005609, 0x5400000a, 0x54fff12a,
|
||||
0x540055aa, 0x5400000b, 0x54fff0cb, 0x5400554b,
|
||||
0x5400000c, 0x54fff06c, 0x540054ec, 0x5400000d,
|
||||
0x54fff00d, 0x5400548d, 0x5400000e, 0x54ffefae,
|
||||
0x5400542e, 0x5400000f, 0x54ffef4f, 0x540053cf,
|
||||
0x54005c40, 0x54000001, 0x54fff541, 0x54005be1,
|
||||
0x54000002, 0x54fff4e2, 0x54005b82, 0x54000002,
|
||||
0x54fff482, 0x54005b22, 0x54000003, 0x54fff423,
|
||||
0x54005ac3, 0x54000003, 0x54fff3c3, 0x54005a63,
|
||||
0x54000004, 0x54fff364, 0x54005a04, 0x54000005,
|
||||
0x54fff305, 0x540059a5, 0x54000006, 0x54fff2a6,
|
||||
0x54005946, 0x54000007, 0x54fff247, 0x540058e7,
|
||||
0x54000008, 0x54fff1e8, 0x54005888, 0x54000009,
|
||||
0x54fff189, 0x54005829, 0x5400000a, 0x54fff12a,
|
||||
0x540057ca, 0x5400000b, 0x54fff0cb, 0x5400576b,
|
||||
0x5400000c, 0x54fff06c, 0x5400570c, 0x5400000d,
|
||||
0x54fff00d, 0x540056ad, 0x5400000e, 0x54ffefae,
|
||||
0x5400564e, 0x5400000f, 0x54ffef4f, 0x540055ef,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
|
||||
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
|
||||
@ -1016,7 +1033,7 @@
|
||||
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
|
||||
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
|
||||
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
|
||||
0xbd1b1869, 0x5800441b, 0x1800000b, 0xf8945060,
|
||||
0xbd1b1869, 0x5800463b, 0x1800000b, 0xf8945060,
|
||||
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
|
||||
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
|
||||
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
|
||||
@ -1108,50 +1125,54 @@
|
||||
0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
|
||||
0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
|
||||
0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
|
||||
0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
|
||||
0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
|
||||
0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
|
||||
0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff,
|
||||
0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
|
||||
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
|
||||
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
|
||||
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
|
||||
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
|
||||
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
|
||||
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
|
||||
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
|
||||
0x1e7e3000, 0xf8208193, 0xf83101b6, 0xf83c13fe,
|
||||
0xf821239a, 0xf824309e, 0xf826535e, 0xf8304109,
|
||||
0xf82c7280, 0xf8216058, 0xf8a08309, 0xf8ba03d0,
|
||||
0xf8a312ea, 0xf8aa21e4, 0xf8a2310b, 0xf8aa522f,
|
||||
0xf8a2418a, 0xf8ac71af, 0xf8a26287, 0xf8fa8090,
|
||||
0xf8e20184, 0xf8f01215, 0xf8f022ab, 0xf8f7334c,
|
||||
0xf8f751dc, 0xf8eb4038, 0xf8ec715f, 0xf8f06047,
|
||||
0xf863826d, 0xf8710070, 0xf86113cb, 0xf86521e8,
|
||||
0xf87d301e, 0xf8745287, 0xf87742bc, 0xf87b70b9,
|
||||
0xf8616217, 0xb83f8185, 0xb82901fc, 0xb83d13f6,
|
||||
0xb83320bf, 0xb82e33f0, 0xb830529b, 0xb830416c,
|
||||
0xb82973c6, 0xb831639b, 0xb8be8147, 0xb8b4008a,
|
||||
0xb8b81231, 0xb8b623a3, 0xb8af3276, 0xb8b35056,
|
||||
0xb8af4186, 0xb8b071ab, 0xb8b763c1, 0xb8f38225,
|
||||
0xb8e202d0, 0xb8ed12aa, 0xb8fd219b, 0xb8fb3023,
|
||||
0xb8ff5278, 0xb8f14389, 0xb8fb70ef, 0xb8f563f7,
|
||||
0xb87983e2, 0xb87b0150, 0xb8771073, 0xb8702320,
|
||||
0xb87a3057, 0xb870508c, 0xb87c43be, 0xb87070db,
|
||||
0xb86961fd, 0xce273c87, 0xce080ac9, 0xce7e8e9b,
|
||||
0xce808b45, 0xce79806e, 0xce758768, 0xcec0835a,
|
||||
0xce608ad8, 0x043100c4, 0x046105e3, 0x65c900a6,
|
||||
0x65d60a87, 0x65c80545, 0x0416a63e, 0x04001f8b,
|
||||
0x0450979a, 0x04dabe0d, 0x045381a5, 0x04918b4f,
|
||||
0x049006cb, 0x0497a264, 0x045eadd1, 0x04881062,
|
||||
0x040a04d7, 0x04810f71, 0x04dca450, 0x65c084c3,
|
||||
0x65cd8d93, 0x65c69a68, 0x65878ae0, 0x65c29db3,
|
||||
0x049da0e6, 0x6582b911, 0x65c0b6d6, 0x65c1a1e2,
|
||||
0x65cda494, 0x65c18107, 0x65af1493, 0x65e52b36,
|
||||
0x65ab4ed0, 0x65f06a8d, 0x0451448f, 0x049c7c86,
|
||||
0x0429335d, 0x04bc3162, 0x047a3027, 0x04e831d1,
|
||||
0x049a38b5, 0x049832d5, 0x0419367d, 0x040832e4,
|
||||
0x04ca26f3, 0x65872113, 0x65c63a2e, 0x659827d5,
|
||||
0x0401358a,
|
||||
0x25b8efe2, 0x25f8f007, 0x05203864, 0x05603ace,
|
||||
0xa400a3e0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe,
|
||||
0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff,
|
||||
0xe547e400, 0xe4014be0, 0xe4a84fe0, 0xe5f15000,
|
||||
0x858043e0, 0x85a043ff, 0xe59f5d08, 0x0420e3e9,
|
||||
0x0460e3ea, 0x04a0e3eb, 0x04e0e3ec, 0x25104042,
|
||||
0x25104871, 0x252c8840, 0x253c1420, 0x25681572,
|
||||
0x25a21ce3, 0x25ea1e34, 0x1e601000, 0x1e603000,
|
||||
0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000,
|
||||
0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000,
|
||||
0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000,
|
||||
0x1e6e1000, 0x1e6e3000, 0x1e701000, 0x1e703000,
|
||||
0x1e721000, 0x1e723000, 0x1e741000, 0x1e743000,
|
||||
0x1e761000, 0x1e763000, 0x1e781000, 0x1e783000,
|
||||
0x1e7a1000, 0x1e7a3000, 0x1e7c1000, 0x1e7c3000,
|
||||
0x1e7e1000, 0x1e7e3000, 0xf8208193, 0xf83101b6,
|
||||
0xf83c13fe, 0xf821239a, 0xf824309e, 0xf826535e,
|
||||
0xf8304109, 0xf82c7280, 0xf8216058, 0xf8a08309,
|
||||
0xf8ba03d0, 0xf8a312ea, 0xf8aa21e4, 0xf8a2310b,
|
||||
0xf8aa522f, 0xf8a2418a, 0xf8ac71af, 0xf8a26287,
|
||||
0xf8fa8090, 0xf8e20184, 0xf8f01215, 0xf8f022ab,
|
||||
0xf8f7334c, 0xf8f751dc, 0xf8eb4038, 0xf8ec715f,
|
||||
0xf8f06047, 0xf863826d, 0xf8710070, 0xf86113cb,
|
||||
0xf86521e8, 0xf87d301e, 0xf8745287, 0xf87742bc,
|
||||
0xf87b70b9, 0xf8616217, 0xb83f8185, 0xb82901fc,
|
||||
0xb83d13f6, 0xb83320bf, 0xb82e33f0, 0xb830529b,
|
||||
0xb830416c, 0xb82973c6, 0xb831639b, 0xb8be8147,
|
||||
0xb8b4008a, 0xb8b81231, 0xb8b623a3, 0xb8af3276,
|
||||
0xb8b35056, 0xb8af4186, 0xb8b071ab, 0xb8b763c1,
|
||||
0xb8f38225, 0xb8e202d0, 0xb8ed12aa, 0xb8fd219b,
|
||||
0xb8fb3023, 0xb8ff5278, 0xb8f14389, 0xb8fb70ef,
|
||||
0xb8f563f7, 0xb87983e2, 0xb87b0150, 0xb8771073,
|
||||
0xb8702320, 0xb87a3057, 0xb870508c, 0xb87c43be,
|
||||
0xb87070db, 0xb86961fd, 0xce273c87, 0xce080ac9,
|
||||
0xce7e8e9b, 0xce808b45, 0xce79806e, 0xce758768,
|
||||
0xcec0835a, 0xce608ad8, 0x043100c4, 0x046105e3,
|
||||
0x65c900a6, 0x65d60a87, 0x65c80545, 0x0416a63e,
|
||||
0x04001f8b, 0x0450979a, 0x04dabe0d, 0x045381a5,
|
||||
0x04918b4f, 0x049006cb, 0x0497a264, 0x045eadd1,
|
||||
0x04881062, 0x040a04d7, 0x04810f71, 0x04dca450,
|
||||
0x65c084c3, 0x65cd8d93, 0x65c69a68, 0x65878ae0,
|
||||
0x65c29db3, 0x049da0e6, 0x6582b911, 0x65c0b6d6,
|
||||
0x65c1a1e2, 0x65cda494, 0x65c18107, 0x65af1493,
|
||||
0x65e52b36, 0x65ab4ed0, 0x65f06a8d, 0x0451448f,
|
||||
0x049c7c86, 0x0429335d, 0x04bc3162, 0x047a3027,
|
||||
0x04e831d1, 0x2493b8a5, 0x249d9604, 0x24d18095,
|
||||
0x24d7b491, 0x045a2113, 0x04d83a2e, 0x041927d5,
|
||||
0x0408358a, 0x048a2709, 0x658738c4, 0x65c639bb,
|
||||
0x65d836de, 0x04413d3e,
|
||||
};
|
||||
// END Generated code -- do not edit
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user