8268363: AArch64: Implement string_indexof_char intrinsic in SVE

Reviewed-by: aph, njian
This commit is contained in:
TatWai Chong 2021-07-08 02:29:17 +00:00 committed by Ningsheng Jian
parent 270fbcb3f5
commit e54585be57
9 changed files with 381 additions and 119 deletions

View File

@ -5495,6 +5495,15 @@ operand vRegD_V31()
interface(REG_INTER);
%}
// Predicate register operand: allocated from the pr_reg register class and
// matched against RegVectMask. It has no operand cost and an empty format.
operand pReg()
%{
constraint(ALLOC_IN_RC(pr_reg));
match(RegVectMask);
op_cost(0);
format %{ %}
interface(REG_INTER);
%}
operand pRegGov()
%{
constraint(ALLOC_IN_RC(gov_pr));
@ -16660,11 +16669,11 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
%}
instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, rFlagsReg cr)
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
predicate((UseSVE == 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
@ -16683,7 +16692,7 @@ instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegINoSp tmp3, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
predicate((UseSVE == 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
@ -16691,8 +16700,8 @@ instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
ins_encode %{
__ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
$result$$Register, $tmp1$$Register, $tmp2$$Register,
$tmp3$$Register);
$result$$Register, $tmp1$$Register, $tmp2$$Register,
$tmp3$$Register);
%}
ins_pipe(pipe_class_memory);
%}

View File

@ -1775,3 +1775,42 @@ instruct vmaskcast(vReg dst) %{
ins_pipe(pipe_class_empty);
%}
// Intrinsics for String.indexOf(char)
// SVE rule for StrIndexOfChar on Latin1 strings. It is selected only when
// UseSVE > 0 and the node's encoding is StrIntrinsicNode::L.
// str1, cnt1 and ch are pinned to R1, R2 and R3 and the result to R0.
// Two vector registers and two predicate registers are reserved as
// temporaries, and the condition flags are killed. pgtmp uses the pRegGov
// operand because the macro-assembler routine asserts that its governing
// predicate argument is a governing register.
instruct stringL_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegI_R0 result, vReg ztmp1, vReg ztmp2,
pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
ins_encode %{
__ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), true /* isL */);
%}
ins_pipe(pipe_class_memory);
%}
// SVE rule for StrIndexOfChar on UTF16 strings. It is selected only when
// UseSVE > 0 and the node's encoding is StrIntrinsicNode::U.
// The operands and effects are the same as in the Latin1 rule; the only
// difference in the emitted code is that isL is passed as false.
instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegI_R0 result, vReg ztmp1, vReg ztmp2,
pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
ins_encode %{
__ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), false /* isL */);
%}
ins_pipe(pipe_class_memory);
%}

View File

@ -919,3 +919,29 @@ instruct vmaskcast(vReg dst) %{
ins_pipe(pipe_class_empty);
%}
// Intrinsics for String.indexOf(char)
dnl
dnl STRING_INDEXOF_CHAR(encoding, format_name, isL)
dnl Expands to one SVE StrIndexOfChar instruct rule.
dnl   $1 - encoding suffix, used in the instruct name and as the
dnl        StrIntrinsicNode encoding tested by the predicate
dnl   $2 - encoding name printed in the format string
dnl   $3 - value passed as the isL argument of string_indexof_char_sve
define(`STRING_INDEXOF_CHAR', `
instruct string$1_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegI_R0 result, vReg ztmp1, vReg ztmp2,
pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::$1));
effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
format %{ "String$2 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
ins_encode %{
__ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), $3 /* isL */);
%}
ins_pipe(pipe_class_memory);
%}')dnl
dnl $1 $2 $3
STRING_INDEXOF_CHAR(L, Latin1, true)
STRING_INDEXOF_CHAR(U, UTF16, false)
dnl

View File

@ -3214,6 +3214,70 @@ public:
f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
}
// Integer comparisons (SVE)
// Each INSN expansion defines one predicated vector-vector compare taking a
// destination predicate Pd, an element size T, a governing predicate Pg and
// two vector sources Zn and Zm. T == Q is rejected by the assert.
// `cond` is a 4-bit selector: its upper three bits are emitted into bits
// 15..13 of the instruction and its lowest bit into bit 4.
#define INSN(NAME, cond) \
void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
starti; \
assert(T != Q, "invalid size"); \
f(0b00100100, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond >> 1) & 7, 15, 13); \
pgrf(Pg, 10), rf(Zn, 5), f(cond & 1, 4), prf(Pd, 0); \
}
INSN(sve_cmpeq, 0b1010); // Compare signed equal to vector
INSN(sve_cmpne, 0b1011); // Compare not equal to vector
INSN(sve_cmpge, 0b1000); // Compare signed greater than or equal to vector
INSN(sve_cmpgt, 0b1001); // Compare signed greater than vector
#undef INSN
// Predicate counted loop (SVE) (32-bit variants are not included)
// Each INSN expansion defines one WHILE* instruction that takes two scalar
// registers Rn and Rm and a destination predicate Pd with element size T.
// T == Q is rejected by the assert.
// `decode` is a 3-bit selector: its upper two bits are emitted into bits
// 11..10 of the instruction and its lowest bit into bit 4. Bit 12 is always
// emitted as 1.
#define INSN(NAME, decode) \
void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) { \
starti; \
assert(T != Q, "invalid register variant"); \
f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21), \
zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10), \
zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0); \
}
INSN(sve_whilelt, 0b010); // While incrementing signed scalar less than scalar
INSN(sve_whilele, 0b011); // While incrementing signed scalar less than or equal to scalar
INSN(sve_whilelo, 0b110); // While incrementing unsigned scalar lower than scalar
INSN(sve_whilels, 0b111); // While incrementing unsigned scalar lower than or the same as scalar
#undef INSN
// Predicate scan (SVE)
// BRKA: break after the first true condition.
// pd is the destination predicate, pg the governing predicate and pn the
// source predicate. isMerge drives bit 4 of the encoding: 1 when true,
// 0 when false.
void sve_brka(PRegister pd, PRegister pg, PRegister pn, bool isMerge) {
starti;
const int merge_bit = isMerge ? 1 : 0;
// Fixed opcode bits.
f(0b00100101, 31, 24);
f(0b00, 23, 22);
f(0b01000001, 21, 14);
// Register fields and the merge selector.
prf(pg, 10);
f(0b0, 9);
prf(pn, 5);
f(merge_bit, 4);
prf(pd, 0);
}
// Element count and increment scalar (SVE)
// Each INSN expansion defines one CNT{B,H,W,D} instruction writing to scalar
// register Xdn. TYPE is the element size emitted into bits 23..22.
// imm4 is the multiplier and defaults to 1; it is encoded as (imm4 - 1) in
// bits 19..16. pattern is the predicate constraint emitted into bits 9..5
// and defaults to 0b11111.
#define INSN(NAME, TYPE) \
void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \
starti; \
f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20); \
f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0); \
}
INSN(sve_cntb, B); // Set scalar to multiple of 8-bit predicate constraint element count
INSN(sve_cnth, H); // Set scalar to multiple of 16-bit predicate constraint element count
INSN(sve_cntw, S); // Set scalar to multiple of 32-bit predicate constraint element count
INSN(sve_cntd, D); // Set scalar to multiple of 64-bit predicate constraint element count
#undef INSN
// Predicate count and increment scalar (SVE)
// INCP: increment the scalar register rd by the number of TRUE elements in
// predicate pg, counted at element size T. rd is both source and
// destination; it is not overwritten with the bare count.
// T == Q is rejected by the assert.
void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
starti;
assert(T != Q, "invalid size");
f(0b00100101, 31, 24), f(T, 23, 22), f(0b1011001000100, 21, 9),
prf(pg, 5), rf(rd, 0);
}
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

View File

@ -540,6 +540,75 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
BIND(DONE);
}
// Emit an SVE loop that searches a string for a single character.
//
//   str1    - base address of the string data
//   cnt1    - number of characters in the string
//   ch      - character to search for
//   result  - receives the index of the first match, or -1 when cnt1 is zero
//             or no element matches
//   ztmp1   - vector temporary holding the characters loaded each iteration
//   ztmp2   - vector temporary holding `ch` replicated to every element
//   tmp_pg  - governing predicate temporary (asserted to be governing)
//   tmp_pdn - predicate temporary receiving the comparison result
//   isL     - true: elements are bytes (B); false: elements are halfwords (H)
//
// rscratch1 (vector length in elements) and rscratch2 (current index) are
// used as scratch registers. str1, cnt1 and ch are only read.
void C2_MacroAssembler::string_indexof_char_sve(Register str1, Register cnt1,
Register ch, Register result,
FloatRegister ztmp1,
FloatRegister ztmp2,
PRegister tmp_pg,
PRegister tmp_pdn, bool isL)
{
// Note that `tmp_pdn` should *NOT* be used as governing predicate register.
assert(tmp_pg->is_governing(),
"this register has to be a governing predicate register");
Label LOOP, MATCH, DONE, NOMATCH;
Register vec_len = rscratch1;
Register idx = rscratch2;
// Element size: bytes for Latin1, halfwords for UTF16.
SIMD_RegVariant T = (isL == true) ? B : H;
// An empty string can never match.
cbz(cnt1, NOMATCH);
// Assign the particular char throughout the vector.
sve_dup(ztmp2, T, ch);
// vec_len = number of elements of size T in one vector.
if (isL) {
sve_cntb(vec_len);
} else {
sve_cnth(vec_len);
}
mov(idx, 0);
// Generate a predicate to control the reading of input string.
sve_whilelt(tmp_pg, T, idx, cnt1);
BIND(LOOP);
// Read a vector of 8- or 16-bit data depending on the string type. Note
// that inactive elements indicated by the predicate register won't cause
// a data read from memory to the destination vector.
if (isL) {
sve_ld1b(ztmp1, T, tmp_pg, Address(str1, idx));
} else {
sve_ld1h(ztmp1, T, tmp_pg, Address(str1, idx, Address::lsl(1)));
}
// Advance to the next chunk before the compare; this add sits between the
// load and the compare and is undone at MATCH.
add(idx, idx, vec_len);
// Perform the comparison. An element of the destination predicate is set
// to active if the particular char is matched.
sve_cmpeq(tmp_pdn, T, tmp_pg, ztmp1, ztmp2);
// Branch if the particular char is found.
// (The branch tests the condition flags left by the compare above.)
br(NE, MATCH);
// Build the predicate for the next chunk; the branch below tests the flags
// left by this instruction.
sve_whilelt(tmp_pg, T, idx, cnt1);
// Loop back if the particular char not found.
br(MI, LOOP);
BIND(NOMATCH);
mov(result, -1);
b(DONE);
BIND(MATCH);
// Undo the index increment.
sub(idx, idx, vec_len);
// Crop the vector to find its location.
// brka breaks after the first true element, so the elements left set in
// tmp_pdn number (offset of the first match within the chunk) + 1.
sve_brka(tmp_pdn, tmp_pg, tmp_pdn, false /* isMerge */);
// Start from idx - 1 so that adding that count yields the match index.
add(result, idx, -1);
sve_incp(result, T, tmp_pdn);
BIND(DONE);
}
void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3)

View File

@ -46,8 +46,13 @@
Register tmp1, Register tmp2, Register tmp3);
void stringL_indexof_char(Register str1, Register cnt1,
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3);
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3);
// SVE implementation of String.indexOf(char). isL selects the Latin1 (true)
// or UTF16 (false) element size. ztmp1/ztmp2 are vector temporaries and
// pgtmp/ptmp are predicate temporaries.
void string_indexof_char_sve(Register str1, Register cnt1,
Register ch, Register result,
FloatRegister ztmp1, FloatRegister ztmp2,
PRegister pgtmp, PRegister ptmp, bool isL);
// SIMD&FP comparison
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -242,6 +242,7 @@ class PRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 16,
number_of_governing_registers = 8,
max_slots_per_register = 1
};
@ -257,6 +258,7 @@ class PRegisterImpl: public AbstractRegisterImpl {
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
int encoding_nocheck() const { return (intptr_t)this; }
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
bool is_governing() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_governing_registers; }
const char* name() const;
};

View File

@ -908,10 +908,17 @@ class SVEVectorOp(Instruction):
self.numRegs = len(regs)
if regTypes[0] != "p" and regTypes[1] == 'P':
self._isPredicated = True
self._merge = "/m"
assert len(args) > 2, "Must specify predicate type"
for arg in args[2:]:
if arg == 'm':
self._merge = "/m"
elif arg == 'z':
self._merge = "/z"
else:
assert arg == "dn", "Unknown predicate type"
else:
self._isPredicated = False
self._merge =""
self._merge = ""
self._bitwiseop = False
if name[0] == 'f':
@ -921,10 +928,13 @@ class SVEVectorOp(Instruction):
self._bitwiseop = True
else:
self._width = RegVariant(0, 3)
self._dnm = None
if len(args) > 2:
self._dnm = args[2]
else:
self._dnm = None
for arg in args[2:]:
if arg == "dn":
self._dnm = arg
Instruction.__init__(self, name)
def cstr(self):
@ -1543,6 +1553,8 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"],
["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"],
["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"],
["dup", "__ sve_dup(z4, __ B, r3);", "dup\tz4.b, w3"],
["dup", "__ sve_dup(z14, __ H, r22);", "dup\tz14.h, w22"],
["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"],
["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
@ -1558,6 +1570,17 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"],
["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"],
["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"],
["cntb", "__ sve_cntb(r9);", "cntb\tx9"],
["cnth", "__ sve_cnth(r10);", "cnth\tx10"],
["cntw", "__ sve_cntw(r11);", "cntw\tx11"],
["cntd", "__ sve_cntd(r12);", "cntd\tx12"],
["brka", "__ sve_brka(p2, p0, p2, false);", "brka\tp2.b, p0/z, p2.b"],
["brka", "__ sve_brka(p1, p2, p3, true);", "brka\tp1.b, p2/m, p3.b"],
["incp", "__ sve_incp(r0, __ B, p2);", "incp\tx0, p2.b"],
["whilelt", "__ sve_whilelt(p0, __ B, r1, r28);", "whilelt\tp0.b, x1, x28"],
["whilele", "__ sve_whilele(p2, __ H, r11, r8);", "whilele\tp2.h, x11, x8"],
["whilelo", "__ sve_whilelo(p3, __ S, r7, r2);", "whilelo\tp3.s, x7, x2"],
["whilels", "__ sve_whilels(p4, __ D, r17, r10);", "whilels\tp4.d, x17, x10"],
])
print "\n// FloatImmediateOp"
@ -1593,40 +1616,44 @@ generate(SVEVectorOp, [["add", "ZZZ"],
["fadd", "ZZZ"],
["fmul", "ZZZ"],
["fsub", "ZZZ"],
["abs", "ZPZ"],
["add", "ZPZ", "dn"],
["asr", "ZPZ", "dn"],
["cnt", "ZPZ"],
["lsl", "ZPZ", "dn"],
["lsr", "ZPZ", "dn"],
["mul", "ZPZ", "dn"],
["neg", "ZPZ"],
["not", "ZPZ"],
["smax", "ZPZ", "dn"],
["smin", "ZPZ", "dn"],
["sub", "ZPZ", "dn"],
["fabs", "ZPZ"],
["fadd", "ZPZ", "dn"],
["fdiv", "ZPZ", "dn"],
["fmax", "ZPZ", "dn"],
["fmin", "ZPZ", "dn"],
["fmul", "ZPZ", "dn"],
["fneg", "ZPZ"],
["frintm", "ZPZ"],
["frintn", "ZPZ"],
["frintp", "ZPZ"],
["fsqrt", "ZPZ"],
["fsub", "ZPZ", "dn"],
["fmla", "ZPZZ"],
["fmls", "ZPZZ"],
["fnmla", "ZPZZ"],
["fnmls", "ZPZZ"],
["mla", "ZPZZ"],
["mls", "ZPZZ"],
["abs", "ZPZ", "m"],
["add", "ZPZ", "m", "dn"],
["asr", "ZPZ", "m", "dn"],
["cnt", "ZPZ", "m"],
["lsl", "ZPZ", "m", "dn"],
["lsr", "ZPZ", "m", "dn"],
["mul", "ZPZ", "m", "dn"],
["neg", "ZPZ", "m"],
["not", "ZPZ", "m"],
["smax", "ZPZ", "m", "dn"],
["smin", "ZPZ", "m", "dn"],
["sub", "ZPZ", "m", "dn"],
["fabs", "ZPZ", "m"],
["fadd", "ZPZ", "m", "dn"],
["fdiv", "ZPZ", "m", "dn"],
["fmax", "ZPZ", "m", "dn"],
["fmin", "ZPZ", "m", "dn"],
["fmul", "ZPZ", "m", "dn"],
["fneg", "ZPZ", "m"],
["frintm", "ZPZ", "m"],
["frintn", "ZPZ", "m"],
["frintp", "ZPZ", "m"],
["fsqrt", "ZPZ", "m"],
["fsub", "ZPZ", "m", "dn"],
["fmla", "ZPZZ", "m"],
["fmls", "ZPZZ", "m"],
["fnmla", "ZPZZ", "m"],
["fnmls", "ZPZZ", "m"],
["mla", "ZPZZ", "m"],
["mls", "ZPZZ", "m"],
["and", "ZZZ"],
["eor", "ZZZ"],
["orr", "ZZZ"],
["bic", "ZZZ"],
["cmpeq", "PPZZ", "z"],
["cmpge", "PPZZ", "z"],
["cmpgt", "PPZZ", "z"],
["cmpne", "PPZZ", "z"],
])
generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],

View File

@ -742,6 +742,8 @@
__ sve_dup(z1, __ H, -128); // dup z1.h, -128
__ sve_dup(z2, __ S, 32512); // dup z2.s, 32512
__ sve_dup(z7, __ D, -32768); // dup z7.d, -32768
__ sve_dup(z4, __ B, r3); // dup z4.b, w3
__ sve_dup(z14, __ H, r22); // dup z14.h, w22
__ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp]
__ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
__ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
@ -757,6 +759,17 @@
__ sve_ldr(z0, Address(sp)); // ldr z0, [sp]
__ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL]
__ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL]
__ sve_cntb(r9); // cntb x9
__ sve_cnth(r10); // cnth x10
__ sve_cntw(r11); // cntw x11
__ sve_cntd(r12); // cntd x12
__ sve_brka(p2, p0, p2, false); // brka p2.b, p0/z, p2.b
__ sve_brka(p1, p2, p3, true); // brka p1.b, p2/m, p3.b
__ sve_incp(r0, __ B, p2); // incp x0, p2.b
__ sve_whilelt(p0, __ B, r1, r28); // whilelt p0.b, x1, x28
__ sve_whilele(p2, __ H, r11, r8); // whilele p2.h, x11, x8
__ sve_whilelo(p3, __ S, r7, r2); // whilelo p3.s, x7, x2
__ sve_whilels(p4, __ D, r17, r10); // whilels p4.d, x17, x10
// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
@ -932,17 +945,21 @@
__ sve_eor(z2, z11, z28); // eor z2.d, z11.d, z28.d
__ sve_orr(z7, z1, z26); // orr z7.d, z1.d, z26.d
__ sve_bic(z17, z14, z8); // bic z17.d, z14.d, z8.d
__ sve_cmpeq(p5, __ S, p6, z5, z19); // cmpeq p5.s, p6/z, z5.s, z19.s
__ sve_cmpge(p4, __ S, p5, z16, z29); // cmpge p4.s, p5/z, z16.s, z29.s
__ sve_cmpgt(p5, __ D, p0, z4, z17); // cmpgt p5.d, p0/z, z4.d, z17.d
__ sve_cmpne(p1, __ D, p5, z4, z23); // cmpne p1.d, p5/z, z4.d, z23.d
// SVEReductionOp
__ sve_andv(v21, __ S, p6, z5); // andv s21, p6, z5.s
__ sve_orv(v21, __ S, p4, z22); // orv s21, p4, z22.s
__ sve_eorv(v29, __ B, p5, z19); // eorv b29, p5, z19.b
__ sve_smaxv(v4, __ B, p4, z23); // smaxv b4, p4, z23.b
__ sve_sminv(v19, __ D, p1, z23); // sminv d19, p1, z23.d
__ sve_fminv(v19, __ S, p0, z8); // fminv s19, p0, z8.s
__ sve_fmaxv(v14, __ D, p6, z17); // fmaxv d14, p6, z17.d
__ sve_fadda(v21, __ S, p1, z30); // fadda s21, p1, s21, z30.s
__ sve_uaddv(v10, __ B, p5, z12); // uaddv d10, p5, z12.b
__ sve_andv(v19, __ H, p0, z8); // andv h19, p0, z8.h
__ sve_orv(v14, __ D, p6, z17); // orv d14, p6, z17.d
__ sve_eorv(v21, __ B, p1, z30); // eorv b21, p1, z30.b
__ sve_smaxv(v10, __ B, p5, z12); // smaxv b10, p5, z12.b
__ sve_sminv(v9, __ S, p1, z24); // sminv s9, p1, z24.s
__ sve_fminv(v4, __ S, p6, z6); // fminv s4, p6, z6.s
__ sve_fmaxv(v27, __ D, p6, z13); // fmaxv d27, p6, z13.d
__ sve_fadda(v30, __ D, p5, z22); // fadda d30, p5, d30, z22.d
__ sve_uaddv(v30, __ H, p7, z9); // uaddv d30, p7, z9.h
__ bind(forth);
@ -961,30 +978,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140002fb, 0x94000000,
0x97ffffd4, 0x940002f8, 0x3400000a, 0x34fffa2a,
0x34005eaa, 0x35000008, 0x35fff9c8, 0x35005e48,
0xb400000b, 0xb4fff96b, 0xb4005deb, 0xb500001d,
0xb5fff91d, 0xb5005d9d, 0x10000013, 0x10fff8b3,
0x10005d33, 0x90000013, 0x36300016, 0x3637f836,
0x36305cb6, 0x3758000c, 0x375ff7cc, 0x37585c4c,
0x14000000, 0x17ffffd7, 0x1400030c, 0x94000000,
0x97ffffd4, 0x94000309, 0x3400000a, 0x34fffa2a,
0x340060ca, 0x35000008, 0x35fff9c8, 0x35006068,
0xb400000b, 0xb4fff96b, 0xb400600b, 0xb500001d,
0xb5fff91d, 0xb5005fbd, 0x10000013, 0x10fff8b3,
0x10005f53, 0x90000013, 0x36300016, 0x3637f836,
0x36305ed6, 0x3758000c, 0x375ff7cc, 0x37585e6c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005a20, 0x54000001, 0x54fff541, 0x540059c1,
0x54000002, 0x54fff4e2, 0x54005962, 0x54000002,
0x54fff482, 0x54005902, 0x54000003, 0x54fff423,
0x540058a3, 0x54000003, 0x54fff3c3, 0x54005843,
0x54000004, 0x54fff364, 0x540057e4, 0x54000005,
0x54fff305, 0x54005785, 0x54000006, 0x54fff2a6,
0x54005726, 0x54000007, 0x54fff247, 0x540056c7,
0x54000008, 0x54fff1e8, 0x54005668, 0x54000009,
0x54fff189, 0x54005609, 0x5400000a, 0x54fff12a,
0x540055aa, 0x5400000b, 0x54fff0cb, 0x5400554b,
0x5400000c, 0x54fff06c, 0x540054ec, 0x5400000d,
0x54fff00d, 0x5400548d, 0x5400000e, 0x54ffefae,
0x5400542e, 0x5400000f, 0x54ffef4f, 0x540053cf,
0x54005c40, 0x54000001, 0x54fff541, 0x54005be1,
0x54000002, 0x54fff4e2, 0x54005b82, 0x54000002,
0x54fff482, 0x54005b22, 0x54000003, 0x54fff423,
0x54005ac3, 0x54000003, 0x54fff3c3, 0x54005a63,
0x54000004, 0x54fff364, 0x54005a04, 0x54000005,
0x54fff305, 0x540059a5, 0x54000006, 0x54fff2a6,
0x54005946, 0x54000007, 0x54fff247, 0x540058e7,
0x54000008, 0x54fff1e8, 0x54005888, 0x54000009,
0x54fff189, 0x54005829, 0x5400000a, 0x54fff12a,
0x540057ca, 0x5400000b, 0x54fff0cb, 0x5400576b,
0x5400000c, 0x54fff06c, 0x5400570c, 0x5400000d,
0x54fff00d, 0x540056ad, 0x5400000e, 0x54ffefae,
0x5400564e, 0x5400000f, 0x54ffef4f, 0x540055ef,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@ -1016,7 +1033,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x5800441b, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x5800463b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@ -1108,50 +1125,54 @@
0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff,
0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8208193, 0xf83101b6, 0xf83c13fe,
0xf821239a, 0xf824309e, 0xf826535e, 0xf8304109,
0xf82c7280, 0xf8216058, 0xf8a08309, 0xf8ba03d0,
0xf8a312ea, 0xf8aa21e4, 0xf8a2310b, 0xf8aa522f,
0xf8a2418a, 0xf8ac71af, 0xf8a26287, 0xf8fa8090,
0xf8e20184, 0xf8f01215, 0xf8f022ab, 0xf8f7334c,
0xf8f751dc, 0xf8eb4038, 0xf8ec715f, 0xf8f06047,
0xf863826d, 0xf8710070, 0xf86113cb, 0xf86521e8,
0xf87d301e, 0xf8745287, 0xf87742bc, 0xf87b70b9,
0xf8616217, 0xb83f8185, 0xb82901fc, 0xb83d13f6,
0xb83320bf, 0xb82e33f0, 0xb830529b, 0xb830416c,
0xb82973c6, 0xb831639b, 0xb8be8147, 0xb8b4008a,
0xb8b81231, 0xb8b623a3, 0xb8af3276, 0xb8b35056,
0xb8af4186, 0xb8b071ab, 0xb8b763c1, 0xb8f38225,
0xb8e202d0, 0xb8ed12aa, 0xb8fd219b, 0xb8fb3023,
0xb8ff5278, 0xb8f14389, 0xb8fb70ef, 0xb8f563f7,
0xb87983e2, 0xb87b0150, 0xb8771073, 0xb8702320,
0xb87a3057, 0xb870508c, 0xb87c43be, 0xb87070db,
0xb86961fd, 0xce273c87, 0xce080ac9, 0xce7e8e9b,
0xce808b45, 0xce79806e, 0xce758768, 0xcec0835a,
0xce608ad8, 0x043100c4, 0x046105e3, 0x65c900a6,
0x65d60a87, 0x65c80545, 0x0416a63e, 0x04001f8b,
0x0450979a, 0x04dabe0d, 0x045381a5, 0x04918b4f,
0x049006cb, 0x0497a264, 0x045eadd1, 0x04881062,
0x040a04d7, 0x04810f71, 0x04dca450, 0x65c084c3,
0x65cd8d93, 0x65c69a68, 0x65878ae0, 0x65c29db3,
0x049da0e6, 0x6582b911, 0x65c0b6d6, 0x65c1a1e2,
0x65cda494, 0x65c18107, 0x65af1493, 0x65e52b36,
0x65ab4ed0, 0x65f06a8d, 0x0451448f, 0x049c7c86,
0x0429335d, 0x04bc3162, 0x047a3027, 0x04e831d1,
0x049a38b5, 0x049832d5, 0x0419367d, 0x040832e4,
0x04ca26f3, 0x65872113, 0x65c63a2e, 0x659827d5,
0x0401358a,
0x25b8efe2, 0x25f8f007, 0x05203864, 0x05603ace,
0xa400a3e0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe,
0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff,
0xe547e400, 0xe4014be0, 0xe4a84fe0, 0xe5f15000,
0x858043e0, 0x85a043ff, 0xe59f5d08, 0x0420e3e9,
0x0460e3ea, 0x04a0e3eb, 0x04e0e3ec, 0x25104042,
0x25104871, 0x252c8840, 0x253c1420, 0x25681572,
0x25a21ce3, 0x25ea1e34, 0x1e601000, 0x1e603000,
0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000,
0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000,
0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000,
0x1e6e1000, 0x1e6e3000, 0x1e701000, 0x1e703000,
0x1e721000, 0x1e723000, 0x1e741000, 0x1e743000,
0x1e761000, 0x1e763000, 0x1e781000, 0x1e783000,
0x1e7a1000, 0x1e7a3000, 0x1e7c1000, 0x1e7c3000,
0x1e7e1000, 0x1e7e3000, 0xf8208193, 0xf83101b6,
0xf83c13fe, 0xf821239a, 0xf824309e, 0xf826535e,
0xf8304109, 0xf82c7280, 0xf8216058, 0xf8a08309,
0xf8ba03d0, 0xf8a312ea, 0xf8aa21e4, 0xf8a2310b,
0xf8aa522f, 0xf8a2418a, 0xf8ac71af, 0xf8a26287,
0xf8fa8090, 0xf8e20184, 0xf8f01215, 0xf8f022ab,
0xf8f7334c, 0xf8f751dc, 0xf8eb4038, 0xf8ec715f,
0xf8f06047, 0xf863826d, 0xf8710070, 0xf86113cb,
0xf86521e8, 0xf87d301e, 0xf8745287, 0xf87742bc,
0xf87b70b9, 0xf8616217, 0xb83f8185, 0xb82901fc,
0xb83d13f6, 0xb83320bf, 0xb82e33f0, 0xb830529b,
0xb830416c, 0xb82973c6, 0xb831639b, 0xb8be8147,
0xb8b4008a, 0xb8b81231, 0xb8b623a3, 0xb8af3276,
0xb8b35056, 0xb8af4186, 0xb8b071ab, 0xb8b763c1,
0xb8f38225, 0xb8e202d0, 0xb8ed12aa, 0xb8fd219b,
0xb8fb3023, 0xb8ff5278, 0xb8f14389, 0xb8fb70ef,
0xb8f563f7, 0xb87983e2, 0xb87b0150, 0xb8771073,
0xb8702320, 0xb87a3057, 0xb870508c, 0xb87c43be,
0xb87070db, 0xb86961fd, 0xce273c87, 0xce080ac9,
0xce7e8e9b, 0xce808b45, 0xce79806e, 0xce758768,
0xcec0835a, 0xce608ad8, 0x043100c4, 0x046105e3,
0x65c900a6, 0x65d60a87, 0x65c80545, 0x0416a63e,
0x04001f8b, 0x0450979a, 0x04dabe0d, 0x045381a5,
0x04918b4f, 0x049006cb, 0x0497a264, 0x045eadd1,
0x04881062, 0x040a04d7, 0x04810f71, 0x04dca450,
0x65c084c3, 0x65cd8d93, 0x65c69a68, 0x65878ae0,
0x65c29db3, 0x049da0e6, 0x6582b911, 0x65c0b6d6,
0x65c1a1e2, 0x65cda494, 0x65c18107, 0x65af1493,
0x65e52b36, 0x65ab4ed0, 0x65f06a8d, 0x0451448f,
0x049c7c86, 0x0429335d, 0x04bc3162, 0x047a3027,
0x04e831d1, 0x2493b8a5, 0x249d9604, 0x24d18095,
0x24d7b491, 0x045a2113, 0x04d83a2e, 0x041927d5,
0x0408358a, 0x048a2709, 0x658738c4, 0x65c639bb,
0x65d836de, 0x04413d3e,
};
// END Generated code -- do not edit