From dfe438d0570ada60de9d8dd7cdf97d229d91fc48 Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Tue, 31 Mar 2026 10:14:26 +0000 Subject: [PATCH] 8366441: AArch64: Support WFET in OnSpinWait Co-authored-by: Stuart Monteith Co-authored-by: Andrew Haley Reviewed-by: aph, eastigeevich --- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 4 + src/hotspot/cpu/aarch64/globals_aarch64.hpp | 14 +- .../cpu/aarch64/macroAssembler_aarch64.cpp | 25 + .../cpu/aarch64/macroAssembler_aarch64.hpp | 9 + src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp | 3 + src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp | 17 +- .../cpu/aarch64/vm_version_aarch64.cpp | 26 +- .../cpu/aarch64/vm_version_aarch64.hpp | 4 +- .../os_cpu/bsd_aarch64/os_bsd_aarch64.cpp | 2 + .../vm_version_linux_aarch64.cpp | 13 + .../flags/jvmFlagConstraintsRuntime.cpp | 16 +- .../classes/jdk/vm/ci/aarch64/AArch64.java | 2 + test/hotspot/gtest/aarch64/aarch64-asmtest.py | 13 +- test/hotspot/gtest/aarch64/asmtest.out.h | 2253 +++++++++-------- .../onSpinWait/TestOnSpinWaitAArch64.java | 38 +- 15 files changed, 1299 insertions(+), 1140 deletions(-) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 67cf77989d2..ebd8f3a9e03 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -1095,6 +1095,10 @@ public: #undef INSN + void wfet(Register rt) { + system(0b00, 0b011, 0b0001, 0b0000, 0b000, rt); + } + // we only provide mrs and msr for the special purpose system // registers where op1 (instr[20:19]) == 11 // n.b msr has L (instr[21]) == 0 mrs has L == 1 diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index e6de2c798b1..0ca5cb25e0c 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -115,14 +115,18 @@ define_pd_global(intx, InlineSmallCode, 1000); "Value -1 means off.") \ range(-1, 4096) \ product(ccstr, OnSpinWaitInst, "yield", DIAGNOSTIC, \ - "The instruction to use to implement " \ - "java.lang.Thread.onSpinWait()." \ - "Valid values are: none, nop, isb, yield, sb.") \ + "The instruction to use for java.lang.Thread.onSpinWait(). " \ + "Valid values are: none, nop, isb, yield, sb, wfet.") \ constraint(OnSpinWaitInstNameConstraintFunc, AtParse) \ product(uint, OnSpinWaitInstCount, 1, DIAGNOSTIC, \ - "The number of OnSpinWaitInst instructions to generate." \ - "It cannot be used with OnSpinWaitInst=none.") \ + "The number of OnSpinWaitInst instructions to generate. " \ + "It cannot be used with OnSpinWaitInst=none. " \ + "For OnSpinWaitInst=wfet it must be 1.") \ range(1, 99) \ + product(uint, OnSpinWaitDelay, 40, DIAGNOSTIC, \ + "The minimum delay (in nanoseconds) of the OnSpinWait loop. " \ + "It can only be used with -XX:OnSpinWaitInst=wfet.") \ + range(1, 1000) \ product(ccstr, UseBranchProtection, "none", \ "Branch Protection to use: none, standard, pac-ret") \ product(bool, AlwaysMergeDMB, true, DIAGNOSTIC, \ diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 732d94180ae..ebbc35ce20a 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -6807,6 +6807,9 @@ void MacroAssembler::spin_wait() { assert(VM_Version::supports_sb(), "current CPU does not support SB instruction"); sb(); break; + case SpinWait::WFET: + spin_wait_wfet(VM_Version::spin_wait_desc().delay()); + break; default: ShouldNotReachHere(); } @@ -6814,6 +6817,28 @@ void MacroAssembler::spin_wait() { block_comment("}"); } +void MacroAssembler::spin_wait_wfet(int delay_ns) { + // The sequence assumes CNTFRQ_EL0 is fixed to 1GHz. The assumption is valid + // starting from Armv8.6, according to the "D12.1.2 The system counter" of the + // Arm Architecture Reference Manual for A-profile architecture version M.a.a. + // This is sufficient because FEAT_WFXT is introduced from Armv8.6. + Register target = rscratch1; + Register current = rscratch2; + get_cntvctss_el0(current); + add(target, current, delay_ns); + + Label L_wait_loop; + bind(L_wait_loop); + + wfet(target); + get_cntvctss_el0(current); + + cmp(current, target); + br(LT, L_wait_loop); + + sb(); +} + // Stack frame creation/removal void MacroAssembler::enter(bool strip_ret_addr) { diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index fa32f3055b9..994fbe3c80f 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -660,6 +660,14 @@ public: msr(0b011, 0b0100, 0b0010, 0b000, reg); } + // CNTVCTSS_EL0: op1 == 011 + // CRn == 1110 + // CRm == 0000 + // op2 == 110 + inline void get_cntvctss_el0(Register reg) { + mrs(0b011, 0b1110, 0b0000, 0b110, reg); + } + // idiv variant which deals with MINLONG as dividend and -1 as divisor int corrected_idivl(Register result, Register ra, Register rb, bool want_remainder, Register tmp = rscratch1); @@ -1724,6 +1732,7 @@ public: // Code for java.lang.Thread::onSpinWait() intrinsic. void spin_wait(); + void spin_wait_wfet(int delay_ns); void fast_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow); void fast_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow); diff --git a/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp b/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp index 7da0151d834..97a981ab815 100644 --- a/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp @@ -32,6 +32,7 @@ bool SpinWait::supports(const char *name) { strcmp(name, "isb") == 0 || strcmp(name, "yield") == 0 || strcmp(name, "sb") == 0 || + strcmp(name, "wfet") == 0 || strcmp(name, "none") == 0); } @@ -46,6 +47,8 @@ SpinWait::Inst SpinWait::from_name(const char* name) { return SpinWait::YIELD; } else if (strcmp(name, "sb") == 0) { return SpinWait::SB; + } else if (strcmp(name, "wfet") == 0) { + return SpinWait::WFET; } return SpinWait::NONE; diff --git a/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp b/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp index 0e96a4b7157..6ebcd2477a8 100644 --- a/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp @@ -24,6 +24,8 @@ #ifndef CPU_AARCH64_SPIN_WAIT_AARCH64_HPP #define CPU_AARCH64_SPIN_WAIT_AARCH64_HPP +#include "utilities/debug.hpp" + class SpinWait { public: enum Inst { @@ -31,21 +33,30 @@ public: NOP, ISB, YIELD, - SB + SB, + WFET }; private: Inst _inst; int _count; + int _delay; Inst from_name(const char *name); public: - SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(inst == NONE ? 0 : count) {} - SpinWait(const char *name, int count) : SpinWait(from_name(name), count) {} + SpinWait(Inst inst = NONE, int count = 0, int delay = -1) + : _inst(inst), _count(inst == NONE ? 0 : count), _delay(delay) {} + SpinWait(const char *name, int count, int delay) + : SpinWait(from_name(name), count, delay) {} Inst inst() const { return _inst; } int inst_count() const { return _count; } + int delay() const { + assert(_inst == WFET, "Specifying the delay value is only supported for WFET"); + assert(_delay > 0, "The delay value must be positive"); + return _delay; + } static bool supports(const char *name); }; diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index 4423d9c5b58..8ccffac25a8 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved. - * Copyright 2025 Arm Limited and/or its affiliates. + * Copyright 2025, 2026 Arm Limited and/or its affiliates. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,11 +55,33 @@ SpinWait VM_Version::_spin_wait; const char* VM_Version::_features_names[MAX_CPU_FEATURES] = { nullptr }; static SpinWait get_spin_wait_desc() { - SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount); + SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount, OnSpinWaitDelay); if (spin_wait.inst() == SpinWait::SB && !VM_Version::supports_sb()) { vm_exit_during_initialization("OnSpinWaitInst is SB but current CPU does not support SB instruction"); } + if (spin_wait.inst() == SpinWait::WFET) { + if (!VM_Version::supports_wfxt()) { + vm_exit_during_initialization("OnSpinWaitInst is WFET but the CPU does not support the WFET instruction"); + } + + if (!VM_Version::supports_ecv()) { + vm_exit_during_initialization("The CPU does not support the FEAT_ECV required by the -XX:OnSpinWaitInst=wfet implementation"); + } + + if (!VM_Version::supports_sb()) { + vm_exit_during_initialization("The CPU does not support the SB instruction required by the -XX:OnSpinWaitInst=wfet implementation"); + } + + if (OnSpinWaitInstCount != 1) { + vm_exit_during_initialization("OnSpinWaitInstCount for OnSpinWaitInst 'wfet' must be 1"); + } + } else { + if (!FLAG_IS_DEFAULT(OnSpinWaitDelay)) { + vm_exit_during_initialization("OnSpinWaitDelay can only be used with -XX:OnSpinWaitInst=wfet"); + } + } + return spin_wait; } diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index e8681611234..378524fe168 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -159,7 +159,9 @@ public: /* flags above must follow Linux HWCAP */ \ decl(SVEBITPERM, svebitperm, 27) \ decl(SVE2, sve2, 28) \ - decl(A53MAC, a53mac, 31) + decl(A53MAC, a53mac, 31) \ + decl(ECV, ecv, 32) \ + decl(WFXT, wfxt, 33) enum Feature_Flag { #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = bit, diff --git a/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp b/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp index 36599594842..49d879731ff 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp +++ b/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp @@ -620,6 +620,8 @@ extern "C" { assert(VM_Version::supports_sb(), "current CPU does not support SB instruction"); asm volatile(".inst 0xd50330ff" : : : "memory"); break; + case SpinWait::WFET: + ShouldNotReachHere(); #ifdef ASSERT default: ShouldNotReachHere(); diff --git a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp index 1fe06dc640d..168fc622a0b 100644 --- a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp +++ b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp @@ -95,6 +95,13 @@ #define HWCAP2_SVEBITPERM (1 << 4) #endif +#ifndef HWCAP2_ECV +#define HWCAP2_ECV (1 << 19) +#endif + +#ifndef HWCAP2_WFXT +#define HWCAP2_WFXT (1u << 31) +#endif #ifndef PR_SVE_GET_VL // For old toolchains which do not have SVE related macros defined. #define PR_SVE_SET_VL 50 @@ -158,6 +165,12 @@ void VM_Version::get_os_cpu_info() { if (auxv2 & HWCAP2_SVEBITPERM) { set_feature(CPU_SVEBITPERM); } + if (auxv2 & HWCAP2_ECV) { + set_feature(CPU_ECV); + } + if (auxv2 & HWCAP2_WFXT) { + set_feature(CPU_WFXT); + } uint64_t ctr_el0; uint64_t dczid_el0; diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsRuntime.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsRuntime.cpp index 1e6efd893c8..1f16fada239 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsRuntime.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsRuntime.cpp @@ -153,6 +153,20 @@ JVMFlag::Error OnSpinWaitInstNameConstraintFunc(ccstr value, bool verbose) { return JVMFlag::VIOLATES_CONSTRAINT; } +#ifdef LINUX + if (strcmp(value, "wfet") == 0) { + if (UnlockExperimentalVMOptions) { + return JVMFlag::SUCCESS; + } else { + JVMFlag::printError(verbose, + "'wfet' value for OnSpinWaitInst is experimental and " + "must be enabled via -XX:+UnlockExperimentalVMOptions.\n" + "Error: The unlock option must precede 'OnSpinWaitInst'.\n"); + return JVMFlag::VIOLATES_CONSTRAINT; + } + } +#endif + if (strcmp(value, "nop") != 0 && strcmp(value, "isb") != 0 && strcmp(value, "yield") != 0 && @@ -160,7 +174,7 @@ JVMFlag::Error OnSpinWaitInstNameConstraintFunc(ccstr value, bool verbose) { strcmp(value, "none") != 0) { JVMFlag::printError(verbose, "Unrecognized value %s for OnSpinWaitInst. Must be one of the following: " - "nop, isb, yield, sb, none\n", + "nop, isb, yield, sb," LINUX_ONLY(" wfet,") " none\n", value); return JVMFlag::VIOLATES_CONSTRAINT; } diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/aarch64/AArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/aarch64/AArch64.java index 391ac224609..7790a9abd7c 100644 --- a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/aarch64/AArch64.java +++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/aarch64/AArch64.java @@ -184,6 +184,8 @@ public class AArch64 extends Architecture { SVEBITPERM, SVE2, A53MAC, + ECV, + WFXT, FPHP, ASIMDHP, } diff --git a/test/hotspot/gtest/aarch64/aarch64-asmtest.py b/test/hotspot/gtest/aarch64/aarch64-asmtest.py index 1ac2e1a89cd..bcf786d6f1f 100644 --- a/test/hotspot/gtest/aarch64/aarch64-asmtest.py +++ b/test/hotspot/gtest/aarch64/aarch64-asmtest.py @@ -391,6 +391,11 @@ class SystemRegOp(Instruction): self.CRn = 0b0100 self.CRm = 0b0010 self.op2 = 0b000 + elif self.system_reg == 'cntvctss_el0': + self.op1 = 0b011 + self.CRn = 0b1110 + self.CRm = 0b0000 + self.op2 = 0b110 def generate(self): self.reg = [GeneralRegister().generate()] @@ -1607,6 +1612,8 @@ generate (Op, ["nop", "yield", "wfe", "sev", "sevl", "pacia1716", "paciasp", "paciaz", "pacib1716", "pacibsp", "pacibz", "eret", "drps", "isb", "sb",]) +generate (OneRegOp, ["wfet"]) + # Ensure the "i" is not stripped off the end of the instruction generate (PostfixExceptionOp, ["wfi", "xpaclri"]) @@ -1623,7 +1630,7 @@ generate (OneRegOp, ["br", "blr", for system_reg in ["fpsr", "nzcv"]: generate (SystemOneRegOp, [ ["msr", system_reg] ]) -for system_reg in ["fpsr", "nzcv", "dczid_el0", "ctr_el0"]: +for system_reg in ["fpsr", "nzcv", "dczid_el0", "ctr_el0", "cntvctss_el0"]: generate (OneRegSystemOp, [ ["mrs", system_reg] ]) # Ensure the "i" is not stripped off the end of the instruction @@ -2275,9 +2282,9 @@ outfile.write("forth:\n") outfile.close() -# compile for sve with armv9-a+sha3+sve2-bitperm because of SHA3 crypto extension and SVE2 bitperm instructions. +# compile for sve with armv9.2-a+sha3+sve2-bitperm because of SHA3 crypto extension and SVE2 bitperm instructions. # armv9-a enables sve and sve2 by default. -subprocess.check_call([AARCH64_AS, "-march=armv9-a+sha3+sve2-bitperm", "aarch64ops.s", "-o", "aarch64ops.o"]) +subprocess.check_call([AARCH64_AS, "-march=armv9.2-a+sha3+sve2-bitperm", "aarch64ops.s", "-o", "aarch64ops.o"]) print print "/*" diff --git a/test/hotspot/gtest/aarch64/asmtest.out.h b/test/hotspot/gtest/aarch64/asmtest.out.h index 22bb6c57784..cd9fd4cfe9a 100644 --- a/test/hotspot/gtest/aarch64/asmtest.out.h +++ b/test/hotspot/gtest/aarch64/asmtest.out.h @@ -189,756 +189,762 @@ __ isb(); // isb __ sb(); // sb +// OneRegOp + __ wfet(r26); // wfet x26 + // PostfixExceptionOp __ wfi(); // wfi __ xpaclri(); // xpaclri // SystemOp - __ dsb(Assembler::ST); // dsb ST - __ dmb(Assembler::OSHST); // dmb OSHST + __ dsb(Assembler::OSHST); // dsb OSHST + __ dmb(Assembler::ISHLD); // dmb ISHLD // OneRegOp - __ br(r16); // br x16 - __ blr(r20); // blr x20 - __ paciza(r10); // paciza x10 - __ pacizb(r27); // pacizb x27 - __ pacdza(r8); // pacdza x8 - __ pacdzb(r0); // pacdzb x0 - __ autiza(r1); // autiza x1 - __ autizb(r21); // autizb x21 - __ autdza(r17); // autdza x17 + __ br(r20); // br x20 + __ blr(r10); // blr x10 + __ paciza(r27); // paciza x27 + __ pacizb(r8); // pacizb x8 + __ pacdza(r0); // pacdza x0 + __ pacdzb(r1); // pacdzb x1 + __ autiza(r21); // autiza x21 + __ autizb(r17); // autizb x17 + __ autdza(r29); // autdza x29 __ autdzb(r29); // autdzb x29 - __ xpacd(r29); // xpacd x29 - __ braaz(r28); // braaz x28 - __ brabz(r1); // brabz x1 - __ blraaz(r23); // blraaz x23 - __ blrabz(r21); // blrabz x21 + __ xpacd(r28); // xpacd x28 + __ braaz(r1); // braaz x1 + __ brabz(r23); // brabz x23 + __ blraaz(r21); // blraaz x21 + __ blrabz(r20); // blrabz x20 // SystemOneRegOp - __ msr(3, 4, 4, 1, r20); // msr fpsr, x20 + __ msr(3, 4, 4, 1, r22); // msr fpsr, x22 // SystemOneRegOp - __ msr(3, 4, 2, 0, r22); // msr nzcv, x22 + __ msr(3, 4, 2, 0, r27); // msr nzcv, x27 // OneRegSystemOp - __ mrs(3, 4, 4, 1, r27); // mrs x27, fpsr + __ mrs(3, 4, 4, 1, r19); // mrs x19, fpsr // OneRegSystemOp - __ mrs(3, 4, 2, 0, r19); // mrs x19, nzcv + __ mrs(3, 4, 2, 0, r11); // mrs x11, nzcv // OneRegSystemOp - __ mrs(3, 0, 0, 7, r11); // mrs x11, dczid_el0 + __ mrs(3, 0, 0, 7, r16); // mrs x16, dczid_el0 // OneRegSystemOp - __ mrs(3, 0, 0, 1, r16); // mrs x16, ctr_el0 + __ mrs(3, 0, 0, 1, r6); // mrs x6, ctr_el0 + +// OneRegSystemOp + __ mrs(3, 14, 0, 6, r17); // mrs x17, cntvctss_el0 // PostfixExceptionOneRegOp - __ xpaci(r6); // xpaci x6 + __ xpaci(r0); // xpaci x0 // LoadStoreExclusiveOp - __ stxr(r17, r0, r4); // stxr w17, x0, [x4] - __ stlxr(r10, r24, r22); // stlxr w10, x24, [x22] - __ ldxr(r10, r19); // ldxr x10, [x19] - __ ldaxr(r1, r5); // ldaxr x1, [x5] - __ stlr(r30, r8); // stlr x30, [x8] - __ ldar(r12, r17); // ldar x12, [x17] + __ stxr(r4, r10, r24); // stxr w4, x10, [x24] + __ stlxr(r22, r10, r19); // stlxr w22, x10, [x19] + __ ldxr(r1, r5); // ldxr x1, [x5] + __ ldaxr(r30, r8); // ldaxr x30, [x8] + __ stlr(r12, r17); // stlr x12, [x17] + __ ldar(r9, r14); // ldar x9, [x14] // LoadStoreExclusiveOp - __ stxrw(r9, r14, r7); // stxr w9, w14, [x7] - __ stlxrw(r1, r5, r16); // stlxr w1, w5, [x16] - __ ldxrw(r2, r12); // ldxr w2, [x12] - __ ldaxrw(r10, r12); // ldaxr w10, [x12] - __ stlrw(r3, r28); // stlr w3, [x28] - __ ldarw(r14, r26); // ldar w14, [x26] + __ stxrw(r7, r1, r5); // stxr w7, w1, [x5] + __ stlxrw(r16, r2, r12); // stlxr w16, w2, [x12] + __ ldxrw(r10, r12); // ldxr w10, [x12] + __ ldaxrw(r3, r28); // ldaxr w3, [x28] + __ stlrw(r14, r26); // stlr w14, [x26] + __ ldarw(r30, r10); // ldar w30, [x10] // LoadStoreExclusiveOp - __ stxrh(r30, r10, r14); // stxrh w30, w10, [x14] - __ stlxrh(r21, r13, r9); // stlxrh w21, w13, [x9] - __ ldxrh(r22, r27); // ldxrh w22, [x27] - __ ldaxrh(r28, r19); // ldaxrh w28, [x19] - __ stlrh(r11, r30); // stlrh w11, [x30] - __ ldarh(r19, r2); // ldarh w19, [x2] + __ stxrh(r14, r21, r13); // stxrh w14, w21, [x13] + __ stlxrh(r9, r22, r27); // stlxrh w9, w22, [x27] + __ ldxrh(r28, r19); // ldxrh w28, [x19] + __ ldaxrh(r11, r30); // ldaxrh w11, [x30] + __ stlrh(r19, r2); // stlrh w19, [x2] + __ ldarh(r2, r23); // ldarh w2, [x23] // LoadStoreExclusiveOp - __ stxrb(r2, r23, r1); // stxrb w2, w23, [x1] - __ stlxrb(r0, r12, r16); // stlxrb w0, w12, [x16] - __ ldxrb(r13, r15); // ldxrb w13, [x15] - __ ldaxrb(r17, r21); // ldaxrb w17, [x21] - __ stlrb(r13, r11); // stlrb w13, [x11] - __ ldarb(r30, r8); // ldarb w30, [x8] + __ stxrb(r1, r0, r12); // stxrb w1, w0, [x12] + __ stlxrb(r16, r13, r15); // stlxrb w16, w13, [x15] + __ ldxrb(r17, r21); // ldxrb w17, [x21] + __ ldaxrb(r13, r11); // ldaxrb w13, [x11] + __ stlrb(r30, r8); // stlrb w30, [x8] + __ ldarb(r24, r13); // ldarb w24, [x13] // LoadStoreExclusiveOp - __ ldxp(r24, r13, r11); // ldxp x24, x13, [x11] - __ ldaxp(r1, r26, r21); // ldaxp x1, x26, [x21] - __ stxp(r27, r13, r20, r3); // stxp w27, x13, x20, [x3] - __ stlxp(r12, r6, r1, r29); // stlxp w12, x6, x1, [x29] + __ ldxp(r11, r1, r26); // ldxp x11, x1, [x26] + __ ldaxp(r21, r27, r13); // ldaxp x21, x27, [x13] + __ stxp(r20, r3, r12, r6); // stxp w20, x3, x12, [x6] + __ stlxp(r1, r29, r6, r4); // stlxp w1, x29, x6, [x4] // LoadStoreExclusiveOp - __ ldxpw(r6, r4, r11); // ldxp w6, w4, [x11] - __ ldaxpw(r16, r4, r30); // ldaxp w16, w4, [x30] - __ stxpw(r30, r4, r12, r21); // stxp w30, w4, w12, [x21] - __ stlxpw(r27, r15, r28, r9); // stlxp w27, w15, w28, [x9] + __ ldxpw(r6, r11, r16); // ldxp w6, w11, [x16] + __ ldaxpw(r4, r30, r12); // ldaxp w4, w30, [x12] + __ stxpw(r21, r27, r15, r28); // stxp w21, w27, w15, [x28] + __ stlxpw(r9, r15, r20, r6); // stlxp w9, w15, w20, [x6] // base_plus_unscaled_offset // LoadStoreOp - __ str(r25, Address(r15, 1)); // str x25, [x15, 1] - __ strw(r2, Address(r1, -79)); // str w2, [x1, -79] - __ strb(r20, Address(r26, -22)); // strb w20, [x26, -22] - __ strh(r23, Address(r30, 22)); // strh w23, [x30, 22] - __ ldr(r26, Address(r28, -49)); // ldr x26, [x28, -49] - __ ldrw(r9, Address(r24, -128)); // ldr w9, [x24, -128] - __ ldrb(r12, Address(r12, -30)); // ldrb w12, [x12, -30] - __ ldrh(r1, Address(r15, 5)); // ldrh w1, [x15, 5] - __ ldrsb(r24, Address(r14, -31)); // ldrsb x24, [x14, -31] - __ ldrsh(r24, Address(r15, -6)); // ldrsh x24, [x15, -6] - __ ldrshw(r5, Address(r3, 12)); // ldrsh w5, [x3, 12] - __ ldrsw(r27, Address(r24, 17)); // ldrsw x27, [x24, 17] - __ ldrd(v13, Address(r29, -35)); // ldr d13, [x29, -35] - __ ldrs(v23, Address(r9, -47)); // ldr s23, [x9, -47] - __ strd(v11, Address(r0, 9)); // str d11, [x0, 9] - __ strs(v21, Address(r0, -127)); // str s21, [x0, -127] + __ str(r24, Address(r17, -125)); // str x24, [x17, -125] + __ strw(r1, Address(r26, 10)); // str w1, [x26, 10] + __ strb(r15, Address(r0, -17)); // strb w15, [x0, -17] + __ strh(r17, Address(r17, 13)); // strh w17, [x17, 13] + __ ldr(r5, Address(r25, 49)); // ldr x5, [x25, 49] + __ ldrw(r19, Address(r15, -116)); // ldr w19, [x15, -116] + __ ldrb(r20, Address(r3, 7)); // ldrb w20, [x3, 7] + __ ldrh(r19, Address(r4, -25)); // ldrh w19, [x4, -25] + __ ldrsb(r14, Address(r19, -6)); // ldrsb x14, [x19, -6] + __ ldrsh(r28, Address(r19, -53)); // ldrsh x28, [x19, -53] + __ ldrshw(r14, Address(r27, 13)); // ldrsh w14, [x27, 13] + __ ldrsw(r6, Address(r24, -120)); // ldrsw x6, [x24, -120] + __ ldrd(v18, Address(r2, 87)); // ldr d18, [x2, 87] + __ ldrs(v0, Address(r28, -48)); // ldr s0, [x28, -48] + __ strd(v31, Address(r28, 23)); // str d31, [x28, 23] + __ strs(v11, Address(r25, 43)); // str s11, [x25, 43] // pre // LoadStoreOp - __ str(r29, Address(__ pre(r3, -114))); // str x29, [x3, -114]! - __ strw(r17, Address(__ pre(r4, -72))); // str w17, [x4, -72]! - __ strb(r0, Address(__ pre(r2, -17))); // strb w0, [x2, -17]! - __ strh(r29, Address(__ pre(r1, 7))); // strh w29, [x1, 7]! - __ ldr(r16, Address(__ pre(r21, -133))); // ldr x16, [x21, -133]! - __ ldrw(r20, Address(__ pre(r14, 19))); // ldr w20, [x14, 19]! - __ ldrb(r22, Address(__ pre(r14, -3))); // ldrb w22, [x14, -3]! - __ ldrh(r15, Address(__ pre(r17, 9))); // ldrh w15, [x17, 9]! - __ ldrsb(r10, Address(__ pre(r15, -19))); // ldrsb x10, [x15, -19]! - __ ldrsh(r20, Address(__ pre(r12, -25))); // ldrsh x20, [x12, -25]! - __ ldrshw(r21, Address(__ pre(r10, -29))); // ldrsh w21, [x10, -29]! - __ ldrsw(r19, Address(__ pre(r0, 5))); // ldrsw x19, [x0, 5]! - __ ldrd(v0, Address(__ pre(r14, -54))); // ldr d0, [x14, -54]! - __ ldrs(v3, Address(__ pre(r1, 40))); // ldr s3, [x1, 40]! - __ strd(v4, Address(__ pre(r14, -94))); // str d4, [x14, -94]! - __ strs(v18, Address(__ pre(r28, -54))); // str s18, [x28, -54]! + __ str(r14, Address(__ pre(r8, -185))); // str x14, [x8, -185]! + __ strw(r25, Address(__ pre(r9, -93))); // str w25, [x9, -93]! + __ strb(r16, Address(__ pre(r23, -12))); // strb w16, [x23, -12]! + __ strh(r22, Address(__ pre(r9, -56))); // strh w22, [x9, -56]! + __ ldr(r11, Address(__ pre(r24, 109))); // ldr x11, [x24, 109]! + __ ldrw(r21, Address(__ pre(r19, 52))); // ldr w21, [x19, 52]! + __ ldrb(r6, Address(__ pre(r23, -2))); // ldrb w6, [x23, -2]! + __ ldrh(r7, Address(__ pre(r8, 21))); // ldrh w7, [x8, 21]! + __ ldrsb(r4, Address(__ pre(r12, -31))); // ldrsb x4, [x12, -31]! + __ ldrsh(r8, Address(__ pre(r11, -4))); // ldrsh x8, [x11, -4]! + __ ldrshw(r14, Address(__ pre(r21, -28))); // ldrsh w14, [x21, -28]! + __ ldrsw(r0, Address(__ pre(r15, -71))); // ldrsw x0, [x15, -71]! + __ ldrd(v27, Address(__ pre(r2, 29))); // ldr d27, [x2, 29]! + __ ldrs(v13, Address(__ pre(r21, 16))); // ldr s13, [x21, 16]! + __ strd(v12, Address(__ pre(r27, 16))); // str d12, [x27, 16]! + __ strs(v2, Address(__ pre(r25, -91))); // str s2, [x25, -91]! // post // LoadStoreOp - __ str(r22, Address(__ post(r15, -185))); // str x22, [x15], -185 - __ strw(r17, Address(__ post(r14, -7))); // str w17, [x14], -7 - __ strb(r30, Address(__ post(r11, -25))); // strb w30, [x11], -25 - __ strh(r1, Address(__ post(r11, 20))); // strh w1, [x11], 20 - __ ldr(r22, Address(__ post(r1, 2))); // ldr x22, [x1], 2 - __ ldrw(r2, Address(__ post(r23, -119))); // ldr w2, [x23], -119 - __ ldrb(r3, Address(__ post(r27, -12))); // ldrb w3, [x27], -12 - __ ldrh(r16, Address(__ post(r7, -37))); // ldrh w16, [x7], -37 - __ ldrsb(r15, Address(__ post(r26, 3))); // ldrsb x15, [x26], 3 - __ ldrsh(r7, Address(__ post(r15, -30))); // ldrsh x7, [x15], -30 - __ ldrshw(r3, Address(__ post(r11, -48))); // ldrsh w3, [x11], -48 - __ ldrsw(r25, Address(__ post(r23, 22))); // ldrsw x25, [x23], 22 - __ ldrd(v0, Address(__ post(r10, -215))); // ldr d0, [x10], -215 - __ ldrs(v19, Address(__ post(r6, 55))); // ldr s19, [x6], 55 - __ strd(v14, Address(__ post(r21, -234))); // str d14, [x21], -234 - __ strs(v0, Address(__ post(r22, -70))); // str s0, [x22], -70 + __ str(r10, Address(__ post(r13, -21))); // str x10, [x13], -21 + __ strw(r7, Address(__ post(r1, 50))); // str w7, [x1], 50 + __ strb(r10, Address(__ post(r3, 5))); // strb w10, [x3], 5 + __ strh(r5, Address(__ post(r6, -33))); // strh w5, [x6], -33 + __ ldr(r13, Address(__ post(r22, -158))); // ldr x13, [x22], -158 + __ ldrw(r13, Address(__ post(r2, -106))); // ldr w13, [x2], -106 + __ ldrb(r13, Address(__ post(r16, -24))); // ldrb w13, [x16], -24 + __ ldrh(r24, Address(__ post(r2, -22))); // ldrh w24, [x2], -22 + __ ldrsb(r3, Address(__ post(r23, -1))); // ldrsb x3, [x23], -1 + __ ldrsh(r19, Address(__ post(r8, -43))); // ldrsh x19, [x8], -43 + __ ldrshw(r2, Address(__ post(r23, -8))); // ldrsh w2, [x23], -8 + __ ldrsw(r19, Address(__ post(r28, -87))); // ldrsw x19, [x28], -87 + __ ldrd(v15, Address(__ post(r3, -81))); // ldr d15, [x3], -81 + __ ldrs(v12, Address(__ post(r29, -79))); // ldr s12, [x29], -79 + __ strd(v21, Address(__ post(r1, -124))); // str d21, [x1], -124 + __ strs(v7, Address(__ post(r9, -99))); // str s7, [x9], -99 // base_plus_reg // LoadStoreOp - __ str(r27, Address(r19, r0, Address::sxtx(0))); // str x27, [x19, x0, sxtx #0] - __ strw(r8, Address(r6, r13, Address::lsl(0))); // str w8, [x6, x13, lsl #0] - __ strb(r4, Address(r16, r22, Address::lsl(0))); // strb w4, [x16, x22, lsl #0] - __ strh(r25, Address(r26, r15, Address::uxtw(0))); // strh w25, [x26, w15, uxtw #0] - __ ldr(r4, Address(r5, r24, Address::sxtw(0))); // ldr x4, [x5, w24, sxtw #0] - __ ldrw(r4, Address(r17, r7, Address::uxtw(0))); // ldr w4, [x17, w7, uxtw #0] - __ ldrb(r17, Address(r7, r11, Address::lsl(0))); // ldrb w17, [x7, x11, lsl #0] - __ ldrh(r0, Address(r30, r23, Address::lsl(0))); // ldrh w0, [x30, x23, lsl #0] - __ ldrsb(r10, Address(r22, r1, Address::uxtw(0))); // ldrsb x10, [x22, w1, uxtw #0] - __ ldrsh(r21, Address(r30, r30, Address::sxtw(1))); // ldrsh x21, [x30, w30, sxtw #1] - __ ldrshw(r11, Address(r10, r28, Address::sxtw(1))); // ldrsh w11, [x10, w28, sxtw #1] - __ ldrsw(r28, Address(r19, r10, Address::uxtw(0))); // ldrsw x28, [x19, w10, uxtw #0] - __ ldrd(v30, Address(r29, r14, Address::sxtw(0))); // ldr d30, [x29, w14, sxtw #0] - __ ldrs(v8, Address(r5, r5, Address::sxtw(2))); // ldr s8, [x5, w5, sxtw #2] - __ strd(v25, Address(r8, r13, Address::sxtx(0))); // str d25, [x8, x13, sxtx #0] - __ strs(v17, Address(r24, r26, Address::lsl(2))); // str s17, [x24, x26, lsl #2] + __ str(r5, Address(r20, r2, Address::lsl(0))); // str x5, [x20, x2, lsl #0] + __ strw(r14, Address(r10, r30, Address::sxtw(2))); // str w14, [x10, w30, sxtw #2] + __ strb(r4, Address(r1, r28, Address::sxtw(0))); // strb w4, [x1, w28, sxtw #0] + __ strh(r5, Address(r10, r26, Address::sxtx(1))); // strh w5, [x10, x26, sxtx #1] + __ ldr(r22, Address(r2, r28, Address::lsl(3))); // ldr x22, [x2, x28, lsl #3] + __ ldrw(r12, Address(r0, r28, Address::lsl(2))); // ldr w12, [x0, x28, lsl #2] + __ ldrb(r8, Address(r8, r15, Address::sxtx(0))); // ldrb w8, [x8, x15, sxtx #0] + __ ldrh(r17, Address(r21, r14, Address::uxtw(1))); // ldrh w17, [x21, w14, uxtw #1] + __ ldrsb(r17, Address(r24, r8, Address::sxtx(0))); // ldrsb x17, [x24, x8, sxtx #0] + __ ldrsh(r10, Address(r10, r30, Address::sxtx(1))); // ldrsh x10, [x10, x30, sxtx #1] + __ ldrshw(r17, Address(r23, r0, Address::lsl(1))); // ldrsh w17, [x23, x0, lsl #1] + __ ldrsw(r25, Address(r30, r0, Address::lsl(2))); // ldrsw x25, [x30, x0, lsl #2] + __ ldrd(v5, Address(r24, r23, Address::uxtw(3))); // ldr d5, [x24, w23, uxtw #3] + __ ldrs(v22, Address(r3, r15, Address::lsl(2))); // ldr s22, [x3, x15, lsl #2] + __ strd(v4, Address(r29, r16, Address::sxtx(3))); // str d4, [x29, x16, sxtx #3] + __ strs(v7, Address(r20, r28, Address::sxtw(2))); // str s7, [x20, w28, sxtw #2] // base_plus_scaled_offset // LoadStoreOp - __ str(r19, Address(r12, 15904)); // str x19, [x12, 15904] - __ strw(r23, Address(r15, 7892)); // str w23, [x15, 7892] - __ strb(r29, Address(r13, 1970)); // strb w29, [x13, 1970] - __ strh(r11, Address(r7, 3094)); // strh w11, [x7, 3094] - __ ldr(r10, Address(r24, 14992)); // ldr x10, [x24, 14992] - __ ldrw(r16, Address(r0, 6160)); // ldr w16, [x0, 6160] - __ ldrb(r20, Address(r1, 2032)); // ldrb w20, [x1, 2032] - __ ldrh(r1, Address(r17, 4056)); // ldrh w1, [x17, 4056] - __ ldrsb(r17, Address(r25, 1889)); // ldrsb x17, [x25, 1889] - __ ldrsh(r27, Address(r25, 3964)); // ldrsh x27, [x25, 3964] - __ ldrshw(r14, Address(r17, 3724)); // ldrsh w14, [x17, 3724] - __ ldrsw(r10, Address(r7, 6372)); // ldrsw x10, [x7, 6372] - __ ldrd(v3, Address(r25, 12392)); // ldr d3, [x25, 12392] - __ ldrs(v12, Address(r9, 7840)); // ldr s12, [x9, 7840] - __ strd(v24, Address(r1, 12728)); // str d24, [x1, 12728] - __ strs(v3, Address(r20, 6924)); // str s3, [x20, 6924] + __ str(r27, Address(r0, 16360)); // str x27, [x0, 16360] + __ strw(r21, Address(r8, 7984)); // str w21, [x8, 7984] + __ strb(r17, Address(r29, 1795)); // strb w17, [x29, 1795] + __ strh(r27, Address(r2, 3636)); // strh w27, [x2, 3636] + __ ldr(r1, Address(r1, 12480)); // ldr x1, [x1, 12480] + __ ldrw(r29, Address(r17, 6320)); // ldr w29, [x17, 6320] + __ ldrb(r1, Address(r9, 1634)); // ldrb w1, [x9, 1634] + __ ldrh(r23, Address(r3, 3736)); // ldrh w23, [x3, 3736] + __ ldrsb(r21, Address(r19, 1857)); // ldrsb x21, [x19, 1857] + __ ldrsh(r19, Address(r7, 3180)); // ldrsh x19, [x7, 3180] + __ ldrshw(r10, Address(r1, 3560)); // ldrsh w10, [x1, 3560] + __ ldrsw(r10, Address(r20, 7428)); // ldrsw x10, [x20, 7428] + __ ldrd(v0, Address(r13, 14880)); // ldr d0, [x13, 14880] + __ ldrs(v11, Address(r27, 6356)); // ldr s11, [x27, 6356] + __ strd(v13, Address(r25, 13704)); // str d13, [x25, 13704] + __ strs(v20, Address(r0, 7580)); // str s20, [x0, 7580] // pcrel // LoadStoreOp - __ ldr(r2, back); // ldr x2, back - __ ldrw(r29, __ pc()); // ldr w29, . + __ ldr(r26, __ pc()); // ldr x26, . + __ ldrw(r19, back); // ldr w19, back // LoadStoreOp - __ prfm(Address(r14, 93)); // prfm PLDL1KEEP, [x14, 93] + __ prfm(Address(r8, -111)); // prfm PLDL1KEEP, [x8, -111] // LoadStoreOp - __ prfm(back); // prfm PLDL1KEEP, back + __ prfm(forth); // prfm PLDL1KEEP, forth // LoadStoreOp - __ prfm(Address(r1, r7, Address::lsl(3))); // prfm PLDL1KEEP, [x1, x7, lsl #3] + __ prfm(Address(r7, r7, Address::uxtw(3))); // prfm PLDL1KEEP, [x7, w7, uxtw #3] // LoadStoreOp - __ prfm(Address(r17, 12288)); // prfm PLDL1KEEP, [x17, 12288] + __ prfm(Address(r12, 12600)); // prfm PLDL1KEEP, [x12, 12600] // AddSubCarryOp - __ adcw(r1, r24, r3); // adc w1, w24, w3 - __ adcsw(r17, r24, r20); // adcs w17, w24, w20 - __ sbcw(r11, r0, r13); // sbc w11, w0, w13 - __ sbcsw(r28, r10, r7); // sbcs w28, w10, w7 - __ adc(r4, r15, r16); // adc x4, x15, x16 - __ adcs(r2, r12, r20); // adcs x2, x12, x20 - __ sbc(r29, r13, r13); // sbc x29, x13, x13 - __ sbcs(r14, r6, r12); // sbcs x14, x6, x12 + __ adcw(r24, r20, r11); // adc w24, w20, w11 + __ adcsw(r0, r13, r28); // adcs w0, w13, w28 + __ sbcw(r10, r7, r4); // sbc w10, w7, w4 + __ sbcsw(r15, r16, r2); // sbcs w15, w16, w2 + __ adc(r12, r20, r29); // adc x12, x20, x29 + __ adcs(r13, r13, r14); // adcs x13, x13, x14 + __ sbc(r6, r12, r20); // sbc x6, x12, x20 + __ sbcs(r12, r17, r25); // sbcs x12, x17, x25 // AddSubExtendedOp - __ addw(r20, r12, r17, ext::sxtx, 4); // add w20, w12, w17, sxtx #4 - __ addsw(r27, r11, r0, ext::uxtx, 3); // adds w27, w11, w0, uxtx #3 - __ sub(r7, r1, r9, ext::sxtx, 4); // sub x7, x1, x9, sxtx #4 - __ subsw(r3, r27, r1, ext::uxtb, 3); // subs w3, w27, w1, uxtb #3 - __ add(r13, r26, r12, ext::sxth, 4); // add x13, x26, x12, sxth #4 - __ adds(r17, r5, r10, ext::sxtb, 2); // adds x17, x5, x10, sxtb #2 - __ sub(r30, r8, r15, ext::uxtw, 4); // sub x30, x8, x15, uxtw #4 - __ subs(r19, r23, r19, ext::uxth, 4); // subs x19, x23, x19, uxth #4 + __ addw(r30, r27, r11, ext::sxtb, 1); // add w30, w27, w11, sxtb #1 + __ addsw(r14, r7, r1, ext::sxtw, 2); // adds w14, w7, w1, sxtw #2 + __ sub(r29, r3, r27, ext::sxth, 1); // sub x29, x3, x27, sxth #1 + __ subsw(r0, r13, r26, ext::sxtx, 2); // subs w0, w13, w26, sxtx #2 + __ add(r22, r17, r5, ext::uxtx, 2); // add x22, x17, x5, uxtx #2 + __ adds(r17, r30, r8, ext::sxtx, 3); // adds x17, x30, x8, sxtx #3 + __ sub(r10, r19, r23, ext::sxtw, 3); // sub x10, x19, x23, sxtw #3 + __ subs(r6, r29, r5, ext::uxtw, 3); // subs x6, x29, x5, uxtw #3 // ConditionalCompareOp - __ ccmnw(r29, r5, 10u, Assembler::LO); // ccmn w29, w5, #10, LO - __ ccmpw(r9, r13, 11u, Assembler::LO); // ccmp w9, w13, #11, LO - __ ccmn(r10, r4, 6u, Assembler::HS); // ccmn x10, x4, #6, HS - __ ccmp(r12, r2, 12u, Assembler::HI); // ccmp x12, x2, #12, HI + __ ccmnw(r19, r9, 4u, Assembler::HI); // ccmn w19, w9, #4, HI + __ ccmpw(r22, r10, 2u, Assembler::VC); // ccmp w22, w10, #2, VC + __ ccmn(r13, r12, 10u, Assembler::LS); // ccmn x13, x12, #10, LS + __ ccmp(r24, r16, 3u, Assembler::HS); // ccmp x24, x16, #3, HS // ConditionalCompareImmedOp - __ ccmnw(r16, 6, 2, Assembler::VS); // ccmn w16, #6, #2, VS - __ ccmpw(r7, 11, 13, Assembler::VS); // ccmp w7, #11, #13, VS - __ ccmn(r27, 10, 11, Assembler::LS); // ccmn x27, #10, #11, LS - __ ccmp(r3, 13, 13, Assembler::LE); // ccmp x3, #13, #13, LE + __ ccmnw(r7, 11, 13, Assembler::VS); // ccmn w7, #11, #13, VS + __ ccmpw(r27, 10, 11, Assembler::LS); // ccmp w27, #10, #11, LS + __ ccmn(r3, 13, 13, Assembler::LE); // ccmn x3, #13, #13, LE + __ ccmp(r26, 16, 5, Assembler::GT); // ccmp x26, #16, #5, GT // ConditionalSelectOp - __ cselw(r26, r27, r10, Assembler::VS); // csel w26, w27, w10, VS - __ csincw(r10, r21, r28, Assembler::LE); // csinc w10, w21, w28, LE - __ csinvw(r23, r9, r27, Assembler::LE); // csinv w23, w9, w27, LE - __ csnegw(r10, r29, r15, Assembler::LE); // csneg w10, w29, w15, LE - __ csel(r30, r25, r21, Assembler::HS); // csel x30, x25, x21, HS - __ csinc(r0, r17, r21, Assembler::GT); // csinc x0, x17, x21, GT - __ csinv(r16, r21, r20, Assembler::CS); // csinv x16, x21, x20, CS - __ csneg(r19, r30, r3, Assembler::LS); // csneg x19, x30, x3, LS + __ cselw(r10, r21, r28, Assembler::LE); // csel w10, w21, w28, LE + __ csincw(r23, r9, r27, Assembler::LE); // csinc w23, w9, w27, LE + __ csinvw(r10, r29, r15, Assembler::LE); // csinv w10, w29, w15, LE + __ csnegw(r30, r25, r21, Assembler::HS); // csneg w30, w25, w21, HS + __ csel(r0, r17, r21, Assembler::GT); // csel x0, x17, x21, GT + __ csinc(r16, r21, r20, Assembler::CS); // csinc x16, x21, x20, CS + __ csinv(r19, r30, r3, Assembler::LS); // csinv x19, x30, x3, LS + __ csneg(r19, r11, r24, Assembler::EQ); // csneg x19, x11, x24, EQ // TwoRegOp - __ rbitw(r19, r11); // rbit w19, w11 - __ rev16w(r24, r0); // rev16 w24, w0 - __ revw(r27, r25); // rev w27, w25 - __ clzw(r14, r3); // clz w14, w3 - __ clsw(r14, r17); // cls w14, w17 - __ rbit(r7, r15); // rbit x7, x15 - __ rev16(r24, r28); // rev16 x24, x28 - __ rev32(r17, r25); // rev32 x17, x25 - __ rev(r2, r26); // rev x2, x26 - __ clz(r28, r5); // clz x28, x5 - __ cls(r25, r26); // cls x25, x26 - __ pacia(r27, r16); // pacia x27, x16 - __ pacib(r17, r6); // pacib x17, x6 - __ pacda(r21, r12); // pacda x21, x12 - __ pacdb(r0, r4); // pacdb x0, x4 - __ autia(r12, r27); // autia x12, x27 - __ autib(r17, r28); // autib x17, x28 - __ autda(r28, r2); // autda x28, x2 - __ autdb(r17, r10); // autdb x17, x10 - __ braa(r15, r14); // braa x15, x14 - __ brab(r14, r3); // brab x14, x3 - __ blraa(r25, r15); // blraa x25, x15 - __ blrab(r19, r14); // blrab x19, x14 + __ rbitw(r27, r25); // rbit w27, w25 + __ rev16w(r14, r3); // rev16 w14, w3 + __ revw(r14, r17); // rev w14, w17 + __ clzw(r7, r15); // clz w7, w15 + __ clsw(r24, r28); // cls w24, w28 + __ rbit(r17, r25); // rbit x17, x25 + __ rev16(r2, r26); // rev16 x2, x26 + __ rev32(r28, r5); // rev32 x28, x5 + __ rev(r25, r26); // rev x25, x26 + __ clz(r27, r16); // clz x27, x16 + __ cls(r17, r6); // cls x17, x6 + __ pacia(r21, r12); // pacia x21, x12 + __ pacib(r0, r4); // pacib x0, x4 + __ pacda(r12, r27); // pacda x12, x27 + __ pacdb(r17, r28); // pacdb x17, x28 + __ autia(r28, r2); // autia x28, x2 + __ autib(r17, r10); // autib x17, x10 + __ autda(r15, r14); // autda x15, x14 + __ autdb(r14, r3); // autdb x14, x3 + __ braa(r25, r15); // braa x25, x15 + __ brab(r19, r14); // brab x19, x14 + __ blraa(r5, r16); // blraa x5, x16 + __ blrab(r4, r26); // blrab x4, x26 // ThreeRegOp - __ udivw(r5, r16, r4); // udiv w5, w16, w4 - __ sdivw(r26, r25, r4); // sdiv w26, w25, w4 - __ lslvw(r2, r2, r12); // lslv w2, w2, w12 - __ lsrvw(r29, r17, r8); // lsrv w29, w17, w8 - __ asrvw(r7, r3, r4); // asrv w7, w3, w4 - __ rorvw(r25, r4, r26); // rorv w25, w4, w26 - __ udiv(r25, r4, r17); // udiv x25, x4, x17 - __ sdiv(r0, r26, r17); // sdiv x0, x26, x17 - __ lslv(r23, r15, r21); // lslv x23, x15, x21 - __ lsrv(r28, r17, r27); // lsrv x28, x17, x27 - __ asrv(r10, r3, r0); // asrv x10, x3, x0 - __ rorv(r7, r25, r9); // rorv x7, x25, x9 - __ umulh(r6, r15, r29); // umulh x6, x15, x29 - __ smulh(r15, r10, r2); // smulh x15, x10, x2 + __ udivw(r25, r4, r2); // udiv w25, w4, w2 + __ sdivw(r2, r12, r29); // sdiv w2, w12, w29 + __ lslvw(r17, r8, r7); // lslv w17, w8, w7 + __ lsrvw(r3, r4, r25); // lsrv w3, w4, w25 + __ asrvw(r4, r26, r25); // asrv w4, w26, w25 + __ rorvw(r4, r17, r0); // rorv w4, w17, w0 + __ udiv(r26, r17, r23); // udiv x26, x17, x23 + __ sdiv(r15, r21, r28); // sdiv x15, x21, x28 + __ lslv(r17, r27, r10); // lslv x17, x27, x10 + __ lsrv(r3, r0, r7); // lsrv x3, x0, x7 + __ asrv(r25, r9, r6); // asrv x25, x9, x6 + __ rorv(r15, r29, r15); // rorv x15, x29, x15 + __ umulh(r10, r2, r17); // umulh x10, x2, x17 + __ smulh(r7, r11, r11); // smulh x7, x11, x11 // FourRegMulOp - __ maddw(r17, r7, r11, r11); // madd w17, w7, w11, w11 - __ msubw(r23, r7, r29, r23); // msub w23, w7, w29, w23 - __ madd(r14, r27, r11, r11); // madd x14, x27, x11, x11 - __ msub(r4, r24, r12, r15); // msub x4, x24, x12, x15 - __ smaddl(r14, r20, r11, r28); // smaddl x14, w20, w11, x28 - __ smsubl(r13, r11, r12, r23); // smsubl x13, w11, w12, x23 - __ umaddl(r30, r26, r14, r9); // umaddl x30, w26, w14, x9 - __ umsubl(r13, r10, r7, r5); // umsubl x13, w10, w7, x5 + __ maddw(r23, r7, r29, r23); // madd w23, w7, w29, w23 + __ msubw(r14, r27, r11, r11); // msub w14, w27, w11, w11 + __ madd(r4, r24, r12, r15); // madd x4, x24, x12, x15 + __ msub(r14, r20, r11, r28); // msub x14, x20, x11, x28 + __ smaddl(r13, r11, r12, r23); // smaddl x13, w11, w12, x23 + __ smsubl(r30, r26, r14, r9); // smsubl x30, w26, w14, x9 + __ umaddl(r13, r10, r7, r5); // umaddl x13, w10, w7, x5 + __ umsubl(r29, r15, r3, r11); // umsubl x29, w15, w3, x11 // ThreeRegFloatOp - __ fabdh(v30, v15, v3); // fabd h30, h15, h3 - __ fmulh(v12, v12, v16); // fmul h12, h12, h16 - __ fdivh(v31, v31, v18); // fdiv h31, h31, h18 - __ faddh(v19, v21, v16); // fadd h19, h21, h16 - __ fsubh(v15, v10, v21); // fsub h15, h10, h21 - __ fmaxh(v2, v10, v28); // fmax h2, h10, h28 - __ fminh(v7, v30, v31); // fmin h7, h30, h31 - __ fnmulh(v18, v1, v2); // fnmul h18, h1, h2 - __ fabds(v6, v10, v3); // fabd s6, s10, s3 - __ fmuls(v25, v11, v7); // fmul s25, s11, s7 - __ fdivs(v1, v12, v0); // fdiv s1, s12, s0 - __ fadds(v3, v19, v29); // fadd s3, s19, s29 - __ fsubs(v6, v23, v6); // fsub s6, s23, s6 - __ fmaxs(v0, v28, v27); // fmax s0, s28, s27 - __ fmins(v2, v5, v7); // fmin s2, s5, s7 - __ fnmuls(v29, v12, v25); // fnmul s29, s12, s25 - __ fabdd(v13, v12, v24); // fabd d13, d12, d24 - __ fmuld(v19, v8, v18); // fmul d19, d8, d18 - __ fdivd(v22, v26, v21); // fdiv d22, d26, d21 - __ faddd(v20, v19, v2); // fadd d20, d19, d2 - __ fsubd(v30, v22, v8); // fsub d30, d22, d8 - __ fmaxd(v22, v19, v21); // fmax d22, d19, d21 - __ fmind(v12, v18, v21); // fmin d12, d18, d21 - __ fnmuld(v6, v16, v3); // fnmul d6, d16, d3 + __ fabdh(v12, v16, v31); // fabd h12, h16, h31 + __ fmulh(v31, v18, v19); // fmul h31, h18, h19 + __ fdivh(v21, v16, v15); // fdiv h21, h16, h15 + __ faddh(v10, v21, v2); // fadd h10, h21, h2 + __ fsubh(v10, v28, v7); // fsub h10, h28, h7 + __ fmaxh(v30, v31, v18); // fmax h30, h31, h18 + __ fminh(v1, v2, v6); // fmin h1, h2, h6 + __ fnmulh(v10, v3, v25); // fnmul h10, h3, h25 + __ fabds(v11, v7, v1); // fabd s11, s7, s1 + __ fmuls(v12, v0, v3); // fmul s12, s0, s3 + __ fdivs(v19, v29, v6); // fdiv s19, s29, s6 + __ fadds(v23, v6, v0); // fadd s23, s6, s0 + __ fsubs(v28, v27, v2); // fsub s28, s27, s2 + __ fmaxs(v5, v7, v29); // fmax s5, s7, s29 + __ fmins(v12, v25, v13); // fmin s12, s25, s13 + __ fnmuls(v12, v24, v19); // fnmul s12, s24, s19 + __ fabdd(v8, v18, v22); // fabd d8, d18, d22 + __ fmuld(v26, v21, v20); // fmul d26, d21, d20 + __ fdivd(v19, v2, v30); // fdiv d19, d2, d30 + __ faddd(v22, v8, v22); // fadd d22, d8, d22 + __ fsubd(v19, v21, v12); // fsub d19, d21, d12 + __ fmaxd(v18, v21, v6); // fmax d18, d21, d6 + __ fmind(v16, v3, v3); // fmin d16, d3, d3 + __ fnmuld(v29, v3, v28); // fnmul d29, d3, d28 // FourRegFloatOp - __ fmaddh(v3, v29, v3, v28); // fmadd h3, h29, h3, h28 - __ fmadds(v15, v14, v10, v13); // fmadd s15, s14, s10, s13 - __ fmsubs(v12, v18, v10, v26); // fmsub s12, s18, s10, s26 - __ fnmadds(v7, v7, v15, v29); // fnmadd s7, s7, s15, s29 + __ fmaddh(v15, v14, v10, v13); // fmadd h15, h14, h10, h13 + __ fmadds(v12, v18, v10, v26); // fmadd s12, s18, s10, s26 + __ fmsubs(v7, v7, v15, v29); // fmsub s7, s7, s15, s29 __ fnmadds(v0, v23, v0, v12); // fnmadd s0, s23, s0, s12 - __ fmaddd(v24, v14, v13, v8); // fmadd d24, d14, d13, d8 - __ fmsubd(v15, v7, v9, v20); // fmsub d15, d7, d9, d20 - __ fnmaddd(v19, v29, v31, v16); // fnmadd d19, d29, d31, d16 + __ fnmadds(v24, v14, v13, v8); // fnmadd s24, s14, s13, s8 + __ fmaddd(v15, v7, v9, v20); // fmadd d15, d7, d9, d20 + __ fmsubd(v19, v29, v31, v16); // fmsub d19, d29, d31, d16 __ fnmaddd(v2, v9, v16, v21); // fnmadd d2, d9, d16, d21 + __ fnmaddd(v30, v4, v1, v27); // fnmadd d30, d4, d1, d27 // TwoRegFloatOp - __ fmovs(v30, v4); // fmov s30, s4 - __ fabss(v1, v27); // fabs s1, s27 - __ fnegs(v25, v24); // fneg s25, s24 - __ fsqrts(v14, v21); // fsqrt s14, s21 - __ fcvts(v13, v6); // fcvt d13, s6 - __ fcvtsh(v12, v25); // fcvt h12, s25 - __ fcvths(v25, v30); // fcvt s25, h30 - __ fmovd(v28, v21); // fmov d28, d21 - __ fabsd(v16, v23); // fabs d16, d23 - __ fnegd(v5, v29); // fneg d5, d29 - __ fsqrtd(v22, v19); // fsqrt d22, d19 - __ fcvtd(v13, v20); // fcvt s13, d20 - __ fsqrth(v19, v28); // fsqrt h19, h28 + __ fmovs(v25, v24); // fmov s25, s24 + __ fabss(v14, v21); // fabs s14, s21 + __ fnegs(v13, v6); // fneg s13, s6 + __ fsqrts(v12, v25); // fsqrt s12, s25 + __ fcvts(v25, v30); // fcvt d25, s30 + __ fcvtsh(v28, v21); // fcvt h28, s21 + __ fcvths(v16, v23); // fcvt s16, h23 + __ fmovd(v5, v29); // fmov d5, d29 + __ fabsd(v22, v19); // fabs d22, d19 + __ fnegd(v13, v20); // fneg d13, d20 + __ fsqrtd(v19, v28); // fsqrt d19, d28 + __ fcvtd(v18, v6); // fcvt s18, d6 + __ fsqrth(v14, v7); // fsqrt h14, h7 // FloatConvertOp - __ fcvtzsw(r17, v6); // fcvtzs w17, s6 - __ fcvtzs(r13, v7); // fcvtzs x13, s7 - __ fcvtzdw(r28, v26); // fcvtzs w28, d26 - __ fcvtzd(r17, v6); // fcvtzs x17, d6 - __ scvtfws(v1, r4); // scvtf s1, w4 - __ scvtfs(v14, r20); // scvtf s14, x20 - __ scvtfwd(v7, r21); // scvtf d7, w21 - __ scvtfd(v27, r23); // scvtf d27, x23 - __ fcvtassw(r13, v20); // fcvtas w13, s20 - __ fcvtasd(r30, v28); // fcvtas x30, d28 - __ fcvtmssw(r10, v21); // fcvtms w10, s21 - __ fcvtmsd(r5, v17); // fcvtms x5, d17 - __ fmovs(r11, v14); // fmov w11, s14 - __ fmovd(r13, v21); // fmov x13, d21 - __ fmovs(v27, r14); // fmov s27, w14 - __ fmovd(v4, r23); // fmov d4, x23 + __ fcvtzsw(r28, v26); // fcvtzs w28, s26 + __ fcvtzs(r17, v6); // fcvtzs x17, s6 + __ fcvtzdw(r1, v4); // fcvtzs w1, d4 + __ fcvtzd(r13, v21); // fcvtzs x13, d21 + __ scvtfws(v7, r21); // scvtf s7, w21 + __ scvtfs(v27, r23); // scvtf s27, x23 + __ scvtfwd(v13, r20); // scvtf d13, w20 + __ scvtfd(v31, r27); // scvtf d31, x27 + __ fcvtassw(r10, v21); // fcvtas w10, s21 + __ fcvtasd(r5, v17); // fcvtas x5, d17 + __ fcvtmssw(r11, v14); // fcvtms w11, s14 + __ fcvtmsd(r13, v21); // fcvtms x13, d21 + __ fmovs(r26, v14); // fmov w26, s14 + __ fmovd(r4, v24); // fmov x4, d24 + __ fmovs(v24, r29); // fmov s24, w29 + __ fmovd(v12, r14); // fmov d12, x14 // TwoRegFloatOp - __ fcmps(v24, v30); // fcmp s24, s30 - __ fcmpd(v12, v14); // fcmp d12, d14 - __ fcmps(v17, 0.0); // fcmp s17, #0.0 - __ fcmpd(v28, 0.0); // fcmp d28, #0.0 + __ fcmps(v17, v28); // fcmp s17, s28 + __ fcmpd(v22, v0); // fcmp d22, d0 + __ fcmps(v6, 0.0); // fcmp s6, #0.0 + __ fcmpd(v27, 0.0); // fcmp d27, #0.0 // LoadStorePairOp - __ stpw(r0, r6, Address(r26, 16)); // stp w0, w6, [x26, #16] - __ ldpw(r0, r30, Address(r6, -32)); // ldp w0, w30, [x6, #-32] - __ ldpsw(r16, r2, Address(r11, -208)); // ldpsw x16, x2, [x11, #-208] - __ stp(r15, r0, Address(r12, 128)); // stp x15, x0, [x12, #128] - __ ldp(r7, r30, Address(r23, 32)); // ldp x7, x30, [x23, #32] + __ stpw(r27, r12, Address(r6, -32)); // stp w27, w12, [x6, #-32] + __ ldpw(r14, r11, Address(r19, -256)); // ldp w14, w11, [x19, #-256] + __ ldpsw(r0, r12, Address(r15, -48)); // ldpsw x0, x12, [x15, #-48] + __ stp(r9, r23, Address(r23, 32)); // stp x9, x23, [x23, #32] + __ ldp(r15, r4, Address(r26, -176)); // ldp x15, x4, [x26, #-176] // LoadStorePairOp - __ stpw(r26, r15, Address(__ pre(r7, -256))); // stp w26, w15, [x7, #-256]! - __ ldpw(r11, r15, Address(__ pre(r10, -32))); // ldp w11, w15, [x10, #-32]! - __ ldpsw(r19, r16, Address(__ pre(r1, 64))); // ldpsw x19, x16, [x1, #64]! - __ stp(r14, r9, Address(__ pre(r0, 128))); // stp x14, x9, [x0, #128]! - __ ldp(r27, r3, Address(__ pre(r12, -96))); // ldp x27, x3, [x12, #-96]! + __ stpw(r17, r8, Address(__ pre(r6, -160))); // stp w17, w8, [x6, #-160]! + __ ldpw(r7, r2, Address(__ pre(r4, -112))); // ldp w7, w2, [x4, #-112]! + __ ldpsw(r14, r9, Address(__ pre(r22, -16))); // ldpsw x14, x9, [x22, #-16]! + __ stp(r13, r20, Address(__ pre(r24, -256))); // stp x13, x20, [x24, #-256]! + __ ldp(r8, r11, Address(__ pre(r10, 96))); // ldp x8, x11, [x10, #96]! // LoadStorePairOp - __ stpw(r8, r11, Address(__ post(r12, -256))); // stp w8, w11, [x12], #-256 - __ ldpw(r10, r16, Address(__ post(r4, 64))); // ldp w10, w16, [x4], #64 - __ ldpsw(r10, r30, Address(__ post(r19, -64))); // ldpsw x10, x30, [x19], #-64 - __ stp(r24, r2, Address(__ post(r15, -96))); // stp x24, x2, [x15], #-96 - __ ldp(r24, r10, Address(__ post(r16, 80))); // ldp x24, x10, [x16], #80 + __ stpw(r24, r5, Address(__ post(r16, -112))); // stp w24, w5, [x16], #-112 + __ ldpw(r0, r26, Address(__ post(r9, -128))); // ldp w0, w26, [x9], #-128 + __ ldpsw(r24, r2, Address(__ post(r17, -128))); // ldpsw x24, x2, [x17], #-128 + __ stp(r26, r17, Address(__ post(r14, -48))); // stp x26, x17, [x14], #-48 + __ ldp(r30, r21, Address(__ post(r29, 48))); // ldp x30, x21, [x29], #48 // LoadStorePairOp - __ stnpw(r30, r21, Address(r29, 16)); // stnp w30, w21, [x29, #16] - __ ldnpw(r8, r30, Address(r10, -112)); // ldnp w8, w30, [x10, #-112] - __ stnp(r30, r26, Address(r6, -128)); // stnp x30, x26, [x6, #-128] - __ ldnp(r24, r2, Address(r20, 64)); // ldnp x24, x2, [x20, #64] + __ stnpw(r17, r23, Address(r10, -112)); // stnp w17, w23, [x10, #-112] + __ ldnpw(r26, r6, Address(r30, -160)); // ldnp w26, w6, [x30, #-160] + __ stnp(r30, r8, Address(r20, 64)); // stnp x30, x8, [x20, #64] + __ ldnp(r22, r29, Address(r9, 48)); // ldnp x22, x29, [x9, #48] // LdStNEONOp - __ ld1(v31, __ T8B, Address(r25)); // ld1 {v31.8B}, [x25] - __ ld1(v5, v6, __ T16B, Address(__ post(r15, 32))); // ld1 {v5.16B, v6.16B}, [x15], 32 - __ ld1(v10, v11, v12, __ T1D, Address(__ post(r7, r13))); // ld1 {v10.1D, v11.1D, v12.1D}, [x7], x13 - __ ld1(v13, v14, v15, v16, __ T8H, Address(__ post(r16, 64))); // ld1 {v13.8H, v14.8H, v15.8H, v16.8H}, [x16], 64 - __ ld1r(v7, __ T8B, Address(r17)); // ld1r {v7.8B}, [x17] - __ ld1r(v16, __ T4S, Address(__ post(r25, 4))); // ld1r {v16.4S}, [x25], 4 - __ ld1r(v11, __ T1D, Address(__ post(r3, r7))); // ld1r {v11.1D}, [x3], x7 - __ ld2(v13, v14, __ T2D, Address(r7)); // ld2 {v13.2D, v14.2D}, [x7] - __ ld2(v9, v10, __ T4H, Address(__ post(r27, 16))); // ld2 {v9.4H, v10.4H}, [x27], 16 - __ ld2r(v6, v7, __ T16B, Address(r26)); // ld2r {v6.16B, v7.16B}, [x26] - __ ld2r(v23, v24, __ T2S, Address(__ post(r16, 8))); // ld2r {v23.2S, v24.2S}, [x16], 8 - __ ld2r(v6, v7, __ T2D, Address(__ post(r13, r8))); // ld2r {v6.2D, v7.2D}, [x13], x8 - __ ld3(v20, v21, v22, __ T4S, Address(__ post(r1, r26))); // ld3 {v20.4S, v21.4S, v22.4S}, [x1], x26 - __ ld3(v15, v16, v17, __ T2S, Address(r15)); // ld3 {v15.2S, v16.2S, v17.2S}, [x15] - __ ld3r(v29, v30, v31, __ T8H, Address(r22)); // ld3r {v29.8H, v30.8H, v31.8H}, [x22] - __ ld3r(v6, v7, v8, __ T4S, Address(__ post(r10, 12))); // ld3r {v6.4S, v7.4S, v8.4S}, [x10], 12 - __ ld3r(v15, v16, v17, __ T1D, Address(__ post(r6, r15))); // ld3r {v15.1D, v16.1D, v17.1D}, [x6], x15 - __ ld4(v6, v7, v8, v9, __ T8H, Address(__ post(r10, 64))); // ld4 {v6.8H, v7.8H, v8.8H, v9.8H}, [x10], 64 - __ ld4(v11, v12, v13, v14, __ T8B, Address(__ post(r3, r7))); // ld4 {v11.8B, v12.8B, v13.8B, v14.8B}, [x3], x7 - __ ld4r(v12, v13, v14, v15, __ T8B, Address(r25)); // ld4r {v12.8B, v13.8B, v14.8B, v15.8B}, [x25] - __ ld4r(v11, v12, v13, v14, __ T4H, Address(__ post(r15, 8))); // ld4r {v11.4H, v12.4H, v13.4H, v14.4H}, [x15], 8 - __ ld4r(v30, v31, v0, v1, __ T2S, Address(__ post(r6, r28))); // ld4r {v30.2S, v31.2S, v0.2S, v1.2S}, [x6], x28 + __ ld1(v13, __ T8B, Address(r1)); // ld1 {v13.8B}, [x1] + __ ld1(v1, v2, __ T16B, Address(__ post(r16, 32))); // ld1 {v1.16B, v2.16B}, [x16], 32 + __ ld1(v22, v23, v24, __ T1D, Address(__ post(r25, r19))); // ld1 {v22.1D, v23.1D, v24.1D}, [x25], x19 + __ ld1(v25, v26, v27, v28, __ T8H, Address(__ post(r26, 64))); // ld1 {v25.8H, v26.8H, v27.8H, v28.8H}, [x26], 64 + __ ld1r(v9, __ T8B, Address(r15)); // ld1r {v9.8B}, [x15] + __ ld1r(v12, __ T4S, Address(__ post(r25, 4))); // ld1r {v12.4S}, [x25], 4 + __ ld1r(v4, __ T1D, Address(__ post(r14, r29))); // ld1r {v4.1D}, [x14], x29 + __ ld2(v17, v18, __ T2D, Address(r17)); // ld2 {v17.2D, v18.2D}, [x17] + __ ld2(v23, v24, __ T4H, Address(__ post(r27, 16))); // ld2 {v23.4H, v24.4H}, [x27], 16 + __ ld2r(v2, v3, __ T16B, Address(r24)); // ld2r {v2.16B, v3.16B}, [x24] + __ ld2r(v11, v12, __ T2S, Address(__ post(r0, 8))); // ld2r {v11.2S, v12.2S}, [x0], 8 + __ ld2r(v2, v3, __ T2D, Address(__ post(r12, r22))); // ld2r {v2.2D, v3.2D}, [x12], x22 + __ ld3(v21, v22, v23, __ T4S, Address(__ post(r13, r13))); // ld3 {v21.4S, v22.4S, v23.4S}, [x13], x13 + __ ld3(v12, v13, v14, __ T2S, Address(r11)); // ld3 {v12.2S, v13.2S, v14.2S}, [x11] + __ ld3r(v18, v19, v20, __ T8H, Address(r14)); // ld3r {v18.8H, v19.8H, v20.8H}, [x14] + __ ld3r(v25, v26, v27, __ T4S, Address(__ post(r21, 12))); // ld3r {v25.4S, v26.4S, v27.4S}, [x21], 12 + __ ld3r(v20, v21, v22, __ T1D, Address(__ post(r16, r27))); // ld3r {v20.1D, v21.1D, v22.1D}, [x16], x27 + __ ld4(v0, v1, v2, v3, __ T8H, Address(__ post(r1, 64))); // ld4 {v0.8H, v1.8H, v2.8H, v3.8H}, [x1], 64 + __ ld4(v0, v1, v2, v3, __ T8B, Address(__ post(r27, r29))); // ld4 {v0.8B, v1.8B, v2.8B, v3.8B}, [x27], x29 + __ ld4r(v18, v19, v20, v21, __ T8B, Address(r17)); // ld4r {v18.8B, v19.8B, v20.8B, v21.8B}, [x17] + __ ld4r(v5, v6, v7, v8, __ T4H, Address(__ post(r24, 8))); // ld4r {v5.4H, v6.4H, v7.4H, v8.4H}, [x24], 8 + __ ld4r(v28, v29, v30, v31, __ T2S, Address(__ post(r5, r20))); // ld4r {v28.2S, v29.2S, v30.2S, v31.2S}, [x5], x20 // NEONReduceInstruction - __ addv(v27, __ T8B, v28); // addv b27, v28.8B - __ addv(v28, __ T16B, v29); // addv b28, v29.16B - __ addv(v1, __ T4H, v2); // addv h1, v2.4H - __ addv(v28, __ T8H, v29); // addv h28, v29.8H - __ addv(v1, __ T4S, v2); // addv s1, v2.4S - __ smaxv(v20, __ T8B, v21); // smaxv b20, v21.8B + __ addv(v1, __ T8B, v2); // addv b1, v2.8B + __ addv(v20, __ T16B, v21); // addv b20, v21.16B + __ addv(v29, __ T4H, v30); // addv h29, v30.4H + __ addv(v16, __ T8H, v17); // addv h16, v17.8H + __ addv(v13, __ T4S, v14); // addv s13, v14.4S + __ smaxv(v10, __ T8B, v11); // smaxv b10, v11.8B __ smaxv(v29, __ T16B, v30); // smaxv b29, v30.16B - __ smaxv(v16, __ T4H, v17); // smaxv h16, v17.4H - __ smaxv(v13, __ T8H, v14); // smaxv h13, v14.8H - __ smaxv(v10, __ T4S, v11); // smaxv s10, v11.4S - __ fmaxv(v29, __ T4S, v30); // fmaxv s29, v30.4S - __ sminv(v29, __ T8B, v30); // sminv b29, v30.8B - __ uminv(v19, __ T8B, v20); // uminv b19, v20.8B - __ sminv(v22, __ T16B, v23); // sminv b22, v23.16B - __ uminv(v10, __ T16B, v11); // uminv b10, v11.16B - __ sminv(v4, __ T4H, v5); // sminv h4, v5.4H - __ uminv(v31, __ T4H, v0); // uminv h31, v0.4H - __ sminv(v21, __ T8H, v22); // sminv h21, v22.8H - __ uminv(v8, __ T8H, v9); // uminv h8, v9.8H - __ sminv(v31, __ T4S, v0); // sminv s31, v0.4S - __ uminv(v19, __ T4S, v20); // uminv s19, v20.4S - __ fminv(v10, __ T4S, v11); // fminv s10, v11.4S - __ fmaxp(v28, v29, __ S); // fmaxp s28, v29.2S - __ fmaxp(v2, v3, __ D); // fmaxp d2, v3.2D - __ fminp(v25, v26, __ S); // fminp s25, v26.2S - __ fminp(v5, v6, __ D); // fminp d5, v6.2D + __ smaxv(v29, __ T4H, v30); // smaxv h29, v30.4H + __ smaxv(v19, __ T8H, v20); // smaxv h19, v20.8H + __ smaxv(v22, __ T4S, v23); // smaxv s22, v23.4S + __ fmaxv(v10, __ T4S, v11); // fmaxv s10, v11.4S + __ sminv(v4, __ T8B, v5); // sminv b4, v5.8B + __ uminv(v31, __ T8B, v0); // uminv b31, v0.8B + __ sminv(v21, __ T16B, v22); // sminv b21, v22.16B + __ uminv(v8, __ T16B, v9); // uminv b8, v9.16B + __ sminv(v31, __ T4H, v0); // sminv h31, v0.4H + __ uminv(v19, __ T4H, v20); // uminv h19, v20.4H + __ sminv(v10, __ T8H, v11); // sminv h10, v11.8H + __ uminv(v28, __ T8H, v29); // uminv h28, v29.8H + __ sminv(v2, __ T4S, v3); // sminv s2, v3.4S + __ uminv(v25, __ T4S, v26); // uminv s25, v26.4S + __ fminv(v5, __ T4S, v6); // fminv s5, v6.4S + __ fmaxp(v3, v4, __ S); // fmaxp s3, v4.2S + __ fmaxp(v8, v9, __ D); // fmaxp d8, v9.2D + __ fminp(v22, v23, __ S); // fminp s22, v23.2S + __ fminp(v19, v20, __ D); // fminp d19, v20.2D // NEONFloatCompareWithZero - __ fcm(Assembler::GT, v3, __ T2S, v4); // fcmgt v3.2S, v4.2S, #0.0 - __ fcm(Assembler::GT, v8, __ T4S, v9); // fcmgt v8.4S, v9.4S, #0.0 - __ fcm(Assembler::GT, v22, __ T2D, v23); // fcmgt v22.2D, v23.2D, #0.0 - __ fcm(Assembler::GE, v19, __ T2S, v20); // fcmge v19.2S, v20.2S, #0.0 - __ fcm(Assembler::GE, v13, __ T4S, v14); // fcmge v13.4S, v14.4S, #0.0 - __ fcm(Assembler::GE, v5, __ T2D, v6); // fcmge v5.2D, v6.2D, #0.0 - __ fcm(Assembler::EQ, v29, __ T2S, v30); // fcmeq v29.2S, v30.2S, #0.0 - __ fcm(Assembler::EQ, v24, __ T4S, v25); // fcmeq v24.4S, v25.4S, #0.0 - __ fcm(Assembler::EQ, v21, __ T2D, v22); // fcmeq v21.2D, v22.2D, #0.0 + __ fcm(Assembler::GT, v13, __ T2S, v14); // fcmgt v13.2S, v14.2S, #0.0 + __ fcm(Assembler::GT, v5, __ T4S, v6); // fcmgt v5.4S, v6.4S, #0.0 + __ fcm(Assembler::GT, v29, __ T2D, v30); // fcmgt v29.2D, v30.2D, #0.0 + __ fcm(Assembler::GE, v24, __ T2S, v25); // fcmge v24.2S, v25.2S, #0.0 + __ fcm(Assembler::GE, v21, __ T4S, v22); // fcmge v21.4S, v22.4S, #0.0 + __ fcm(Assembler::GE, v26, __ T2D, v27); // fcmge v26.2D, v27.2D, #0.0 + __ fcm(Assembler::EQ, v24, __ T2S, v25); // fcmeq v24.2S, v25.2S, #0.0 + __ fcm(Assembler::EQ, v3, __ T4S, v4); // fcmeq v3.4S, v4.4S, #0.0 + __ fcm(Assembler::EQ, v24, __ T2D, v25); // fcmeq v24.2D, v25.2D, #0.0 __ fcm(Assembler::LT, v26, __ T2S, v27); // fcmlt v26.2S, v27.2S, #0.0 - __ fcm(Assembler::LT, v24, __ T4S, v25); // fcmlt v24.4S, v25.4S, #0.0 - __ fcm(Assembler::LT, v3, __ T2D, v4); // fcmlt v3.2D, v4.2D, #0.0 - __ fcm(Assembler::LE, v24, __ T2S, v25); // fcmle v24.2S, v25.2S, #0.0 - __ fcm(Assembler::LE, v26, __ T4S, v27); // fcmle v26.4S, v27.4S, #0.0 - __ fcm(Assembler::LE, v23, __ T2D, v24); // fcmle v23.2D, v24.2D, #0.0 + __ fcm(Assembler::LT, v23, __ T4S, v24); // fcmlt v23.4S, v24.4S, #0.0 + __ fcm(Assembler::LT, v15, __ T2D, v16); // fcmlt v15.2D, v16.2D, #0.0 + __ fcm(Assembler::LE, v21, __ T2S, v22); // fcmle v21.2S, v22.2S, #0.0 + __ fcm(Assembler::LE, v3, __ T4S, v4); // fcmle v3.4S, v4.4S, #0.0 + __ fcm(Assembler::LE, v24, __ T2D, v25); // fcmle v24.2D, v25.2D, #0.0 // TwoRegNEONOp - __ absr(v15, __ T8B, v16); // abs v15.8B, v16.8B - __ absr(v21, __ T16B, v22); // abs v21.16B, v22.16B - __ absr(v3, __ T4H, v4); // abs v3.4H, v4.4H - __ absr(v24, __ T8H, v25); // abs v24.8H, v25.8H - __ absr(v8, __ T2S, v9); // abs v8.2S, v9.2S - __ absr(v25, __ T4S, v26); // abs v25.4S, v26.4S - __ absr(v20, __ T2D, v21); // abs v20.2D, v21.2D - __ fabs(v16, __ T2S, v17); // fabs v16.2S, v17.2S - __ fabs(v17, __ T4S, v18); // fabs v17.4S, v18.4S - __ fabs(v2, __ T2D, v3); // fabs v2.2D, v3.2D - __ fabs(v1, __ T4H, v2); // fabs v1.4H, v2.4H - __ fabs(v0, __ T8H, v1); // fabs v0.8H, v1.8H - __ fneg(v24, __ T2S, v25); // fneg v24.2S, v25.2S - __ fneg(v4, __ T4S, v5); // fneg v4.4S, v5.4S - __ fneg(v3, __ T2D, v4); // fneg v3.2D, v4.2D - __ fneg(v12, __ T4H, v13); // fneg v12.4H, v13.4H - __ fneg(v31, __ T8H, v0); // fneg v31.8H, v0.8H - __ fsqrt(v28, __ T2S, v29); // fsqrt v28.2S, v29.2S - __ fsqrt(v10, __ T4S, v11); // fsqrt v10.4S, v11.4S - __ fsqrt(v26, __ T2D, v27); // fsqrt v26.2D, v27.2D - __ fsqrt(v2, __ T4H, v3); // fsqrt v2.4H, v3.4H - __ fsqrt(v12, __ T8H, v13); // fsqrt v12.8H, v13.8H - __ notr(v18, __ T8B, v19); // not v18.8B, v19.8B - __ notr(v31, __ T16B, v0); // not v31.16B, v0.16B + __ absr(v8, __ T8B, v9); // abs v8.8B, v9.8B + __ absr(v25, __ T16B, v26); // abs v25.16B, v26.16B + __ absr(v20, __ T4H, v21); // abs v20.4H, v21.4H + __ absr(v16, __ T8H, v17); // abs v16.8H, v17.8H + __ absr(v17, __ T2S, v18); // abs v17.2S, v18.2S + __ absr(v2, __ T4S, v3); // abs v2.4S, v3.4S + __ absr(v1, __ T2D, v2); // abs v1.2D, v2.2D + __ fabs(v0, __ T2S, v1); // fabs v0.2S, v1.2S + __ fabs(v24, __ T4S, v25); // fabs v24.4S, v25.4S + __ fabs(v4, __ T2D, v5); // fabs v4.2D, v5.2D + __ fabs(v3, __ T4H, v4); // fabs v3.4H, v4.4H + __ fabs(v12, __ T8H, v13); // fabs v12.8H, v13.8H + __ fneg(v31, __ T2S, v0); // fneg v31.2S, v0.2S + __ fneg(v28, __ T4S, v29); // fneg v28.4S, v29.4S + __ fneg(v10, __ T2D, v11); // fneg v10.2D, v11.2D + __ fneg(v26, __ T4H, v27); // fneg v26.4H, v27.4H + __ fneg(v2, __ T8H, v3); // fneg v2.8H, v3.8H + __ fsqrt(v12, __ T2S, v13); // fsqrt v12.2S, v13.2S + __ fsqrt(v18, __ T4S, v19); // fsqrt v18.4S, v19.4S + __ fsqrt(v31, __ T2D, v0); // fsqrt v31.2D, v0.2D + __ fsqrt(v1, __ T4H, v2); // fsqrt v1.4H, v2.4H + __ fsqrt(v13, __ T8H, v14); // fsqrt v13.8H, v14.8H + __ notr(v29, __ T8B, v30); // not v29.8B, v30.8B + __ notr(v0, __ T16B, v1); // not v0.16B, v1.16B // ThreeRegNEONOp - __ andr(v1, __ T8B, v2, v3); // and v1.8B, v2.8B, v3.8B - __ andr(v13, __ T16B, v14, v15); // and v13.16B, v14.16B, v15.16B - __ orr(v29, __ T8B, v30, v31); // orr v29.8B, v30.8B, v31.8B - __ orr(v0, __ T16B, v1, v2); // orr v0.16B, v1.16B, v2.16B - __ eor(v19, __ T8B, v20, v21); // eor v19.8B, v20.8B, v21.8B - __ eor(v12, __ T16B, v13, v14); // eor v12.16B, v13.16B, v14.16B - __ addv(v17, __ T8B, v18, v19); // add v17.8B, v18.8B, v19.8B - __ addv(v22, __ T16B, v23, v24); // add v22.16B, v23.16B, v24.16B - __ addv(v13, __ T4H, v14, v15); // add v13.4H, v14.4H, v15.4H - __ addv(v28, __ T8H, v29, v30); // add v28.8H, v29.8H, v30.8H - __ addv(v30, __ T2S, v31, v0); // add v30.2S, v31.2S, v0.2S - __ addv(v31, __ T4S, v0, v1); // add v31.4S, v0.4S, v1.4S - __ addv(v1, __ T2D, v2, v3); // add v1.2D, v2.2D, v3.2D - __ sqaddv(v26, __ T8B, v27, v28); // sqadd v26.8B, v27.8B, v28.8B - __ sqaddv(v28, __ T16B, v29, v30); // sqadd v28.16B, v29.16B, v30.16B - __ sqaddv(v4, __ T4H, v5, v6); // sqadd v4.4H, v5.4H, v6.4H - __ sqaddv(v30, __ T8H, v31, v0); // sqadd v30.8H, v31.8H, v0.8H - __ sqaddv(v4, __ T2S, v5, v6); // sqadd v4.2S, v5.2S, v6.2S - __ sqaddv(v6, __ T4S, v7, v8); // sqadd v6.4S, v7.4S, v8.4S - __ sqaddv(v30, __ T2D, v31, v0); // sqadd v30.2D, v31.2D, v0.2D - __ uqaddv(v26, __ T8B, v27, v28); // uqadd v26.8B, v27.8B, v28.8B - __ uqaddv(v18, __ T16B, v19, v20); // uqadd v18.16B, v19.16B, v20.16B - __ uqaddv(v9, __ T4H, v10, v11); // uqadd v9.4H, v10.4H, v11.4H - __ uqaddv(v8, __ T8H, v9, v10); // uqadd v8.8H, v9.8H, v10.8H - __ uqaddv(v12, __ T2S, v13, v14); // uqadd v12.2S, v13.2S, v14.2S - __ uqaddv(v0, __ T4S, v1, v2); // uqadd v0.4S, v1.4S, v2.4S - __ uqaddv(v20, __ T2D, v21, v22); // uqadd v20.2D, v21.2D, v22.2D - __ fadd(v1, __ T2S, v2, v3); // fadd v1.2S, v2.2S, v3.2S + __ andr(v19, __ T8B, v20, v21); // and v19.8B, v20.8B, v21.8B + __ andr(v12, __ T16B, v13, v14); // and v12.16B, v13.16B, v14.16B + __ orr(v17, __ T8B, v18, v19); // orr v17.8B, v18.8B, v19.8B + __ orr(v22, __ T16B, v23, v24); // orr v22.16B, v23.16B, v24.16B + __ eor(v13, __ T8B, v14, v15); // eor v13.8B, v14.8B, v15.8B + __ eor(v28, __ T16B, v29, v30); // eor v28.16B, v29.16B, v30.16B + __ addv(v30, __ T8B, v31, v0); // add v30.8B, v31.8B, v0.8B + __ addv(v31, __ T16B, v0, v1); // add v31.16B, v0.16B, v1.16B + __ addv(v1, __ T4H, v2, v3); // add v1.4H, v2.4H, v3.4H + __ addv(v26, __ T8H, v27, v28); // add v26.8H, v27.8H, v28.8H + __ addv(v28, __ T2S, v29, v30); // add v28.2S, v29.2S, v30.2S + __ addv(v4, __ T4S, v5, v6); // add v4.4S, v5.4S, v6.4S + __ addv(v30, __ T2D, v31, v0); // add v30.2D, v31.2D, v0.2D + __ sqaddv(v4, __ T8B, v5, v6); // sqadd v4.8B, v5.8B, v6.8B + __ sqaddv(v6, __ T16B, v7, v8); // sqadd v6.16B, v7.16B, v8.16B + __ sqaddv(v30, __ T4H, v31, v0); // sqadd v30.4H, v31.4H, v0.4H + __ sqaddv(v26, __ T8H, v27, v28); // sqadd v26.8H, v27.8H, v28.8H + __ sqaddv(v18, __ T2S, v19, v20); // sqadd v18.2S, v19.2S, v20.2S + __ sqaddv(v9, __ T4S, v10, v11); // sqadd v9.4S, v10.4S, v11.4S + __ sqaddv(v8, __ T2D, v9, v10); // sqadd v8.2D, v9.2D, v10.2D + __ uqaddv(v12, __ T8B, v13, v14); // uqadd v12.8B, v13.8B, v14.8B + __ uqaddv(v0, __ T16B, v1, v2); // uqadd v0.16B, v1.16B, v2.16B + __ uqaddv(v20, __ T4H, v21, v22); // uqadd v20.4H, v21.4H, v22.4H + __ uqaddv(v1, __ T8H, v2, v3); // uqadd v1.8H, v2.8H, v3.8H + __ uqaddv(v24, __ T2S, v25, v26); // uqadd v24.2S, v25.2S, v26.2S + __ uqaddv(v2, __ T4S, v3, v4); // uqadd v2.4S, v3.4S, v4.4S + __ uqaddv(v0, __ T2D, v1, v2); // uqadd v0.2D, v1.2D, v2.2D + __ fadd(v9, __ T2S, v10, v11); // fadd v9.2S, v10.2S, v11.2S __ fadd(v24, __ T4S, v25, v26); // fadd v24.4S, v25.4S, v26.4S - __ fadd(v2, __ T2D, v3, v4); // fadd v2.2D, v3.2D, v4.2D - __ fadd(v0, __ T4H, v1, v2); // fadd v0.4H, v1.4H, v2.4H - __ fadd(v9, __ T8H, v10, v11); // fadd v9.8H, v10.8H, v11.8H - __ subv(v24, __ T8B, v25, v26); // sub v24.8B, v25.8B, v26.8B - __ subv(v26, __ T16B, v27, v28); // sub v26.16B, v27.16B, v28.16B - __ subv(v16, __ T4H, v17, v18); // sub v16.4H, v17.4H, v18.4H - __ subv(v30, __ T8H, v31, v0); // sub v30.8H, v31.8H, v0.8H - __ subv(v3, __ T2S, v4, v5); // sub v3.2S, v4.2S, v5.2S - __ subv(v10, __ T4S, v11, v12); // sub v10.4S, v11.4S, v12.4S - __ subv(v23, __ T2D, v24, v25); // sub v23.2D, v24.2D, v25.2D - __ sqsubv(v10, __ T8B, v11, v12); // sqsub v10.8B, v11.8B, v12.8B - __ sqsubv(v4, __ T16B, v5, v6); // sqsub v4.16B, v5.16B, v6.16B - __ sqsubv(v18, __ T4H, v19, v20); // sqsub v18.4H, v19.4H, v20.4H - __ sqsubv(v2, __ T8H, v3, v4); // sqsub v2.8H, v3.8H, v4.8H - __ sqsubv(v11, __ T2S, v12, v13); // sqsub v11.2S, v12.2S, v13.2S - __ sqsubv(v8, __ T4S, v9, v10); // sqsub v8.4S, v9.4S, v10.4S + __ fadd(v26, __ T2D, v27, v28); // fadd v26.2D, v27.2D, v28.2D + __ fadd(v16, __ T4H, v17, v18); // fadd v16.4H, v17.4H, v18.4H + __ fadd(v30, __ T8H, v31, v0); // fadd v30.8H, v31.8H, v0.8H + __ subv(v3, __ T8B, v4, v5); // sub v3.8B, v4.8B, v5.8B + __ subv(v10, __ T16B, v11, v12); // sub v10.16B, v11.16B, v12.16B + __ subv(v23, __ T4H, v24, v25); // sub v23.4H, v24.4H, v25.4H + __ subv(v10, __ T8H, v11, v12); // sub v10.8H, v11.8H, v12.8H + __ subv(v4, __ T2S, v5, v6); // sub v4.2S, v5.2S, v6.2S + __ subv(v18, __ T4S, v19, v20); // sub v18.4S, v19.4S, v20.4S + __ subv(v2, __ T2D, v3, v4); // sub v2.2D, v3.2D, v4.2D + __ sqsubv(v11, __ T8B, v12, v13); // sqsub v11.8B, v12.8B, v13.8B + __ sqsubv(v8, __ T16B, v9, v10); // sqsub v8.16B, v9.16B, v10.16B + __ sqsubv(v10, __ T4H, v11, v12); // sqsub v10.4H, v11.4H, v12.4H + __ sqsubv(v15, __ T8H, v16, v17); // sqsub v15.8H, v16.8H, v17.8H + __ sqsubv(v17, __ T2S, v18, v19); // sqsub v17.2S, v18.2S, v19.2S + __ sqsubv(v2, __ T4S, v3, v4); // sqsub v2.4S, v3.4S, v4.4S __ sqsubv(v10, __ T2D, v11, v12); // sqsub v10.2D, v11.2D, v12.2D - __ uqsubv(v15, __ T8B, v16, v17); // uqsub v15.8B, v16.8B, v17.8B - __ uqsubv(v17, __ T16B, v18, v19); // uqsub v17.16B, v18.16B, v19.16B - __ uqsubv(v2, __ T4H, v3, v4); // uqsub v2.4H, v3.4H, v4.4H - __ uqsubv(v10, __ T8H, v11, v12); // uqsub v10.8H, v11.8H, v12.8H - __ uqsubv(v12, __ T2S, v13, v14); // uqsub v12.2S, v13.2S, v14.2S - __ uqsubv(v12, __ T4S, v13, v14); // uqsub v12.4S, v13.4S, v14.4S - __ uqsubv(v15, __ T2D, v16, v17); // uqsub v15.2D, v16.2D, v17.2D - __ fsub(v13, __ T2S, v14, v15); // fsub v13.2S, v14.2S, v15.2S - __ fsub(v2, __ T4S, v3, v4); // fsub v2.4S, v3.4S, v4.4S - __ fsub(v7, __ T2D, v8, v9); // fsub v7.2D, v8.2D, v9.2D - __ fsub(v20, __ T4H, v21, v22); // fsub v20.4H, v21.4H, v22.4H - __ fsub(v26, __ T8H, v27, v28); // fsub v26.8H, v27.8H, v28.8H - __ mulv(v16, __ T8B, v17, v18); // mul v16.8B, v17.8B, v18.8B - __ mulv(v4, __ T16B, v5, v6); // mul v4.16B, v5.16B, v6.16B - __ mulv(v2, __ T4H, v3, v4); // mul v2.4H, v3.4H, v4.4H - __ mulv(v4, __ T8H, v5, v6); // mul v4.8H, v5.8H, v6.8H - __ mulv(v12, __ T2S, v13, v14); // mul v12.2S, v13.2S, v14.2S - __ mulv(v18, __ T4S, v19, v20); // mul v18.4S, v19.4S, v20.4S + __ uqsubv(v12, __ T8B, v13, v14); // uqsub v12.8B, v13.8B, v14.8B + __ uqsubv(v12, __ T16B, v13, v14); // uqsub v12.16B, v13.16B, v14.16B + __ uqsubv(v15, __ T4H, v16, v17); // uqsub v15.4H, v16.4H, v17.4H + __ uqsubv(v13, __ T8H, v14, v15); // uqsub v13.8H, v14.8H, v15.8H + __ uqsubv(v2, __ T2S, v3, v4); // uqsub v2.2S, v3.2S, v4.2S + __ uqsubv(v7, __ T4S, v8, v9); // uqsub v7.4S, v8.4S, v9.4S + __ uqsubv(v20, __ T2D, v21, v22); // uqsub v20.2D, v21.2D, v22.2D + __ fsub(v26, __ T2S, v27, v28); // fsub v26.2S, v27.2S, v28.2S + __ fsub(v16, __ T4S, v17, v18); // fsub v16.4S, v17.4S, v18.4S + __ fsub(v4, __ T2D, v5, v6); // fsub v4.2D, v5.2D, v6.2D + __ fsub(v2, __ T4H, v3, v4); // fsub v2.4H, v3.4H, v4.4H + __ fsub(v4, __ T8H, v5, v6); // fsub v4.8H, v5.8H, v6.8H + __ mulv(v12, __ T8B, v13, v14); // mul v12.8B, v13.8B, v14.8B + __ mulv(v18, __ T16B, v19, v20); // mul v18.16B, v19.16B, v20.16B + __ mulv(v21, __ T4H, v22, v23); // mul v21.4H, v22.4H, v23.4H + __ mulv(v16, __ T8H, v17, v18); // mul v16.8H, v17.8H, v18.8H + __ mulv(v18, __ T2S, v19, v20); // mul v18.2S, v19.2S, v20.2S + __ mulv(v11, __ T4S, v12, v13); // mul v11.4S, v12.4S, v13.4S __ fabd(v21, __ T2S, v22, v23); // fabd v21.2S, v22.2S, v23.2S - __ fabd(v16, __ T4S, v17, v18); // fabd v16.4S, v17.4S, v18.4S - __ fabd(v18, __ T2D, v19, v20); // fabd v18.2D, v19.2D, v20.2D - __ fabd(v11, __ T4H, v12, v13); // fabd v11.4H, v12.4H, v13.4H - __ fabd(v21, __ T8H, v22, v23); // fabd v21.8H, v22.8H, v23.8H - __ faddp(v23, __ T2S, v24, v25); // faddp v23.2S, v24.2S, v25.2S - __ faddp(v12, __ T4S, v13, v14); // faddp v12.4S, v13.4S, v14.4S - __ faddp(v26, __ T2D, v27, v28); // faddp v26.2D, v27.2D, v28.2D - __ faddp(v23, __ T4H, v24, v25); // faddp v23.4H, v24.4H, v25.4H - __ faddp(v28, __ T8H, v29, v30); // faddp v28.8H, v29.8H, v30.8H - __ fmul(v14, __ T2S, v15, v16); // fmul v14.2S, v15.2S, v16.2S - __ fmul(v11, __ T4S, v12, v13); // fmul v11.4S, v12.4S, v13.4S - __ fmul(v24, __ T2D, v25, v26); // fmul v24.2D, v25.2D, v26.2D - __ fmul(v1, __ T4H, v2, v3); // fmul v1.4H, v2.4H, v3.4H - __ fmul(v12, __ T8H, v13, v14); // fmul v12.8H, v13.8H, v14.8H - __ mlav(v31, __ T4H, v0, v1); // mla v31.4H, v0.4H, v1.4H - __ mlav(v10, __ T8H, v11, v12); // mla v10.8H, v11.8H, v12.8H - __ mlav(v16, __ T2S, v17, v18); // mla v16.2S, v17.2S, v18.2S - __ mlav(v7, __ T4S, v8, v9); // mla v7.4S, v8.4S, v9.4S - __ fmla(v2, __ T2S, v3, v4); // fmla v2.2S, v3.2S, v4.2S - __ fmla(v3, __ T4S, v4, v5); // fmla v3.4S, v4.4S, v5.4S - __ fmla(v13, __ T2D, v14, v15); // fmla v13.2D, v14.2D, v15.2D - __ fmla(v19, __ T4H, v20, v21); // fmla v19.4H, v20.4H, v21.4H - __ fmla(v17, __ T8H, v18, v19); // fmla v17.8H, v18.8H, v19.8H - __ mlsv(v16, __ T4H, v17, v18); // mls v16.4H, v17.4H, v18.4H - __ mlsv(v3, __ T8H, v4, v5); // mls v3.8H, v4.8H, v5.8H - __ mlsv(v1, __ T2S, v2, v3); // mls v1.2S, v2.2S, v3.2S - __ mlsv(v11, __ T4S, v12, v13); // mls v11.4S, v12.4S, v13.4S - __ fmls(v30, __ T2S, v31, v0); // fmls v30.2S, v31.2S, v0.2S - __ fmls(v5, __ T4S, v6, v7); // fmls v5.4S, v6.4S, v7.4S - __ fmls(v8, __ T2D, v9, v10); // fmls v8.2D, v9.2D, v10.2D - __ fmls(v15, __ T4H, v16, v17); // fmls v15.4H, v16.4H, v17.4H - __ fmls(v29, __ T8H, v30, v31); // fmls v29.8H, v30.8H, v31.8H - __ fdiv(v30, __ T2S, v31, v0); // fdiv v30.2S, v31.2S, v0.2S - __ fdiv(v0, __ T4S, v1, v2); // fdiv v0.4S, v1.4S, v2.4S - __ fdiv(v20, __ T2D, v21, v22); // fdiv v20.2D, v21.2D, v22.2D - __ fdiv(v7, __ T4H, v8, v9); // fdiv v7.4H, v8.4H, v9.4H - __ fdiv(v20, __ T8H, v21, v22); // fdiv v20.8H, v21.8H, v22.8H - __ maxv(v23, __ T8B, v24, v25); // smax v23.8B, v24.8B, v25.8B - __ maxv(v28, __ T16B, v29, v30); // smax v28.16B, v29.16B, v30.16B - __ maxv(v21, __ T4H, v22, v23); // smax v21.4H, v22.4H, v23.4H - __ maxv(v27, __ T8H, v28, v29); // smax v27.8H, v28.8H, v29.8H - __ maxv(v25, __ T2S, v26, v27); // smax v25.2S, v26.2S, v27.2S - __ maxv(v5, __ T4S, v6, v7); // smax v5.4S, v6.4S, v7.4S - __ umaxv(v1, __ T8B, v2, v3); // umax v1.8B, v2.8B, v3.8B - __ umaxv(v23, __ T16B, v24, v25); // umax v23.16B, v24.16B, v25.16B - __ umaxv(v16, __ T4H, v17, v18); // umax v16.4H, v17.4H, v18.4H - __ umaxv(v31, __ T8H, v0, v1); // umax v31.8H, v0.8H, v1.8H - __ umaxv(v5, __ T2S, v6, v7); // umax v5.2S, v6.2S, v7.2S - __ umaxv(v12, __ T4S, v13, v14); // umax v12.4S, v13.4S, v14.4S - __ smaxp(v9, __ T8B, v10, v11); // smaxp v9.8B, v10.8B, v11.8B - __ smaxp(v28, __ T16B, v29, v30); // smaxp v28.16B, v29.16B, v30.16B - __ smaxp(v15, __ T4H, v16, v17); // smaxp v15.4H, v16.4H, v17.4H - __ smaxp(v29, __ T8H, v30, v31); // smaxp v29.8H, v30.8H, v31.8H - __ smaxp(v22, __ T2S, v23, v24); // smaxp v22.2S, v23.2S, v24.2S - __ smaxp(v31, __ T4S, v0, v1); // smaxp v31.4S, v0.4S, v1.4S - __ fmax(v19, __ T2S, v20, v21); // fmax v19.2S, v20.2S, v21.2S + __ fabd(v23, __ T4S, v24, v25); // fabd v23.4S, v24.4S, v25.4S + __ fabd(v12, __ T2D, v13, v14); // fabd v12.2D, v13.2D, v14.2D + __ fabd(v26, __ T4H, v27, v28); // fabd v26.4H, v27.4H, v28.4H + __ fabd(v23, __ T8H, v24, v25); // fabd v23.8H, v24.8H, v25.8H + __ faddp(v28, __ T2S, v29, v30); // faddp v28.2S, v29.2S, v30.2S + __ faddp(v14, __ T4S, v15, v16); // faddp v14.4S, v15.4S, v16.4S + __ faddp(v11, __ T2D, v12, v13); // faddp v11.2D, v12.2D, v13.2D + __ faddp(v24, __ T4H, v25, v26); // faddp v24.4H, v25.4H, v26.4H + __ faddp(v1, __ T8H, v2, v3); // faddp v1.8H, v2.8H, v3.8H + __ fmul(v12, __ T2S, v13, v14); // fmul v12.2S, v13.2S, v14.2S + __ fmul(v31, __ T4S, v0, v1); // fmul v31.4S, v0.4S, v1.4S + __ fmul(v10, __ T2D, v11, v12); // fmul v10.2D, v11.2D, v12.2D + __ fmul(v16, __ T4H, v17, v18); // fmul v16.4H, v17.4H, v18.4H + __ fmul(v7, __ T8H, v8, v9); // fmul v7.8H, v8.8H, v9.8H + __ mlav(v2, __ T4H, v3, v4); // mla v2.4H, v3.4H, v4.4H + __ mlav(v3, __ T8H, v4, v5); // mla v3.8H, v4.8H, v5.8H + __ mlav(v13, __ T2S, v14, v15); // mla v13.2S, v14.2S, v15.2S + __ mlav(v19, __ T4S, v20, v21); // mla v19.4S, v20.4S, v21.4S + __ fmla(v17, __ T2S, v18, v19); // fmla v17.2S, v18.2S, v19.2S + __ fmla(v16, __ T4S, v17, v18); // fmla v16.4S, v17.4S, v18.4S + __ fmla(v3, __ T2D, v4, v5); // fmla v3.2D, v4.2D, v5.2D + __ fmla(v1, __ T4H, v2, v3); // fmla v1.4H, v2.4H, v3.4H + __ fmla(v11, __ T8H, v12, v13); // fmla v11.8H, v12.8H, v13.8H + __ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H + __ mlsv(v5, __ T8H, v6, v7); // mls v5.8H, v6.8H, v7.8H + __ mlsv(v8, __ T2S, v9, v10); // mls v8.2S, v9.2S, v10.2S + __ mlsv(v15, __ T4S, v16, v17); // mls v15.4S, v16.4S, v17.4S + __ fmls(v29, __ T2S, v30, v31); // fmls v29.2S, v30.2S, v31.2S + __ fmls(v30, __ T4S, v31, v0); // fmls v30.4S, v31.4S, v0.4S + __ fmls(v0, __ T2D, v1, v2); // fmls v0.2D, v1.2D, v2.2D + __ fmls(v20, __ T4H, v21, v22); // fmls v20.4H, v21.4H, v22.4H + __ fmls(v7, __ T8H, v8, v9); // fmls v7.8H, v8.8H, v9.8H + __ fdiv(v20, __ T2S, v21, v22); // fdiv v20.2S, v21.2S, v22.2S + __ fdiv(v23, __ T4S, v24, v25); // fdiv v23.4S, v24.4S, v25.4S + __ fdiv(v28, __ T2D, v29, v30); // fdiv v28.2D, v29.2D, v30.2D + __ fdiv(v21, __ T4H, v22, v23); // fdiv v21.4H, v22.4H, v23.4H + __ fdiv(v27, __ T8H, v28, v29); // fdiv v27.8H, v28.8H, v29.8H + __ maxv(v25, __ T8B, v26, v27); // smax v25.8B, v26.8B, v27.8B + __ maxv(v5, __ T16B, v6, v7); // smax v5.16B, v6.16B, v7.16B + __ maxv(v1, __ T4H, v2, v3); // smax v1.4H, v2.4H, v3.4H + __ maxv(v23, __ T8H, v24, v25); // smax v23.8H, v24.8H, v25.8H + __ maxv(v16, __ T2S, v17, v18); // smax v16.2S, v17.2S, v18.2S + __ maxv(v31, __ T4S, v0, v1); // smax v31.4S, v0.4S, v1.4S + __ umaxv(v5, __ T8B, v6, v7); // umax v5.8B, v6.8B, v7.8B + __ umaxv(v12, __ T16B, v13, v14); // umax v12.16B, v13.16B, v14.16B + __ umaxv(v9, __ T4H, v10, v11); // umax v9.4H, v10.4H, v11.4H + __ umaxv(v28, __ T8H, v29, v30); // umax v28.8H, v29.8H, v30.8H + __ umaxv(v15, __ T2S, v16, v17); // umax v15.2S, v16.2S, v17.2S + __ umaxv(v29, __ T4S, v30, v31); // umax v29.4S, v30.4S, v31.4S + __ smaxp(v22, __ T8B, v23, v24); // smaxp v22.8B, v23.8B, v24.8B + __ smaxp(v31, __ T16B, v0, v1); // smaxp v31.16B, v0.16B, v1.16B + __ smaxp(v19, __ T4H, v20, v21); // smaxp v19.4H, v20.4H, v21.4H + __ smaxp(v31, __ T8H, v0, v1); // smaxp v31.8H, v0.8H, v1.8H + __ smaxp(v5, __ T2S, v6, v7); // smaxp v5.2S, v6.2S, v7.2S + __ smaxp(v14, __ T4S, v15, v16); // smaxp v14.4S, v15.4S, v16.4S + __ fmax(v18, __ T2S, v19, v20); // fmax v18.2S, v19.2S, v20.2S __ fmax(v31, __ T4S, v0, v1); // fmax v31.4S, v0.4S, v1.4S - __ fmax(v5, __ T2D, v6, v7); // fmax v5.2D, v6.2D, v7.2D - __ fmax(v14, __ T4H, v15, v16); // fmax v14.4H, v15.4H, v16.4H - __ fmax(v18, __ T8H, v19, v20); // fmax v18.8H, v19.8H, v20.8H - __ minv(v31, __ T8B, v0, v1); // smin v31.8B, v0.8B, v1.8B - __ minv(v18, __ T16B, v19, v20); // smin v18.16B, v19.16B, v20.16B - __ minv(v27, __ T4H, v28, v29); // smin v27.4H, v28.4H, v29.4H - __ minv(v20, __ T8H, v21, v22); // smin v20.8H, v21.8H, v22.8H - __ minv(v16, __ T2S, v17, v18); // smin v16.2S, v17.2S, v18.2S - __ minv(v12, __ T4S, v13, v14); // smin v12.4S, v13.4S, v14.4S - __ uminv(v11, __ T8B, v12, v13); // umin v11.8B, v12.8B, v13.8B - __ uminv(v9, __ T16B, v10, v11); // umin v9.16B, v10.16B, v11.16B - __ uminv(v6, __ T4H, v7, v8); // umin v6.4H, v7.4H, v8.4H + __ fmax(v18, __ T2D, v19, v20); // fmax v18.2D, v19.2D, v20.2D + __ fmax(v27, __ T4H, v28, v29); // fmax v27.4H, v28.4H, v29.4H + __ fmax(v20, __ T8H, v21, v22); // fmax v20.8H, v21.8H, v22.8H + __ minv(v16, __ T8B, v17, v18); // smin v16.8B, v17.8B, v18.8B + __ minv(v12, __ T16B, v13, v14); // smin v12.16B, v13.16B, v14.16B + __ minv(v11, __ T4H, v12, v13); // smin v11.4H, v12.4H, v13.4H + __ minv(v9, __ T8H, v10, v11); // smin v9.8H, v10.8H, v11.8H + __ minv(v6, __ T2S, v7, v8); // smin v6.2S, v7.2S, v8.2S + __ minv(v30, __ T4S, v31, v0); // smin v30.4S, v31.4S, v0.4S + __ uminv(v17, __ T8B, v18, v19); // umin v17.8B, v18.8B, v19.8B + __ uminv(v27, __ T16B, v28, v29); // umin v27.16B, v28.16B, v29.16B + __ uminv(v28, __ T4H, v29, v30); // umin v28.4H, v29.4H, v30.4H __ uminv(v30, __ T8H, v31, v0); // umin v30.8H, v31.8H, v0.8H - __ uminv(v17, __ T2S, v18, v19); // umin v17.2S, v18.2S, v19.2S - __ uminv(v27, __ T4S, v28, v29); // umin v27.4S, v28.4S, v29.4S - __ sminp(v28, __ T8B, v29, v30); // sminp v28.8B, v29.8B, v30.8B - __ sminp(v30, __ T16B, v31, v0); // sminp v30.16B, v31.16B, v0.16B - __ sminp(v7, __ T4H, v8, v9); // sminp v7.4H, v8.4H, v9.4H - __ sminp(v10, __ T8H, v11, v12); // sminp v10.8H, v11.8H, v12.8H - __ sminp(v20, __ T2S, v21, v22); // sminp v20.2S, v21.2S, v22.2S - __ sminp(v10, __ T4S, v11, v12); // sminp v10.4S, v11.4S, v12.4S - __ uminp(v4, __ T8B, v5, v6); // uminp v4.8B, v5.8B, v6.8B - __ uminp(v24, __ T16B, v25, v26); // uminp v24.16B, v25.16B, v26.16B - __ uminp(v17, __ T4H, v18, v19); // uminp v17.4H, v18.4H, v19.4H - __ uminp(v17, __ T8H, v18, v19); // uminp v17.8H, v18.8H, v19.8H + __ uminv(v7, __ T2S, v8, v9); // umin v7.2S, v8.2S, v9.2S + __ uminv(v10, __ T4S, v11, v12); // umin v10.4S, v11.4S, v12.4S + __ sminp(v20, __ T8B, v21, v22); // sminp v20.8B, v21.8B, v22.8B + __ sminp(v10, __ T16B, v11, v12); // sminp v10.16B, v11.16B, v12.16B + __ sminp(v4, __ T4H, v5, v6); // sminp v4.4H, v5.4H, v6.4H + __ sminp(v24, __ T8H, v25, v26); // sminp v24.8H, v25.8H, v26.8H + __ sminp(v17, __ T2S, v18, v19); // sminp v17.2S, v18.2S, v19.2S + __ sminp(v17, __ T4S, v18, v19); // sminp v17.4S, v18.4S, v19.4S + __ uminp(v22, __ T8B, v23, v24); // uminp v22.8B, v23.8B, v24.8B + __ uminp(v3, __ T16B, v4, v5); // uminp v3.16B, v4.16B, v5.16B + __ uminp(v29, __ T4H, v30, v31); // uminp v29.4H, v30.4H, v31.4H + __ uminp(v15, __ T8H, v16, v17); // uminp v15.8H, v16.8H, v17.8H __ uminp(v22, __ T2S, v23, v24); // uminp v22.2S, v23.2S, v24.2S - __ uminp(v3, __ T4S, v4, v5); // uminp v3.4S, v4.4S, v5.4S - __ umaxp(v29, __ T8B, v30, v31); // umaxp v29.8B, v30.8B, v31.8B - __ umaxp(v15, __ T16B, v16, v17); // umaxp v15.16B, v16.16B, v17.16B - __ umaxp(v22, __ T4H, v23, v24); // umaxp v22.4H, v23.4H, v24.4H - __ umaxp(v19, __ T8H, v20, v21); // umaxp v19.8H, v20.8H, v21.8H - __ umaxp(v19, __ T2S, v20, v21); // umaxp v19.2S, v20.2S, v21.2S - __ umaxp(v22, __ T4S, v23, v24); // umaxp v22.4S, v23.4S, v24.4S - __ sqdmulh(v2, __ T4H, v3, v4); // sqdmulh v2.4H, v3.4H, v4.4H - __ sqdmulh(v15, __ T8H, v16, v17); // sqdmulh v15.8H, v16.8H, v17.8H - __ sqdmulh(v6, __ T2S, v7, v8); // sqdmulh v6.2S, v7.2S, v8.2S - __ sqdmulh(v12, __ T4S, v13, v14); // sqdmulh v12.4S, v13.4S, v14.4S - __ shsubv(v16, __ T8B, v17, v18); // shsub v16.8B, v17.8B, v18.8B - __ shsubv(v11, __ T16B, v12, v13); // shsub v11.16B, v12.16B, v13.16B - __ shsubv(v13, __ T4H, v14, v15); // shsub v13.4H, v14.4H, v15.4H - __ shsubv(v23, __ T8H, v24, v25); // shsub v23.8H, v24.8H, v25.8H - __ shsubv(v1, __ T2S, v2, v3); // shsub v1.2S, v2.2S, v3.2S - __ shsubv(v30, __ T4S, v31, v0); // shsub v30.4S, v31.4S, v0.4S - __ fmin(v19, __ T2S, v20, v21); // fmin v19.2S, v20.2S, v21.2S - __ fmin(v5, __ T4S, v6, v7); // fmin v5.4S, v6.4S, v7.4S - __ fmin(v17, __ T2D, v18, v19); // fmin v17.2D, v18.2D, v19.2D - __ fmin(v2, __ T4H, v3, v4); // fmin v2.4H, v3.4H, v4.4H - __ fmin(v16, __ T8H, v17, v18); // fmin v16.8H, v17.8H, v18.8H - __ facgt(v22, __ T2S, v23, v24); // facgt v22.2S, v23.2S, v24.2S - __ facgt(v13, __ T4S, v14, v15); // facgt v13.4S, v14.4S, v15.4S - __ facgt(v10, __ T2D, v11, v12); // facgt v10.2D, v11.2D, v12.2D - __ facgt(v21, __ T4H, v22, v23); // facgt v21.4H, v22.4H, v23.4H - __ facgt(v29, __ T8H, v30, v31); // facgt v29.8H, v30.8H, v31.8H + __ uminp(v19, __ T4S, v20, v21); // uminp v19.4S, v20.4S, v21.4S + __ umaxp(v19, __ T8B, v20, v21); // umaxp v19.8B, v20.8B, v21.8B + __ umaxp(v22, __ T16B, v23, v24); // umaxp v22.16B, v23.16B, v24.16B + __ umaxp(v2, __ T4H, v3, v4); // umaxp v2.4H, v3.4H, v4.4H + __ umaxp(v15, __ T8H, v16, v17); // umaxp v15.8H, v16.8H, v17.8H + __ umaxp(v6, __ T2S, v7, v8); // umaxp v6.2S, v7.2S, v8.2S + __ umaxp(v12, __ T4S, v13, v14); // umaxp v12.4S, v13.4S, v14.4S + __ sqdmulh(v16, __ T4H, v17, v18); // sqdmulh v16.4H, v17.4H, v18.4H + __ sqdmulh(v11, __ T8H, v12, v13); // sqdmulh v11.8H, v12.8H, v13.8H + __ sqdmulh(v13, __ T2S, v14, v15); // sqdmulh v13.2S, v14.2S, v15.2S + __ sqdmulh(v23, __ T4S, v24, v25); // sqdmulh v23.4S, v24.4S, v25.4S + __ shsubv(v1, __ T8B, v2, v3); // shsub v1.8B, v2.8B, v3.8B + __ shsubv(v30, __ T16B, v31, v0); // shsub v30.16B, v31.16B, v0.16B + __ shsubv(v19, __ T4H, v20, v21); // shsub v19.4H, v20.4H, v21.4H + __ shsubv(v5, __ T8H, v6, v7); // shsub v5.8H, v6.8H, v7.8H + __ shsubv(v17, __ T2S, v18, v19); // shsub v17.2S, v18.2S, v19.2S + __ shsubv(v2, __ T4S, v3, v4); // shsub v2.4S, v3.4S, v4.4S + __ fmin(v16, __ T2S, v17, v18); // fmin v16.2S, v17.2S, v18.2S + __ fmin(v22, __ T4S, v23, v24); // fmin v22.4S, v23.4S, v24.4S + __ fmin(v13, __ T2D, v14, v15); // fmin v13.2D, v14.2D, v15.2D + __ fmin(v10, __ T4H, v11, v12); // fmin v10.4H, v11.4H, v12.4H + __ fmin(v21, __ T8H, v22, v23); // fmin v21.8H, v22.8H, v23.8H + __ facgt(v29, __ T2S, v30, v31); // facgt v29.2S, v30.2S, v31.2S + __ facgt(v27, __ T4S, v28, v29); // facgt v27.4S, v28.4S, v29.4S + __ facgt(v12, __ T2D, v13, v14); // facgt v12.2D, v13.2D, v14.2D + __ facgt(v27, __ T4H, v28, v29); // facgt v27.4H, v28.4H, v29.4H + __ facgt(v3, __ T8H, v4, v5); // facgt v3.8H, v4.8H, v5.8H // VectorScalarNEONInstruction - __ fmlavs(v6, __ T2S, v7, v8, 1); // fmla v6.2S, v7.2S, v8.S[1] - __ mulvs(v1, __ T4S, v2, v3, 3); // mul v1.4S, v2.4S, v3.S[3] - __ fmlavs(v15, __ T2D, v0, v1, 0); // fmla v15.2D, v0.2D, v1.D[0] - __ fmlsvs(v9, __ T2S, v10, v11, 1); // fmls v9.2S, v10.2S, v11.S[1] - __ mulvs(v4, __ T4S, v5, v6, 2); // mul v4.4S, v5.4S, v6.S[2] - __ fmlsvs(v13, __ T2D, v14, v15, 1); // fmls v13.2D, v14.2D, v15.D[1] - __ fmulxvs(v3, __ T2S, v4, v5, 0); // fmulx v3.2S, v4.2S, v5.S[0] - __ mulvs(v11, __ T4S, v12, v13, 2); // mul v11.4S, v12.4S, v13.S[2] - __ fmulxvs(v12, __ T2D, v13, v14, 1); // fmulx v12.2D, v13.2D, v14.D[1] - __ mulvs(v15, __ T4H, v0, v1, 0); // mul v15.4H, v0.4H, v1.H[0] - __ mulvs(v9, __ T8H, v10, v11, 6); // mul v9.8H, v10.8H, v11.H[6] - __ mulvs(v11, __ T2S, v12, v13, 0); // mul v11.2S, v12.2S, v13.S[0] - __ mulvs(v1, __ T4S, v2, v3, 2); // mul v1.4S, v2.4S, v3.S[2] + __ fmlavs(v15, __ T2S, v0, v1, 0); // fmla v15.2S, v0.2S, v1.S[0] + __ mulvs(v9, __ T4S, v10, v11, 3); // mul v9.4S, v10.4S, v11.S[3] + __ fmlavs(v4, __ T2D, v5, v6, 1); // fmla v4.2D, v5.2D, v6.D[1] + __ fmlsvs(v13, __ T2S, v14, v15, 1); // fmls v13.2S, v14.2S, v15.S[1] + __ mulvs(v3, __ T4S, v4, v5, 1); // mul v3.4S, v4.4S, v5.S[1] + __ fmlsvs(v11, __ T2D, v12, v13, 1); // fmls v11.2D, v12.2D, v13.D[1] + __ fmulxvs(v12, __ T2S, v13, v14, 1); // fmulx v12.2S, v13.2S, v14.S[1] + __ mulvs(v15, __ T4S, v0, v1, 0); // mul v15.4S, v0.4S, v1.S[0] + __ fmulxvs(v9, __ T2D, v10, v11, 1); // fmulx v9.2D, v10.2D, v11.D[1] + __ mulvs(v11, __ T4H, v12, v13, 1); // mul v11.4H, v12.4H, v13.H[1] + __ mulvs(v1, __ T8H, v2, v3, 4); // mul v1.8H, v2.8H, v3.H[4] + __ mulvs(v0, __ T2S, v1, v2, 1); // mul v0.2S, v1.2S, v2.S[1] + __ mulvs(v13, __ T4S, v14, v15, 3); // mul v13.4S, v14.4S, v15.S[3] // NEONVectorCompare - __ cm(Assembler::GT, v18, __ T8B, v19, v20); // cmgt v18.8B, v19.8B, v20.8B - __ cm(Assembler::GT, v0, __ T16B, v1, v2); // cmgt v0.16B, v1.16B, v2.16B - __ cm(Assembler::GT, v25, __ T4H, v26, v27); // cmgt v25.4H, v26.4H, v27.4H - __ cm(Assembler::GT, v26, __ T8H, v27, v28); // cmgt v26.8H, v27.8H, v28.8H - __ cm(Assembler::GT, v23, __ T2S, v24, v25); // cmgt v23.2S, v24.2S, v25.2S - __ cm(Assembler::GT, v2, __ T4S, v3, v4); // cmgt v2.4S, v3.4S, v4.4S - __ cm(Assembler::GT, v18, __ T2D, v19, v20); // cmgt v18.2D, v19.2D, v20.2D - __ cm(Assembler::GE, v12, __ T8B, v13, v14); // cmge v12.8B, v13.8B, v14.8B - __ cm(Assembler::GE, v4, __ T16B, v5, v6); // cmge v4.16B, v5.16B, v6.16B - __ cm(Assembler::GE, v28, __ T4H, v29, v30); // cmge v28.4H, v29.4H, v30.4H - __ cm(Assembler::GE, v30, __ T8H, v31, v0); // cmge v30.8H, v31.8H, v0.8H - __ cm(Assembler::GE, v29, __ T2S, v30, v31); // cmge v29.2S, v30.2S, v31.2S - __ cm(Assembler::GE, v16, __ T4S, v17, v18); // cmge v16.4S, v17.4S, v18.4S - __ cm(Assembler::GE, v27, __ T2D, v28, v29); // cmge v27.2D, v28.2D, v29.2D - __ cm(Assembler::EQ, v6, __ T8B, v7, v8); // cmeq v6.8B, v7.8B, v8.8B - __ cm(Assembler::EQ, v9, __ T16B, v10, v11); // cmeq v9.16B, v10.16B, v11.16B - __ cm(Assembler::EQ, v29, __ T4H, v30, v31); // cmeq v29.4H, v30.4H, v31.4H - __ cm(Assembler::EQ, v18, __ T8H, v19, v20); // cmeq v18.8H, v19.8H, v20.8H - __ cm(Assembler::EQ, v7, __ T2S, v8, v9); // cmeq v7.2S, v8.2S, v9.2S - __ cm(Assembler::EQ, v4, __ T4S, v5, v6); // cmeq v4.4S, v5.4S, v6.4S - __ cm(Assembler::EQ, v7, __ T2D, v8, v9); // cmeq v7.2D, v8.2D, v9.2D - __ cm(Assembler::HI, v15, __ T8B, v16, v17); // cmhi v15.8B, v16.8B, v17.8B - __ cm(Assembler::HI, v9, __ T16B, v10, v11); // cmhi v9.16B, v10.16B, v11.16B - __ cm(Assembler::HI, v23, __ T4H, v24, v25); // cmhi v23.4H, v24.4H, v25.4H - __ cm(Assembler::HI, v8, __ T8H, v9, v10); // cmhi v8.8H, v9.8H, v10.8H - __ cm(Assembler::HI, v2, __ T2S, v3, v4); // cmhi v2.2S, v3.2S, v4.2S - __ cm(Assembler::HI, v28, __ T4S, v29, v30); // cmhi v28.4S, v29.4S, v30.4S - __ cm(Assembler::HI, v21, __ T2D, v22, v23); // cmhi v21.2D, v22.2D, v23.2D - __ cm(Assembler::HS, v31, __ T8B, v0, v1); // cmhs v31.8B, v0.8B, v1.8B - __ cm(Assembler::HS, v5, __ T16B, v6, v7); // cmhs v5.16B, v6.16B, v7.16B - __ cm(Assembler::HS, v27, __ T4H, v28, v29); // cmhs v27.4H, v28.4H, v29.4H - __ cm(Assembler::HS, v0, __ T8H, v1, v2); // cmhs v0.8H, v1.8H, v2.8H - __ cm(Assembler::HS, v17, __ T2S, v18, v19); // cmhs v17.2S, v18.2S, v19.2S - __ cm(Assembler::HS, v15, __ T4S, v16, v17); // cmhs v15.4S, v16.4S, v17.4S - __ cm(Assembler::HS, v4, __ T2D, v5, v6); // cmhs v4.2D, v5.2D, v6.2D - __ fcm(Assembler::EQ, v26, __ T2S, v27, v28); // fcmeq v26.2S, v27.2S, v28.2S - __ fcm(Assembler::EQ, v8, __ T4S, v9, v10); // fcmeq v8.4S, v9.4S, v10.4S - __ fcm(Assembler::EQ, v28, __ T2D, v29, v30); // fcmeq v28.2D, v29.2D, v30.2D - __ fcm(Assembler::GT, v22, __ T2S, v23, v24); // fcmgt v22.2S, v23.2S, v24.2S - __ fcm(Assembler::GT, v27, __ T4S, v28, v29); // fcmgt v27.4S, v28.4S, v29.4S - __ fcm(Assembler::GT, v27, __ T2D, v28, v29); // fcmgt v27.2D, v28.2D, v29.2D - __ fcm(Assembler::GE, v25, __ T2S, v26, v27); // fcmge v25.2S, v26.2S, v27.2S - __ fcm(Assembler::GE, v23, __ T4S, v24, v25); // fcmge v23.4S, v24.4S, v25.4S + __ cm(Assembler::GT, v23, __ T8B, v24, v25); // cmgt v23.8B, v24.8B, v25.8B + __ cm(Assembler::GT, v2, __ T16B, v3, v4); // cmgt v2.16B, v3.16B, v4.16B + __ cm(Assembler::GT, v18, __ T4H, v19, v20); // cmgt v18.4H, v19.4H, v20.4H + __ cm(Assembler::GT, v12, __ T8H, v13, v14); // cmgt v12.8H, v13.8H, v14.8H + __ cm(Assembler::GT, v4, __ T2S, v5, v6); // cmgt v4.2S, v5.2S, v6.2S + __ cm(Assembler::GT, v28, __ T4S, v29, v30); // cmgt v28.4S, v29.4S, v30.4S + __ cm(Assembler::GT, v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D + __ cm(Assembler::GE, v29, __ T8B, v30, v31); // cmge v29.8B, v30.8B, v31.8B + __ cm(Assembler::GE, v16, __ T16B, v17, v18); // cmge v16.16B, v17.16B, v18.16B + __ cm(Assembler::GE, v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H + __ cm(Assembler::GE, v6, __ T8H, v7, v8); // cmge v6.8H, v7.8H, v8.8H + __ cm(Assembler::GE, v9, __ T2S, v10, v11); // cmge v9.2S, v10.2S, v11.2S + __ cm(Assembler::GE, v29, __ T4S, v30, v31); // cmge v29.4S, v30.4S, v31.4S + __ cm(Assembler::GE, v18, __ T2D, v19, v20); // cmge v18.2D, v19.2D, v20.2D + __ cm(Assembler::EQ, v7, __ T8B, v8, v9); // cmeq v7.8B, v8.8B, v9.8B + __ cm(Assembler::EQ, v4, __ T16B, v5, v6); // cmeq v4.16B, v5.16B, v6.16B + __ cm(Assembler::EQ, v7, __ T4H, v8, v9); // cmeq v7.4H, v8.4H, v9.4H + __ cm(Assembler::EQ, v15, __ T8H, v16, v17); // cmeq v15.8H, v16.8H, v17.8H + __ cm(Assembler::EQ, v9, __ T2S, v10, v11); // cmeq v9.2S, v10.2S, v11.2S + __ cm(Assembler::EQ, v23, __ T4S, v24, v25); // cmeq v23.4S, v24.4S, v25.4S + __ cm(Assembler::EQ, v8, __ T2D, v9, v10); // cmeq v8.2D, v9.2D, v10.2D + __ cm(Assembler::HI, v2, __ T8B, v3, v4); // cmhi v2.8B, v3.8B, v4.8B + __ cm(Assembler::HI, v28, __ T16B, v29, v30); // cmhi v28.16B, v29.16B, v30.16B + __ cm(Assembler::HI, v21, __ T4H, v22, v23); // cmhi v21.4H, v22.4H, v23.4H + __ cm(Assembler::HI, v31, __ T8H, v0, v1); // cmhi v31.8H, v0.8H, v1.8H + __ cm(Assembler::HI, v5, __ T2S, v6, v7); // cmhi v5.2S, v6.2S, v7.2S + __ cm(Assembler::HI, v27, __ T4S, v28, v29); // cmhi v27.4S, v28.4S, v29.4S + __ cm(Assembler::HI, v0, __ T2D, v1, v2); // cmhi v0.2D, v1.2D, v2.2D + __ cm(Assembler::HS, v17, __ T8B, v18, v19); // cmhs v17.8B, v18.8B, v19.8B + __ cm(Assembler::HS, v15, __ T16B, v16, v17); // cmhs v15.16B, v16.16B, v17.16B + __ cm(Assembler::HS, v4, __ T4H, v5, v6); // cmhs v4.4H, v5.4H, v6.4H + __ cm(Assembler::HS, v26, __ T8H, v27, v28); // cmhs v26.8H, v27.8H, v28.8H + __ cm(Assembler::HS, v8, __ T2S, v9, v10); // cmhs v8.2S, v9.2S, v10.2S + __ cm(Assembler::HS, v28, __ T4S, v29, v30); // cmhs v28.4S, v29.4S, v30.4S + __ cm(Assembler::HS, v22, __ T2D, v23, v24); // cmhs v22.2D, v23.2D, v24.2D + __ fcm(Assembler::EQ, v27, __ T2S, v28, v29); // fcmeq v27.2S, v28.2S, v29.2S + __ fcm(Assembler::EQ, v27, __ T4S, v28, v29); // fcmeq v27.4S, v28.4S, v29.4S + __ fcm(Assembler::EQ, v25, __ T2D, v26, v27); // fcmeq v25.2D, v26.2D, v27.2D + __ fcm(Assembler::GT, v23, __ T2S, v24, v25); // fcmgt v23.2S, v24.2S, v25.2S + __ fcm(Assembler::GT, v0, __ T4S, v1, v2); // fcmgt v0.4S, v1.4S, v2.4S + __ fcm(Assembler::GT, v4, __ T2D, v5, v6); // fcmgt v4.2D, v5.2D, v6.2D + __ fcm(Assembler::GE, v6, __ T2S, v7, v8); // fcmge v6.2S, v7.2S, v8.2S + __ fcm(Assembler::GE, v18, __ T4S, v19, v20); // fcmge v18.4S, v19.4S, v20.4S __ fcm(Assembler::GE, v0, __ T2D, v1, v2); // fcmge v0.2D, v1.2D, v2.2D // SVEComparisonWithZero - __ sve_fcm(Assembler::EQ, p2, __ D, p0, z6, 0.0); // fcmeq p2.d, p0/z, z6.d, #0.0 - __ sve_fcm(Assembler::GT, p2, __ S, p2, z15, 0.0); // fcmgt p2.s, p2/z, z15.s, #0.0 - __ sve_fcm(Assembler::GE, p3, __ S, p7, z5, 0.0); // fcmge p3.s, p7/z, z5.s, #0.0 - __ sve_fcm(Assembler::LT, p3, __ D, p5, z20, 0.0); // fcmlt p3.d, p5/z, z20.d, #0.0 - __ sve_fcm(Assembler::LE, p3, __ S, p4, z11, 0.0); // fcmle p3.s, p4/z, z11.s, #0.0 - __ sve_fcm(Assembler::NE, p15, __ D, p0, z6, 0.0); // fcmne p15.d, p0/z, z6.d, #0.0 + __ sve_fcm(Assembler::EQ, p2, __ S, p2, z15, 0.0); // fcmeq p2.s, p2/z, z15.s, #0.0 + __ sve_fcm(Assembler::GT, p3, __ S, p7, z5, 0.0); // fcmgt p3.s, p7/z, z5.s, #0.0 + __ sve_fcm(Assembler::GE, p3, __ D, p5, z20, 0.0); // fcmge p3.d, p5/z, z20.d, #0.0 + __ sve_fcm(Assembler::LT, p3, __ S, p4, z11, 0.0); // fcmlt p3.s, p4/z, z11.s, #0.0 + __ sve_fcm(Assembler::LE, p15, __ D, p0, z6, 0.0); // fcmle p15.d, p0/z, z6.d, #0.0 + __ sve_fcm(Assembler::NE, p6, __ D, p0, z30, 0.0); // fcmne p6.d, p0/z, z30.d, #0.0 // SVEComparisonWithImm - __ sve_cmp(Assembler::EQ, p6, __ D, p0, z30, 11); // cmpeq p6.d, p0/z, z30.d, #11 - __ sve_cmp(Assembler::GT, p11, __ H, p3, z29, 12); // cmpgt p11.h, p3/z, z29.h, #12 - __ sve_cmp(Assembler::GE, p8, __ B, p0, z24, -2); // cmpge p8.b, p0/z, z24.b, #-2 - __ sve_cmp(Assembler::LT, p5, __ H, p6, z16, 7); // cmplt p5.h, p6/z, z16.h, #7 - __ sve_cmp(Assembler::LE, p6, __ S, p4, z4, -12); // cmple p6.s, p4/z, z4.s, #-12 - __ sve_cmp(Assembler::NE, p0, __ S, p4, z19, -3); // cmpne p0.s, p4/z, z19.s, #-3 - __ sve_cmp(Assembler::HS, p7, __ B, p4, z12, 15); // cmphs p7.b, p4/z, z12.b, #15 - __ sve_cmp(Assembler::HI, p10, __ B, p1, z23, 30); // cmphi p10.b, p1/z, z23.b, #30 - __ sve_cmp(Assembler::LS, p9, __ D, p4, z13, 67); // cmpls p9.d, p4/z, z13.d, #67 - __ sve_cmp(Assembler::LO, p3, __ D, p0, z2, 16); // cmplo p3.d, p0/z, z2.d, #16 + __ sve_cmp(Assembler::EQ, p13, __ D, p3, z22, -3); // cmpeq p13.d, p3/z, z22.d, #-3 + __ sve_cmp(Assembler::GT, p14, __ D, p1, z17, -14); // cmpgt p14.d, p1/z, z17.d, #-14 + __ sve_cmp(Assembler::GE, p7, __ S, p2, z10, 11); // cmpge p7.s, p2/z, z10.s, #11 + __ sve_cmp(Assembler::LT, p11, __ B, p5, z12, 1); // cmplt p11.b, p5/z, z12.b, #1 + __ sve_cmp(Assembler::LE, p2, __ S, p4, z1, 2); // cmple p2.s, p4/z, z1.s, #2 + __ sve_cmp(Assembler::NE, p6, __ H, p0, z14, 1); // cmpne p6.h, p0/z, z14.h, #1 + __ sve_cmp(Assembler::HS, p1, __ S, p1, z21, 25); // cmphs p1.s, p1/z, z21.s, #25 + __ sve_cmp(Assembler::HI, p3, __ H, p7, z19, 70); // cmphi p3.h, p7/z, z19.h, #70 + __ sve_cmp(Assembler::LS, p8, __ B, p7, z6, 12); // cmpls p8.b, p7/z, z6.b, #12 + __ sve_cmp(Assembler::LO, p2, __ S, p5, z6, 55); // cmplo p2.s, p5/z, z6.s, #55 // SpecialCases __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE @@ -1210,241 +1216,241 @@ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 // LSEOp - __ swp(Assembler::xword, r6, r16, r20); // swp x6, x16, [x20] - __ ldadd(Assembler::xword, r13, r12, r20); // ldadd x13, x12, [x20] - __ ldbic(Assembler::xword, r8, r25, r20); // ldclr x8, x25, [x20] - __ ldeor(Assembler::xword, r19, r0, r11); // ldeor x19, x0, [x11] - __ ldorr(Assembler::xword, r24, r6, r20); // ldset x24, x6, [x20] - __ ldsmin(Assembler::xword, zr, r14, r16); // ldsmin xzr, x14, [x16] - __ ldsmax(Assembler::xword, r6, r0, r7); // ldsmax x6, x0, [x7] - __ ldumin(Assembler::xword, r15, r19, r26); // ldumin x15, x19, [x26] - __ ldumax(Assembler::xword, r9, r10, r23); // ldumax x9, x10, [x23] + __ swp(Assembler::xword, r12, r20, r8); // swp x12, x20, [x8] + __ ldadd(Assembler::xword, r25, r20, r19); // ldadd x25, x20, [x19] + __ ldbic(Assembler::xword, r0, r11, r24); // ldclr x0, x11, [x24] + __ ldeor(Assembler::xword, r6, r20, sp); // ldeor x6, x20, [sp] + __ ldorr(Assembler::xword, r14, r16, r6); // ldset x14, x16, [x6] + __ ldsmin(Assembler::xword, r0, r7, r15); // ldsmin x0, x7, [x15] + __ ldsmax(Assembler::xword, r19, r26, r9); // ldsmax x19, x26, [x9] + __ ldumin(Assembler::xword, r10, r23, r21); // ldumin x10, x23, [x21] + __ ldumax(Assembler::xword, r22, r28, r2); // ldumax x22, x28, [x2] // LSEOp - __ swpa(Assembler::xword, r21, r22, r28); // swpa x21, x22, [x28] - __ ldadda(Assembler::xword, r2, r3, r15); // ldadda x2, x3, [x15] - __ ldbica(Assembler::xword, r19, r20, r7); // ldclra x19, x20, [x7] - __ ldeora(Assembler::xword, r4, r29, r7); // ldeora x4, x29, [x7] - __ ldorra(Assembler::xword, r0, r9, r16); // ldseta x0, x9, [x16] - __ ldsmina(Assembler::xword, r20, r23, r4); // ldsmina x20, x23, [x4] - __ ldsmaxa(Assembler::xword, r16, r10, r23); // ldsmaxa x16, x10, [x23] - __ ldumina(Assembler::xword, r11, r25, r6); // ldumina x11, x25, [x6] - __ ldumaxa(Assembler::xword, zr, r16, r13); // ldumaxa xzr, x16, [x13] + __ swpa(Assembler::xword, r3, r15, r19); // swpa x3, x15, [x19] + __ ldadda(Assembler::xword, r20, r7, r4); // ldadda x20, x7, [x4] + __ ldbica(Assembler::xword, r29, r7, r0); // ldclra x29, x7, [x0] + __ ldeora(Assembler::xword, r9, r16, r20); // ldeora x9, x16, [x20] + __ ldorra(Assembler::xword, r23, r4, r16); // ldseta x23, x4, [x16] + __ ldsmina(Assembler::xword, r10, r23, r11); // ldsmina x10, x23, [x11] + __ ldsmaxa(Assembler::xword, r25, r6, sp); // ldsmaxa x25, x6, [sp] + __ ldumina(Assembler::xword, r16, r13, r23); // ldumina x16, x13, [x23] + __ ldumaxa(Assembler::xword, r12, r1, r14); // ldumaxa x12, x1, [x14] // LSEOp - __ swpal(Assembler::xword, r23, r12, r1); // swpal x23, x12, [x1] - __ ldaddal(Assembler::xword, r14, r9, r21); // ldaddal x14, x9, [x21] - __ ldbical(Assembler::xword, r16, r26, r15); // ldclral x16, x26, [x15] - __ ldeoral(Assembler::xword, r4, r4, r15); // ldeoral x4, x4, [x15] - __ ldorral(Assembler::xword, r8, r6, r30); // ldsetal x8, x6, [x30] - __ ldsminal(Assembler::xword, r4, r29, r17); // ldsminal x4, x29, [x17] - __ ldsmaxal(Assembler::xword, r29, r26, r9); // ldsmaxal x29, x26, [x9] - __ lduminal(Assembler::xword, r15, r2, r11); // lduminal x15, x2, [x11] - __ ldumaxal(Assembler::xword, r29, r3, r7); // ldumaxal x29, x3, [x7] + __ swpal(Assembler::xword, r9, r21, r16); // swpal x9, x21, [x16] + __ ldaddal(Assembler::xword, r26, r15, r4); // ldaddal x26, x15, [x4] + __ ldbical(Assembler::xword, r4, r16, r8); // ldclral x4, x16, [x8] + __ ldeoral(Assembler::xword, r6, r30, r4); // ldeoral x6, x30, [x4] + __ ldorral(Assembler::xword, r29, r17, r29); // ldsetal x29, x17, [x29] + __ ldsminal(Assembler::xword, r26, r9, r15); // ldsminal x26, x9, [x15] + __ ldsmaxal(Assembler::xword, r2, r11, r29); // ldsmaxal x2, x11, [x29] + __ lduminal(Assembler::xword, r3, r7, r1); // lduminal x3, x7, [x1] + __ ldumaxal(Assembler::xword, r27, r21, r15); // ldumaxal x27, x21, [x15] // LSEOp - __ swpl(Assembler::xword, r1, r27, r21); // swpl x1, x27, [x21] - __ ldaddl(Assembler::xword, r16, r14, r8); // ldaddl x16, x14, [x8] - __ ldbicl(Assembler::xword, r16, r22, r25); // ldclrl x16, x22, [x25] - __ ldeorl(Assembler::xword, r5, r20, r21); // ldeorl x5, x20, [x21] - __ ldorrl(Assembler::xword, r16, r23, r16); // ldsetl x16, x23, [x16] - __ ldsminl(Assembler::xword, r30, r20, r20); // ldsminl x30, x20, [x20] - __ ldsmaxl(Assembler::xword, r0, r4, r19); // ldsmaxl x0, x4, [x19] - __ lduminl(Assembler::xword, r24, r4, r20); // lduminl x24, x4, [x20] - __ ldumaxl(Assembler::xword, r4, r24, r26); // ldumaxl x4, x24, [x26] + __ swpl(Assembler::xword, r14, r8, r15); // swpl x14, x8, [x15] + __ ldaddl(Assembler::xword, r22, r25, r5); // ldaddl x22, x25, [x5] + __ ldbicl(Assembler::xword, r20, r21, r15); // ldclrl x20, x21, [x15] + __ ldeorl(Assembler::xword, r23, r16, r30); // ldeorl x23, x16, [x30] + __ ldorrl(Assembler::xword, r20, r20, r0); // ldsetl x20, x20, [x0] + __ ldsminl(Assembler::xword, r4, r19, r24); // ldsminl x4, x19, [x24] + __ ldsmaxl(Assembler::xword, r4, r20, r4); // ldsmaxl x4, x20, [x4] + __ lduminl(Assembler::xword, r24, r26, r19); // lduminl x24, x26, [x19] + __ ldumaxl(Assembler::xword, r2, r8, r8); // ldumaxl x2, x8, [x8] // LSEOp - __ swp(Assembler::word, r19, r2, r8); // swp w19, w2, [x8] - __ ldadd(Assembler::word, r8, r14, r24); // ldadd w8, w14, [x24] - __ ldbic(Assembler::word, r16, zr, r22); // ldclr w16, wzr, [x22] - __ ldeor(Assembler::word, r4, zr, r1); // ldeor w4, wzr, [x1] - __ ldorr(Assembler::word, r10, r20, r12); // ldset w10, w20, [x12] - __ ldsmin(Assembler::word, r0, r9, r7); // ldsmin w0, w9, [x7] - __ ldsmax(Assembler::word, r24, r16, r4); // ldsmax w24, w16, [x4] - __ ldumin(Assembler::word, r27, r6, r10); // ldumin w27, w6, [x10] - __ ldumax(Assembler::word, r27, r24, r13); // ldumax w27, w24, [x13] + __ swp(Assembler::word, r14, r24, r15); // swp w14, w24, [x15] + __ ldadd(Assembler::word, zr, r22, r4); // ldadd wzr, w22, [x4] + __ ldbic(Assembler::word, zr, r1, r10); // ldclr wzr, w1, [x10] + __ ldeor(Assembler::word, r20, r12, r0); // ldeor w20, w12, [x0] + __ ldorr(Assembler::word, r9, r7, r24); // ldset w9, w7, [x24] + __ ldsmin(Assembler::word, r16, r4, r27); // ldsmin w16, w4, [x27] + __ ldsmax(Assembler::word, r6, r10, r27); // ldsmax w6, w10, [x27] + __ ldumin(Assembler::word, r24, r13, r16); // ldumin w24, w13, [x16] + __ ldumax(Assembler::word, zr, r22, r22); // ldumax wzr, w22, [x22] // LSEOp - __ swpa(Assembler::word, r16, zr, r22); // swpa w16, wzr, [x22] - __ ldadda(Assembler::word, r22, r20, sp); // ldadda w22, w20, [sp] - __ ldbica(Assembler::word, r29, r9, r14); // ldclra w29, w9, [x14] - __ ldeora(Assembler::word, r20, r7, r20); // ldeora w20, w7, [x20] - __ ldorra(Assembler::word, r28, r9, r11); // ldseta w28, w9, [x11] - __ ldsmina(Assembler::word, r14, r12, r20); // ldsmina w14, w12, [x20] - __ ldsmaxa(Assembler::word, r1, r24, r9); // ldsmaxa w1, w24, [x9] - __ ldumina(Assembler::word, r19, r13, r19); // ldumina w19, w13, [x19] - __ ldumaxa(Assembler::word, r16, r16, r5); // ldumaxa w16, w16, [x5] + __ swpa(Assembler::word, r20, zr, r29); // swpa w20, wzr, [x29] + __ ldadda(Assembler::word, r9, r14, r20); // ldadda w9, w14, [x20] + __ ldbica(Assembler::word, r7, r20, r28); // ldclra w7, w20, [x28] + __ ldeora(Assembler::word, r9, r11, r14); // ldeora w9, w11, [x14] + __ ldorra(Assembler::word, r12, r20, r1); // ldseta w12, w20, [x1] + __ ldsmina(Assembler::word, r24, r9, r19); // ldsmina w24, w9, [x19] + __ ldsmaxa(Assembler::word, r13, r19, r15); // ldsmaxa w13, w19, [x15] + __ ldumina(Assembler::word, r16, r5, r0); // ldumina w16, w5, [x0] + __ ldumaxa(Assembler::word, r3, r12, r8); // ldumaxa w3, w12, [x8] // LSEOp - __ swpal(Assembler::word, r0, r3, r12); // swpal w0, w3, [x12] - __ ldaddal(Assembler::word, r8, r15, r15); // ldaddal w8, w15, [x15] - __ ldbical(Assembler::word, r16, r4, r15); // ldclral w16, w4, [x15] - __ ldeoral(Assembler::word, r30, r5, r0); // ldeoral w30, w5, [x0] - __ ldorral(Assembler::word, r10, r22, r27); // ldsetal w10, w22, [x27] - __ ldsminal(Assembler::word, r3, r0, r9); // ldsminal w3, w0, [x9] - __ ldsmaxal(Assembler::word, r19, r29, r10); // ldsmaxal w19, w29, [x10] - __ lduminal(Assembler::word, r24, r4, r20); // lduminal w24, w4, [x20] - __ ldumaxal(Assembler::word, r7, r24, r29); // ldumaxal w7, w24, [x29] + __ swpal(Assembler::word, r15, r15, r16); // swpal w15, w15, [x16] + __ ldaddal(Assembler::word, r4, r15, r30); // ldaddal w4, w15, [x30] + __ ldbical(Assembler::word, r5, r0, r10); // ldclral w5, w0, [x10] + __ ldeoral(Assembler::word, r22, r27, r3); // ldeoral w22, w27, [x3] + __ ldorral(Assembler::word, r0, r9, r19); // ldsetal w0, w9, [x19] + __ ldsminal(Assembler::word, r29, r10, r24); // ldsminal w29, w10, [x24] + __ ldsmaxal(Assembler::word, r4, r20, r7); // ldsmaxal w4, w20, [x7] + __ lduminal(Assembler::word, r24, r29, r14); // lduminal w24, w29, [x14] + __ ldumaxal(Assembler::word, r21, r11, r27); // ldumaxal w21, w11, [x27] // LSEOp - __ swpl(Assembler::word, r14, r21, r11); // swpl w14, w21, [x11] - __ ldaddl(Assembler::word, r27, r13, r15); // ldaddl w27, w13, [x15] - __ ldbicl(Assembler::word, zr, r17, r14); // ldclrl wzr, w17, [x14] - __ ldeorl(Assembler::word, r3, r30, r16); // ldeorl w3, w30, [x16] - __ ldorrl(Assembler::word, r22, r20, r7); // ldsetl w22, w20, [x7] - __ ldsminl(Assembler::word, r20, r3, r1); // ldsminl w20, w3, [x1] - __ ldsmaxl(Assembler::word, r26, r19, r9); // ldsmaxl w26, w19, [x9] - __ lduminl(Assembler::word, r16, r17, r21); // lduminl w16, w17, [x21] - __ ldumaxl(Assembler::word, r0, r4, r2); // ldumaxl w0, w4, [x2] + __ swpl(Assembler::word, r13, r16, sp); // swpl w13, w16, [sp] + __ ldaddl(Assembler::word, r17, r14, r3); // ldaddl w17, w14, [x3] + __ ldbicl(Assembler::word, r30, r16, r22); // ldclrl w30, w16, [x22] + __ ldeorl(Assembler::word, r20, r7, r20); // ldeorl w20, w7, [x20] + __ ldorrl(Assembler::word, r3, r1, r26); // ldsetl w3, w1, [x26] + __ ldsminl(Assembler::word, r19, r9, r16); // ldsminl w19, w9, [x16] + __ ldsmaxl(Assembler::word, r17, r21, r0); // ldsmaxl w17, w21, [x0] + __ lduminl(Assembler::word, r4, r2, r24); // lduminl w4, w2, [x24] + __ ldumaxl(Assembler::word, r14, r6, r11); // ldumaxl w14, w6, [x11] // SHA3SIMDOp - __ bcax(v24, __ T16B, v14, v6, v11); // bcax v24.16B, v14.16B, v6.16B, v11.16B - __ eor3(v21, __ T16B, v14, v17, v30); // eor3 v21.16B, v14.16B, v17.16B, v30.16B - __ rax1(v12, __ T2D, v3, v3); // rax1 v12.2D, v3.2D, v3.2D - __ xar(v23, __ T2D, v9, v3, 49); // xar v23.2D, v9.2D, v3.2D, #49 + __ bcax(v21, __ T16B, v14, v17, v30); // bcax v21.16B, v14.16B, v17.16B, v30.16B + __ eor3(v12, __ T16B, v3, v3, v23); // eor3 v12.16B, v3.16B, v3.16B, v23.16B + __ rax1(v9, __ T2D, v3, v24); // rax1 v9.2D, v3.2D, v24.2D + __ xar(v28, __ T2D, v3, v19, 47); // xar v28.2D, v3.2D, v19.2D, #47 // SHA512SIMDOp - __ sha512h(v28, __ T2D, v3, v19); // sha512h q28, q3, v19.2D - __ sha512h2(v23, __ T2D, v7, v26); // sha512h2 q23, q7, v26.2D - __ sha512su0(v21, __ T2D, v14); // sha512su0 v21.2D, v14.2D - __ sha512su1(v5, __ T2D, v8, v26); // sha512su1 v5.2D, v8.2D, v26.2D + __ sha512h(v7, __ T2D, v26, v21); // sha512h q7, q26, v21.2D + __ sha512h2(v14, __ T2D, v5, v8); // sha512h2 q14, q5, v8.2D + __ sha512su0(v26, __ T2D, v5); // sha512su0 v26.2D, v5.2D + __ sha512su1(v22, __ T2D, v18, v17); // sha512su1 v22.2D, v18.2D, v17.2D // SVEBinaryImmOp - __ sve_add(z5, __ S, 146u); // add z5.s, z5.s, #0x92 - __ sve_sub(z17, __ B, 31u); // sub z17.b, z17.b, #0x1f - __ sve_and(z9, __ S, 16744448u); // and z9.s, z9.s, #0xff8000 - __ sve_eor(z12, __ H, 33279u); // eor z12.h, z12.h, #0x81ff - __ sve_orr(z11, __ H, 49663u); // orr z11.h, z11.h, #0xc1ff + __ sve_add(z0, __ B, 79u); // add z0.b, z0.b, #0x4f + __ sve_sub(z20, __ H, 65u); // sub z20.h, z20.h, #0x41 + __ sve_and(z12, __ H, 33279u); // and z12.h, z12.h, #0x81ff + __ sve_eor(z11, __ H, 49663u); // eor z11.h, z11.h, #0xc1ff + __ sve_orr(z31, __ S, 2147484159u); // orr z31.s, z31.s, #0x800001ff // SVEBinaryImmOp - __ sve_add(z31, __ S, 72u); // add z31.s, z31.s, #0x48 - __ sve_sub(z16, __ H, 218u); // sub z16.h, z16.h, #0xda - __ sve_and(z23, __ D, 562675075514368u); // and z23.d, z23.d, #0x1ffc000000000 - __ sve_eor(z8, __ B, 243u); // eor z8.b, z8.b, #0xf3 - __ sve_orr(z10, __ B, 239u); // orr z10.b, z10.b, #0xef + __ sve_add(z15, __ D, 188u); // add z15.d, z15.d, #0xbc + __ sve_sub(z28, __ S, 64u); // sub z28.s, z28.s, #0x40 + __ sve_and(z8, __ B, 243u); // and z8.b, z8.b, #0xf3 + __ sve_eor(z10, __ B, 239u); // eor z10.b, z10.b, #0xef + __ sve_orr(z22, __ S, 32768u); // orr z22.s, z22.s, #0x8000 // SVEBinaryImmOp - __ sve_add(z22, __ S, 5u); // add z22.s, z22.s, #0x5 - __ sve_sub(z3, __ S, 209u); // sub z3.s, z3.s, #0xd1 - __ sve_and(z5, __ D, 17870287719452639231u); // and z5.d, z5.d, #0xf80003ffffffffff - __ sve_eor(z17, __ B, 128u); // eor z17.b, z17.b, #0x80 - __ sve_orr(z30, __ H, 49663u); // orr z30.h, z30.h, #0xc1ff + __ sve_add(z23, __ D, 46u); // add z23.d, z23.d, #0x2e + __ sve_sub(z26, __ D, 154u); // sub z26.d, z26.d, #0x9a + __ sve_and(z17, __ B, 128u); // and z17.b, z17.b, #0x80 + __ sve_eor(z30, __ H, 49663u); // eor z30.h, z30.h, #0xc1ff + __ sve_orr(z2, __ D, 18444492273897963519u); // orr z2.d, z2.d, #0xfff80000001fffff // SVEBinaryImmOp - __ sve_add(z2, __ D, 168u); // add z2.d, z2.d, #0xa8 - __ sve_sub(z23, __ S, 240u); // sub z23.s, z23.s, #0xf0 - __ sve_and(z12, __ H, 1u); // and z12.h, z12.h, #0x1 - __ sve_eor(z15, __ S, 1u); // eor z15.s, z15.s, #0x1 - __ sve_orr(z19, __ D, 18446532967477018623u); // orr z19.d, z19.d, #0xffff3fffffffffff + __ sve_add(z22, __ D, 103u); // add z22.d, z22.d, #0x67 + __ sve_sub(z8, __ B, 5u); // sub z8.b, z8.b, #0x5 + __ sve_and(z15, __ S, 1u); // and z15.s, z15.s, #0x1 + __ sve_eor(z19, __ D, 18446532967477018623u); // eor z19.d, z19.d, #0xffff3fffffffffff + __ sve_orr(z13, __ S, 7168u); // orr z13.s, z13.s, #0x1c00 // SVEBinaryImmOp - __ sve_add(z13, __ S, 179u); // add z13.s, z13.s, #0xb3 - __ sve_sub(z2, __ B, 88u); // sub z2.b, z2.b, #0x58 - __ sve_and(z20, __ H, 57855u); // and z20.h, z20.h, #0xe1ff - __ sve_eor(z24, __ H, 33279u); // eor z24.h, z24.h, #0x81ff - __ sve_orr(z20, __ S, 917504u); // orr z20.s, z20.s, #0xe0000 + __ sve_add(z1, __ H, 164u); // add z1.h, z1.h, #0xa4 + __ sve_sub(z12, __ S, 194u); // sub z12.s, z12.s, #0xc2 + __ sve_and(z24, __ H, 33279u); // and z24.h, z24.h, #0x81ff + __ sve_eor(z20, __ S, 917504u); // eor z20.s, z20.s, #0xe0000 + __ sve_orr(z21, __ H, 57343u); // orr z21.h, z21.h, #0xdfff // SVEBinaryImmOp - __ sve_add(z21, __ H, 247u); // add z21.h, z21.h, #0xf7 - __ sve_sub(z22, __ D, 253u); // sub z22.d, z22.d, #0xfd - __ sve_and(z26, __ S, 1610637312u); // and z26.s, z26.s, #0x60006000 - __ sve_eor(z11, __ H, 51199u); // eor z11.h, z11.h, #0xc7ff - __ sve_orr(z5, __ B, 128u); // orr z5.b, z5.b, #0x80 + __ sve_add(z31, __ D, 213u); // add z31.d, z31.d, #0xd5 + __ sve_sub(z18, __ S, 120u); // sub z18.s, z18.s, #0x78 + __ sve_and(z11, __ H, 51199u); // and z11.h, z11.h, #0xc7ff + __ sve_eor(z5, __ B, 128u); // eor z5.b, z5.b, #0x80 + __ sve_orr(z2, __ B, 124u); // orr z2.b, z2.b, #0x7c // SVEVectorOp - __ sve_add(z2, __ H, z7, z10); // add z2.h, z7.h, z10.h - __ sve_sub(z19, __ H, z4, z26); // sub z19.h, z4.h, z26.h - __ sve_fadd(z2, __ S, z3, z30); // fadd z2.s, z3.s, z30.s - __ sve_fmul(z20, __ D, z5, z20); // fmul z20.d, z5.d, z20.d - __ sve_fsub(z29, __ S, z13, z13); // fsub z29.s, z13.s, z13.s - __ sve_sqadd(z14, __ H, z30, z1); // sqadd z14.h, z30.h, z1.h - __ sve_sqsub(z28, __ D, z3, z3); // sqsub z28.d, z3.d, z3.d - __ sve_uqadd(z9, __ B, z25, z9); // uqadd z9.b, z25.b, z9.b - __ sve_uqsub(z26, __ B, z10, z14); // uqsub z26.b, z10.b, z14.b - __ sve_abs(z20, __ D, p6, z7); // abs z20.d, p6/m, z7.d - __ sve_add(z20, __ D, p4, z6); // add z20.d, p4/m, z20.d, z6.d - __ sve_and(z13, __ H, p0, z29); // and z13.h, p0/m, z13.h, z29.h - __ sve_asr(z9, __ B, p0, z1); // asr z9.b, p0/m, z9.b, z1.b - __ sve_bic(z27, __ B, p6, z15); // bic z27.b, p6/m, z27.b, z15.b - __ sve_clz(z4, __ D, p7, z17); // clz z4.d, p7/m, z17.d - __ sve_cnt(z2, __ B, p0, z24); // cnt z2.b, p0/m, z24.b - __ sve_eor(z26, __ B, p7, z13); // eor z26.b, p7/m, z26.b, z13.b - __ sve_lsl(z22, __ D, p3, z16); // lsl z22.d, p3/m, z22.d, z16.d - __ sve_lsr(z17, __ D, p1, z11); // lsr z17.d, p1/m, z17.d, z11.d - __ sve_mul(z16, __ B, p0, z16); // mul z16.b, p0/m, z16.b, z16.b - __ sve_neg(z28, __ D, p1, z23); // neg z28.d, p1/m, z23.d - __ sve_not(z28, __ S, p4, z10); // not z28.s, p4/m, z10.s - __ sve_orr(z17, __ S, p7, z7); // orr z17.s, p7/m, z17.s, z7.s - __ sve_rbit(z4, __ H, p3, z24); // rbit z4.h, p3/m, z24.h - __ sve_revb(z9, __ H, p2, z11); // revb z9.h, p2/m, z11.h - __ sve_smax(z4, __ S, p5, z22); // smax z4.s, p5/m, z4.s, z22.s - __ sve_smin(z4, __ H, p0, z15); // smin z4.h, p0/m, z4.h, z15.h - __ sve_umax(z4, __ D, p7, z26); // umax z4.d, p7/m, z4.d, z26.d - __ sve_umin(z5, __ H, p5, z26); // umin z5.h, p5/m, z5.h, z26.h - __ sve_sub(z31, __ B, p0, z25); // sub z31.b, p0/m, z31.b, z25.b - __ sve_fabs(z8, __ D, p1, z3); // fabs z8.d, p1/m, z3.d - __ sve_fadd(z7, __ D, p6, z24); // fadd z7.d, p6/m, z7.d, z24.d - __ sve_fdiv(z24, __ S, p7, z17); // fdiv z24.s, p7/m, z24.s, z17.s - __ sve_fmax(z10, __ S, p3, z30); // fmax z10.s, p3/m, z10.s, z30.s - __ sve_fmin(z8, __ S, p6, z29); // fmin z8.s, p6/m, z8.s, z29.s - __ sve_fmul(z31, __ D, p5, z31); // fmul z31.d, p5/m, z31.d, z31.d - __ sve_fneg(z0, __ D, p5, z7); // fneg z0.d, p5/m, z7.d - __ sve_frintm(z29, __ S, p6, z22); // frintm z29.s, p6/m, z22.s - __ sve_frintn(z29, __ S, p6, z20); // frintn z29.s, p6/m, z20.s - __ sve_frintp(z6, __ S, p4, z18); // frintp z6.s, p4/m, z18.s - __ sve_fsqrt(z26, __ S, p5, z8); // fsqrt z26.s, p5/m, z8.s - __ sve_fsub(z19, __ S, p2, z28); // fsub z19.s, p2/m, z19.s, z28.s - __ sve_fmad(z17, __ D, p1, z30, z20); // fmad z17.d, p1/m, z30.d, z20.d - __ sve_fmla(z28, __ D, p3, z17, z14); // fmla z28.d, p3/m, z17.d, z14.d - __ sve_fmls(z10, __ S, p6, z11, z24); // fmls z10.s, p6/m, z11.s, z24.s - __ sve_fmsb(z11, __ D, p3, z28, z23); // fmsb z11.d, p3/m, z28.d, z23.d - __ sve_fnmad(z20, __ D, p7, z23, z20); // fnmad z20.d, p7/m, z23.d, z20.d - __ sve_fnmsb(z24, __ D, p0, z27, z6); // fnmsb z24.d, p0/m, z27.d, z6.d - __ sve_fnmla(z13, __ D, p3, z4, z13); // fnmla z13.d, p3/m, z4.d, z13.d - __ sve_fnmls(z26, __ S, p5, z20, z6); // fnmls z26.s, p5/m, z20.s, z6.s - __ sve_mla(z29, __ S, p7, z0, z29); // mla z29.s, p7/m, z0.s, z29.s - __ sve_mls(z3, __ D, p1, z5, z8); // mls z3.d, p1/m, z5.d, z8.d - __ sve_and(z13, z17, z13); // and z13.d, z17.d, z13.d - __ sve_eor(z8, z10, z8); // eor z8.d, z10.d, z8.d - __ sve_orr(z19, z0, z29); // orr z19.d, z0.d, z29.d - __ sve_bic(z16, z13, z23); // bic z16.d, z13.d, z23.d - __ sve_uzp1(z23, __ B, z30, z13); // uzp1 z23.b, z30.b, z13.b - __ sve_uzp2(z25, __ H, z22, z0); // uzp2 z25.h, z22.h, z0.h - __ sve_fabd(z25, __ S, p7, z11); // fabd z25.s, p7/m, z25.s, z11.s - __ sve_bext(z14, __ H, z23, z22); // bext z14.h, z23.h, z22.h - __ sve_bdep(z5, __ H, z18, z0); // bdep z5.h, z18.h, z0.h - __ sve_eor3(z9, z2, z3); // eor3 z9.d, z9.d, z2.d, z3.d - __ sve_sqadd(z14, __ H, p1, z29); // sqadd z14.h, p1/m, z14.h, z29.h - __ sve_sqsub(z14, __ D, p5, z4); // sqsub z14.d, p5/m, z14.d, z4.d - __ sve_uqadd(z27, __ S, p3, z22); // uqadd z27.s, p3/m, z27.s, z22.s - __ sve_uqsub(z31, __ S, p6, z11); // uqsub z31.s, p6/m, z31.s, z11.s + __ sve_add(z19, __ H, z4, z26); // add z19.h, z4.h, z26.h + __ sve_sub(z2, __ B, z3, z30); // sub z2.b, z3.b, z30.b + __ sve_fadd(z20, __ D, z5, z20); // fadd z20.d, z5.d, z20.d + __ sve_fmul(z29, __ S, z13, z13); // fmul z29.s, z13.s, z13.s + __ sve_fsub(z14, __ S, z30, z1); // fsub z14.s, z30.s, z1.s + __ sve_sqadd(z28, __ D, z3, z3); // sqadd z28.d, z3.d, z3.d + __ sve_sqsub(z9, __ B, z25, z9); // sqsub z9.b, z25.b, z9.b + __ sve_uqadd(z26, __ B, z10, z14); // uqadd z26.b, z10.b, z14.b + __ sve_uqsub(z20, __ D, z26, z7); // uqsub z20.d, z26.d, z7.d + __ sve_abs(z20, __ D, p4, z6); // abs z20.d, p4/m, z6.d + __ sve_add(z13, __ H, p0, z29); // add z13.h, p0/m, z13.h, z29.h + __ sve_and(z9, __ B, p0, z1); // and z9.b, p0/m, z9.b, z1.b + __ sve_asr(z27, __ B, p6, z15); // asr z27.b, p6/m, z27.b, z15.b + __ sve_bic(z4, __ D, p7, z17); // bic z4.d, p7/m, z4.d, z17.d + __ sve_clz(z2, __ B, p0, z24); // clz z2.b, p0/m, z24.b + __ sve_cnt(z26, __ B, p7, z13); // cnt z26.b, p7/m, z13.b + __ sve_eor(z22, __ D, p3, z16); // eor z22.d, p3/m, z22.d, z16.d + __ sve_lsl(z17, __ D, p1, z11); // lsl z17.d, p1/m, z17.d, z11.d + __ sve_lsr(z16, __ B, p0, z16); // lsr z16.b, p0/m, z16.b, z16.b + __ sve_mul(z28, __ D, p1, z23); // mul z28.d, p1/m, z28.d, z23.d + __ sve_neg(z28, __ S, p4, z10); // neg z28.s, p4/m, z10.s + __ sve_not(z17, __ S, p7, z7); // not z17.s, p7/m, z7.s + __ sve_orr(z4, __ H, p3, z24); // orr z4.h, p3/m, z4.h, z24.h + __ sve_rbit(z9, __ B, p2, z11); // rbit z9.b, p2/m, z11.b + __ sve_revb(z4, __ S, p5, z22); // revb z4.s, p5/m, z22.s + __ sve_smax(z4, __ H, p0, z15); // smax z4.h, p0/m, z4.h, z15.h + __ sve_smin(z4, __ D, p7, z26); // smin z4.d, p7/m, z4.d, z26.d + __ sve_umax(z5, __ H, p5, z26); // umax z5.h, p5/m, z5.h, z26.h + __ sve_umin(z31, __ B, p0, z25); // umin z31.b, p0/m, z31.b, z25.b + __ sve_sub(z8, __ S, p1, z3); // sub z8.s, p1/m, z8.s, z3.s + __ sve_fabs(z7, __ D, p6, z24); // fabs z7.d, p6/m, z24.d + __ sve_fadd(z24, __ S, p7, z17); // fadd z24.s, p7/m, z24.s, z17.s + __ sve_fdiv(z10, __ S, p3, z30); // fdiv z10.s, p3/m, z10.s, z30.s + __ sve_fmax(z8, __ S, p6, z29); // fmax z8.s, p6/m, z8.s, z29.s + __ sve_fmin(z31, __ D, p5, z31); // fmin z31.d, p5/m, z31.d, z31.d + __ sve_fmul(z0, __ D, p5, z7); // fmul z0.d, p5/m, z0.d, z7.d + __ sve_fneg(z29, __ S, p6, z22); // fneg z29.s, p6/m, z22.s + __ sve_frintm(z29, __ S, p6, z20); // frintm z29.s, p6/m, z20.s + __ sve_frintn(z6, __ S, p4, z18); // frintn z6.s, p4/m, z18.s + __ sve_frintp(z26, __ S, p5, z8); // frintp z26.s, p5/m, z8.s + __ sve_fsqrt(z19, __ S, p2, z28); // fsqrt z19.s, p2/m, z28.s + __ sve_fsub(z17, __ D, p1, z30); // fsub z17.d, p1/m, z17.d, z30.d + __ sve_fmad(z24, __ S, p7, z14, z17); // fmad z24.s, p7/m, z14.s, z17.s + __ sve_fmla(z19, __ D, p2, z26, z11); // fmla z19.d, p2/m, z26.d, z11.d + __ sve_fmls(z0, __ D, p2, z15, z28); // fmls z0.d, p2/m, z15.d, z28.d + __ sve_fmsb(z23, __ D, p5, z28, z23); // fmsb z23.d, p5/m, z28.d, z23.d + __ sve_fnmad(z29, __ S, p6, z0, z27); // fnmad z29.s, p6/m, z0.s, z27.s + __ sve_fnmsb(z23, __ S, p3, z12, z4); // fnmsb z23.s, p3/m, z12.s, z4.s + __ sve_fnmla(z31, __ S, p6, z23, z20); // fnmla z31.s, p6/m, z23.s, z20.s + __ sve_fnmls(z2, __ D, p7, z29, z0); // fnmls z2.d, p7/m, z29.d, z0.d + __ sve_mla(z23, __ H, p0, z4, z5); // mla z23.h, p0/m, z4.h, z5.h + __ sve_mls(z28, __ H, p3, z17, z13); // mls z28.h, p3/m, z17.h, z13.h + __ sve_and(z8, z10, z8); // and z8.d, z10.d, z8.d + __ sve_eor(z19, z0, z29); // eor z19.d, z0.d, z29.d + __ sve_orr(z16, z13, z23); // orr z16.d, z13.d, z23.d + __ sve_bic(z23, z30, z13); // bic z23.d, z30.d, z13.d + __ sve_uzp1(z25, __ H, z22, z0); // uzp1 z25.h, z22.h, z0.h + __ sve_uzp2(z25, __ H, z30, z11); // uzp2 z25.h, z30.h, z11.h + __ sve_fabd(z14, __ S, p5, z22); // fabd z14.s, p5/m, z14.s, z22.s + __ sve_bext(z5, __ H, z18, z0); // bext z5.h, z18.h, z0.h + __ sve_bdep(z9, __ D, z2, z3); // bdep z9.d, z2.d, z3.d + __ sve_eor3(z14, z4, z29); // eor3 z14.d, z14.d, z4.d, z29.d + __ sve_sqadd(z14, __ D, p5, z4); // sqadd z14.d, p5/m, z14.d, z4.d + __ sve_sqsub(z27, __ S, p3, z22); // sqsub z27.s, p3/m, z27.s, z22.s + __ sve_uqadd(z31, __ S, p6, z11); // uqadd z31.s, p6/m, z31.s, z11.s + __ sve_uqsub(z12, __ B, p4, z28); // uqsub z12.b, p4/m, z12.b, z28.b // SVEReductionOp - __ sve_andv(v12, __ B, p4, z28); // andv b12, p4, z28.b - __ sve_orv(v28, __ D, p4, z4); // orv d28, p4, z4.d - __ sve_eorv(v6, __ S, p0, z15); // eorv s6, p0, z15.s - __ sve_smaxv(v1, __ S, p5, z18); // smaxv s1, p5, z18.s - __ sve_sminv(v2, __ H, p2, z4); // sminv h2, p2, z4.h - __ sve_umaxv(v11, __ S, p2, z28); // umaxv s11, p2, z28.s - __ sve_uminv(v3, __ H, p5, z31); // uminv h3, p5, z31.h - __ sve_fminv(v24, __ S, p5, z15); // fminv s24, p5, z15.s - __ sve_fmaxv(v6, __ S, p3, z8); // fmaxv s6, p3, z8.s - __ sve_fadda(v21, __ D, p7, z4); // fadda d21, p7, d21, z4.d - __ sve_uaddv(v24, __ B, p5, z6); // uaddv d24, p5, z6.b + __ sve_andv(v28, __ D, p4, z4); // andv d28, p4, z4.d + __ sve_orv(v6, __ S, p0, z15); // orv s6, p0, z15.s + __ sve_eorv(v1, __ S, p5, z18); // eorv s1, p5, z18.s + __ sve_smaxv(v2, __ H, p2, z4); // smaxv h2, p2, z4.h + __ sve_sminv(v11, __ S, p2, z28); // sminv s11, p2, z28.s + __ sve_umaxv(v3, __ H, p5, z31); // umaxv h3, p5, z31.h + __ sve_uminv(v24, __ H, p5, z15); // uminv h24, p5, z15.h + __ sve_fminv(v6, __ S, p3, z8); // fminv s6, p3, z8.s + __ sve_fmaxv(v21, __ D, p7, z4); // fmaxv d21, p7, z4.d + __ sve_fadda(v24, __ S, p5, z6); // fadda s24, p5, s24, z6.s + __ sve_uaddv(v4, __ D, p2, z9); // uaddv d4, p2, z9.d // AddWideNEONOp - __ saddwv(v4, v5, __ T8H, v6, __ T8B); // saddw v4.8H, v5.8H, v6.8B - __ saddwv2(v10, v11, __ T8H, v12, __ T16B); // saddw2 v10.8H, v11.8H, v12.16B - __ saddwv(v9, v10, __ T4S, v11, __ T4H); // saddw v9.4S, v10.4S, v11.4H - __ saddwv2(v25, v26, __ T4S, v27, __ T8H); // saddw2 v25.4S, v26.4S, v27.8H - __ saddwv(v10, v11, __ T2D, v12, __ T2S); // saddw v10.2D, v11.2D, v12.2S - __ saddwv2(v5, v6, __ T2D, v7, __ T4S); // saddw2 v5.2D, v6.2D, v7.4S - __ uaddwv(v31, v0, __ T8H, v1, __ T8B); // uaddw v31.8H, v0.8H, v1.8B - __ uaddwv2(v22, v23, __ T8H, v24, __ T16B); // uaddw2 v22.8H, v23.8H, v24.16B - __ uaddwv(v25, v26, __ T4S, v27, __ T4H); // uaddw v25.4S, v26.4S, v27.4H - __ uaddwv2(v15, v16, __ T4S, v17, __ T8H); // uaddw2 v15.4S, v16.4S, v17.8H - __ uaddwv(v3, v4, __ T2D, v5, __ T2S); // uaddw v3.2D, v4.2D, v5.2S - __ uaddwv2(v18, v19, __ T2D, v20, __ T4S); // uaddw2 v18.2D, v19.2D, v20.4S + __ saddwv(v10, v11, __ T8H, v12, __ T8B); // saddw v10.8H, v11.8H, v12.8B + __ saddwv2(v5, v6, __ T8H, v7, __ T16B); // saddw2 v5.8H, v6.8H, v7.16B + __ saddwv(v31, v0, __ T4S, v1, __ T4H); // saddw v31.4S, v0.4S, v1.4H + __ saddwv2(v22, v23, __ T4S, v24, __ T8H); // saddw2 v22.4S, v23.4S, v24.8H + __ saddwv(v25, v26, __ T2D, v27, __ T2S); // saddw v25.2D, v26.2D, v27.2S + __ saddwv2(v15, v16, __ T2D, v17, __ T4S); // saddw2 v15.2D, v16.2D, v17.4S + __ uaddwv(v3, v4, __ T8H, v5, __ T8B); // uaddw v3.8H, v4.8H, v5.8B + __ uaddwv2(v18, v19, __ T8H, v20, __ T16B); // uaddw2 v18.8H, v19.8H, v20.16B + __ uaddwv(v14, v15, __ T4S, v16, __ T4H); // uaddw v14.4S, v15.4S, v16.4H + __ uaddwv2(v10, v11, __ T4S, v12, __ T8H); // uaddw2 v10.4S, v11.4S, v12.8H + __ uaddwv(v2, v3, __ T2D, v4, __ T2S); // uaddw v2.2D, v3.2D, v4.2S + __ uaddwv2(v10, v11, __ T2D, v12, __ T4S); // uaddw2 v10.2D, v11.2D, v12.4S __ bind(forth); @@ -1463,312 +1469,313 @@ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x140004c9, 0x94000000, - 0x97ffffd4, 0x940004c6, 0x3400000a, 0x34fffa2a, - 0x3400986a, 0x35000008, 0x35fff9c8, 0x35009808, - 0xb400000b, 0xb4fff96b, 0xb40097ab, 0xb500001d, - 0xb5fff91d, 0xb500975d, 0x10000013, 0x10fff8b3, - 0x100096f3, 0x90000013, 0x36300016, 0x3637f836, - 0x36309676, 0x3758000c, 0x375ff7cc, 0x3758960c, + 0x14000000, 0x17ffffd7, 0x140004cb, 0x94000000, + 0x97ffffd4, 0x940004c8, 0x3400000a, 0x34fffa2a, + 0x340098aa, 0x35000008, 0x35fff9c8, 0x35009848, + 0xb400000b, 0xb4fff96b, 0xb40097eb, 0xb500001d, + 0xb5fff91d, 0xb500979d, 0x10000013, 0x10fff8b3, + 0x10009733, 0x90000013, 0x36300016, 0x3637f836, + 0x363096b6, 0x3758000c, 0x375ff7cc, 0x3758964c, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x540093e0, 0x54000001, 0x54fff541, 0x54009381, - 0x54000002, 0x54fff4e2, 0x54009322, 0x54000002, - 0x54fff482, 0x540092c2, 0x54000003, 0x54fff423, - 0x54009263, 0x54000003, 0x54fff3c3, 0x54009203, - 0x54000004, 0x54fff364, 0x540091a4, 0x54000005, - 0x54fff305, 0x54009145, 0x54000006, 0x54fff2a6, - 0x540090e6, 0x54000007, 0x54fff247, 0x54009087, - 0x54000008, 0x54fff1e8, 0x54009028, 0x54000009, - 0x54fff189, 0x54008fc9, 0x5400000a, 0x54fff12a, - 0x54008f6a, 0x5400000b, 0x54fff0cb, 0x54008f0b, - 0x5400000c, 0x54fff06c, 0x54008eac, 0x5400000d, - 0x54fff00d, 0x54008e4d, 0x5400000e, 0x54ffefae, - 0x54008dee, 0x5400000f, 0x54ffef4f, 0x54008d8f, + 0x54009420, 0x54000001, 0x54fff541, 0x540093c1, + 0x54000002, 0x54fff4e2, 0x54009362, 0x54000002, + 0x54fff482, 0x54009302, 0x54000003, 0x54fff423, + 0x540092a3, 0x54000003, 0x54fff3c3, 0x54009243, + 0x54000004, 0x54fff364, 0x540091e4, 0x54000005, + 0x54fff305, 0x54009185, 0x54000006, 0x54fff2a6, + 0x54009126, 0x54000007, 0x54fff247, 0x540090c7, + 0x54000008, 0x54fff1e8, 0x54009068, 0x54000009, + 0x54fff189, 0x54009009, 0x5400000a, 0x54fff12a, + 0x54008faa, 0x5400000b, 0x54fff0cb, 0x54008f4b, + 0x5400000c, 0x54fff06c, 0x54008eec, 0x5400000d, + 0x54fff00d, 0x54008e8d, 0x5400000e, 0x54ffefae, + 0x54008e2e, 0x5400000f, 0x54ffef4f, 0x54008dcf, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f, 0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf, 0xd503239f, 0xd50321df, 0xd50323ff, 0xd50323df, 0xd503211f, 0xd503233f, 0xd503231f, 0xd503215f, 0xd503237f, 0xd503235f, 0xd69f03e0, 0xd6bf03e0, - 0xd5033fdf, 0xd50330ff, 0xd503207f, 0xd50320ff, - 0xd5033e9f, 0xd50332bf, 0xd61f0200, 0xd63f0280, - 0xdac123ea, 0xdac127fb, 0xdac12be8, 0xdac12fe0, - 0xdac133e1, 0xdac137f5, 0xdac13bf1, 0xdac13ffd, - 0xdac147fd, 0xd61f0b9f, 0xd61f0c3f, 0xd63f0aff, - 0xd63f0ebf, 0xd51b4434, 0xd51b4216, 0xd53b443b, - 0xd53b4213, 0xd53b00eb, 0xd53b0030, 0xdac143e6, - 0xc8117c80, 0xc80afed8, 0xc85f7e6a, 0xc85ffca1, - 0xc89ffd1e, 0xc8dffe2c, 0x88097cee, 0x8801fe05, - 0x885f7d82, 0x885ffd8a, 0x889fff83, 0x88dfff4e, - 0x481e7dca, 0x4815fd2d, 0x485f7f76, 0x485ffe7c, - 0x489fffcb, 0x48dffc53, 0x08027c37, 0x0800fe0c, - 0x085f7ded, 0x085ffeb1, 0x089ffd6d, 0x08dffd1e, - 0xc87f3578, 0xc87feaa1, 0xc83b506d, 0xc82c87a6, - 0x887f1166, 0x887f93d0, 0x883e32a4, 0x883bf12f, - 0xf80011f9, 0xb81b1022, 0x381ea354, 0x79002fd7, - 0xf85cf39a, 0xb8580309, 0x385e218c, 0x784051e1, - 0x389e11d8, 0x789fa1f8, 0x79c01865, 0xb881131b, - 0xfc5dd3ad, 0xbc5d1137, 0xfc00900b, 0xbc181015, - 0xf818ec7d, 0xb81b8c91, 0x381efc40, 0x78007c3d, - 0xf857beb0, 0xb8413dd4, 0x385fddd6, 0x78409e2f, - 0x389eddea, 0x789e7d94, 0x78de3d55, 0xb8805c13, - 0xfc5cadc0, 0xbc428c23, 0xfc1a2dc4, 0xbc1caf92, - 0xf81475f6, 0xb81f95d1, 0x381e757e, 0x78014561, - 0xf8402436, 0xb85896e2, 0x385f4763, 0x785db4f0, - 0x3880374f, 0x789e25e7, 0x78dd0563, 0xb88166f9, - 0xfc529540, 0xbc4374d3, 0xfc1166ae, 0xbc1ba6c0, - 0xf820ea7b, 0xb82d68c8, 0x38367a04, 0x782f4b59, - 0xf878c8a4, 0xb8674a24, 0x386b78f1, 0x78776bc0, - 0x38a15aca, 0x78bedbd5, 0x78fcd94b, 0xb8aa4a7c, - 0xfc6ecbbe, 0xbc65d8a8, 0xfc2de919, 0xbc3a7b11, - 0xf91f1193, 0xb91ed5f7, 0x391ec9bd, 0x79182ceb, - 0xf95d4b0a, 0xb9581010, 0x395fc034, 0x795fb221, - 0x399d8731, 0x799efb3b, 0x79dd1a2e, 0xb998e4ea, - 0xfd583723, 0xbd5ea12c, 0xfd18dc38, 0xbd1b0e83, - 0x58ffda82, 0x1800001d, 0xf885d1c0, 0xd8ffda20, - 0xf8a77820, 0xf9980220, 0x1a030301, 0x3a140311, - 0x5a0d000b, 0x7a07015c, 0x9a1001e4, 0xba140182, - 0xda0d01bd, 0xfa0c00ce, 0x0b31f194, 0x2b206d7b, - 0xcb29f027, 0x6b210f63, 0x8b2cb34d, 0xab2a88b1, - 0xcb2f511e, 0xeb3332f3, 0x3a4533aa, 0x7a4d312b, - 0xba442146, 0xfa42818c, 0x3a466a02, 0x7a4b68ed, - 0xba4a9b6b, 0xfa4dd86d, 0x1a8a637a, 0x1a9cd6aa, - 0x5a9bd137, 0x5a8fd7aa, 0x9a95233e, 0x9a95c620, - 0xda9422b0, 0xda8397d3, 0x5ac00173, 0x5ac00418, - 0x5ac00b3b, 0x5ac0106e, 0x5ac0162e, 0xdac001e7, - 0xdac00798, 0xdac00b31, 0xdac00f42, 0xdac010bc, - 0xdac01759, 0xdac1021b, 0xdac104d1, 0xdac10995, - 0xdac10c80, 0xdac1136c, 0xdac11791, 0xdac1185c, - 0xdac11d51, 0xd71f09ee, 0xd71f0dc3, 0xd73f0b2f, - 0xd73f0e6e, 0x1ac40a05, 0x1ac40f3a, 0x1acc2042, - 0x1ac8263d, 0x1ac42867, 0x1ada2c99, 0x9ad10899, - 0x9ad10f40, 0x9ad521f7, 0x9adb263c, 0x9ac0286a, - 0x9ac92f27, 0x9bdd7de6, 0x9b427d4f, 0x1b0b2cf1, - 0x1b1ddcf7, 0x9b0b2f6e, 0x9b0cbf04, 0x9b2b728e, - 0x9b2cdd6d, 0x9bae275e, 0x9ba7954d, 0x7ec315fe, - 0x1ef0098c, 0x1ef21bff, 0x1ef02ab3, 0x1ef5394f, - 0x1efc4942, 0x1eff5bc7, 0x1ee28832, 0x7ea3d546, - 0x1e270979, 0x1e201981, 0x1e3d2a63, 0x1e263ae6, - 0x1e3b4b80, 0x1e2758a2, 0x1e39899d, 0x7ef8d58d, - 0x1e720913, 0x1e751b56, 0x1e622a74, 0x1e683ade, - 0x1e754a76, 0x1e755a4c, 0x1e638a06, 0x1fc373a3, - 0x1f0a35cf, 0x1f0aea4c, 0x1f2f74e7, 0x1f2032e0, - 0x1f4d21d8, 0x1f49d0ef, 0x1f7f43b3, 0x1f705522, - 0x1e20409e, 0x1e20c361, 0x1e214319, 0x1e21c2ae, - 0x1e22c0cd, 0x1e23c32c, 0x1ee243d9, 0x1e6042bc, - 0x1e60c2f0, 0x1e6143a5, 0x1e61c276, 0x1e62428d, - 0x1ee1c393, 0x1e3800d1, 0x9e3800ed, 0x1e78035c, - 0x9e7800d1, 0x1e220081, 0x9e22028e, 0x1e6202a7, - 0x9e6202fb, 0x1e24028d, 0x9e64039e, 0x1e3002aa, - 0x9e700225, 0x1e2601cb, 0x9e6602ad, 0x1e2701db, - 0x9e6702e4, 0x1e3e2300, 0x1e6e2180, 0x1e202228, - 0x1e602388, 0x29021b40, 0x297c78c0, 0x69660970, - 0xa908018f, 0xa9427ae7, 0x29a03cfa, 0x29fc3d4b, - 0x69c84033, 0xa988240e, 0xa9fa0d9b, 0x28a02d88, - 0x28c8408a, 0x68f87a6a, 0xa8ba09f8, 0xa8c52a18, - 0x280257be, 0x28727948, 0xa83868de, 0xa8440a98, - 0x0c40733f, 0x4cdfa1e5, 0x0ccd6cea, 0x4cdf260d, - 0x0d40c227, 0x4ddfcb30, 0x0dc7cc6b, 0x4c408ced, - 0x0cdf8769, 0x4d60c346, 0x0dffca17, 0x4de8cda6, - 0x4cda4834, 0x0c4049ef, 0x4d40e6dd, 0x4ddfe946, - 0x0dcfeccf, 0x4cdf0546, 0x0cc7006b, 0x0d60e32c, - 0x0dffe5eb, 0x0dfce8de, 0x0e31bb9b, 0x4e31bbbc, - 0x0e71b841, 0x4e71bbbc, 0x4eb1b841, 0x0e30aab4, - 0x4e30abdd, 0x0e70aa30, 0x4e70a9cd, 0x4eb0a96a, - 0x6e30fbdd, 0x0e31abdd, 0x2e31aa93, 0x4e31aaf6, - 0x6e31a96a, 0x0e71a8a4, 0x2e71a81f, 0x4e71aad5, - 0x6e71a928, 0x4eb1a81f, 0x6eb1aa93, 0x6eb0f96a, - 0x7e30fbbc, 0x7e70f862, 0x7eb0fb59, 0x7ef0f8c5, - 0x0ea0c883, 0x4ea0c928, 0x4ee0caf6, 0x2ea0ca93, - 0x6ea0c9cd, 0x6ee0c8c5, 0x0ea0dbdd, 0x4ea0db38, - 0x4ee0dad5, 0x0ea0eb7a, 0x4ea0eb38, 0x4ee0e883, - 0x2ea0db38, 0x6ea0db7a, 0x6ee0db17, 0x0e20ba0f, - 0x4e20bad5, 0x0e60b883, 0x4e60bb38, 0x0ea0b928, - 0x4ea0bb59, 0x4ee0bab4, 0x0ea0fa30, 0x4ea0fa51, - 0x4ee0f862, 0x0ef8f841, 0x4ef8f820, 0x2ea0fb38, - 0x6ea0f8a4, 0x6ee0f883, 0x2ef8f9ac, 0x6ef8f81f, - 0x2ea1fbbc, 0x6ea1f96a, 0x6ee1fb7a, 0x2ef9f862, - 0x6ef9f9ac, 0x2e205a72, 0x6e20581f, 0x0e231c41, - 0x4e2f1dcd, 0x0ebf1fdd, 0x4ea21c20, 0x2e351e93, - 0x6e2e1dac, 0x0e338651, 0x4e3886f6, 0x0e6f85cd, - 0x4e7e87bc, 0x0ea087fe, 0x4ea1841f, 0x4ee38441, - 0x0e3c0f7a, 0x4e3e0fbc, 0x0e660ca4, 0x4e600ffe, - 0x0ea60ca4, 0x4ea80ce6, 0x4ee00ffe, 0x2e3c0f7a, - 0x6e340e72, 0x2e6b0d49, 0x6e6a0d28, 0x2eae0dac, - 0x6ea20c20, 0x6ef60eb4, 0x0e23d441, 0x4e3ad738, - 0x4e64d462, 0x0e421420, 0x4e4b1549, 0x2e3a8738, - 0x6e3c877a, 0x2e728630, 0x6e6087fe, 0x2ea58483, - 0x6eac856a, 0x6ef98717, 0x0e2c2d6a, 0x4e262ca4, - 0x0e742e72, 0x4e642c62, 0x0ead2d8b, 0x4eaa2d28, - 0x4eec2d6a, 0x2e312e0f, 0x6e332e51, 0x2e642c62, - 0x6e6c2d6a, 0x2eae2dac, 0x6eae2dac, 0x6ef12e0f, - 0x0eafd5cd, 0x4ea4d462, 0x4ee9d507, 0x0ed616b4, - 0x4edc177a, 0x0e329e30, 0x4e269ca4, 0x0e649c62, - 0x4e669ca4, 0x0eae9dac, 0x4eb49e72, 0x2eb7d6d5, - 0x6eb2d630, 0x6ef4d672, 0x2ecd158b, 0x6ed716d5, - 0x2e39d717, 0x6e2ed5ac, 0x6e7cd77a, 0x2e591717, - 0x6e5e17bc, 0x2e30ddee, 0x6e2ddd8b, 0x6e7adf38, - 0x2e431c41, 0x6e4e1dac, 0x0e61941f, 0x4e6c956a, - 0x0eb29630, 0x4ea99507, 0x0e24cc62, 0x4e25cc83, - 0x4e6fcdcd, 0x0e550e93, 0x4e530e51, 0x2e729630, - 0x6e659483, 0x2ea39441, 0x6ead958b, 0x0ea0cffe, - 0x4ea7ccc5, 0x4eeacd28, 0x0ed10e0f, 0x4edf0fdd, - 0x2e20fffe, 0x6e22fc20, 0x6e76feb4, 0x2e493d07, - 0x6e563eb4, 0x0e396717, 0x4e3e67bc, 0x0e7766d5, - 0x4e7d679b, 0x0ebb6759, 0x4ea764c5, 0x2e236441, - 0x6e396717, 0x2e726630, 0x6e61641f, 0x2ea764c5, - 0x6eae65ac, 0x0e2ba549, 0x4e3ea7bc, 0x0e71a60f, - 0x4e7fa7dd, 0x0eb8a6f6, 0x4ea1a41f, 0x0e35f693, - 0x4e21f41f, 0x4e67f4c5, 0x0e5035ee, 0x4e543672, - 0x0e216c1f, 0x4e346e72, 0x0e7d6f9b, 0x4e766eb4, - 0x0eb26e30, 0x4eae6dac, 0x2e2d6d8b, 0x6e2b6d49, - 0x2e686ce6, 0x6e606ffe, 0x2eb36e51, 0x6ebd6f9b, - 0x0e3eafbc, 0x4e20affe, 0x0e69ad07, 0x4e6cad6a, - 0x0eb6aeb4, 0x4eacad6a, 0x2e26aca4, 0x6e3aaf38, - 0x2e73ae51, 0x6e73ae51, 0x2eb8aef6, 0x6ea5ac83, - 0x2e3fa7dd, 0x6e31a60f, 0x2e78a6f6, 0x6e75a693, - 0x2eb5a693, 0x6eb8a6f6, 0x0e64b462, 0x4e71b60f, - 0x0ea8b4e6, 0x4eaeb5ac, 0x0e322630, 0x4e2d258b, - 0x0e6f25cd, 0x4e792717, 0x0ea32441, 0x4ea027fe, - 0x0eb5f693, 0x4ea7f4c5, 0x4ef3f651, 0x0ec43462, - 0x4ed23630, 0x2eb8eef6, 0x6eafedcd, 0x6eeced6a, - 0x2ed72ed5, 0x6edf2fdd, 0x0fa810e6, 0x4fa38841, - 0x4fc1100f, 0x0fab5149, 0x4f8688a4, 0x4fcf59cd, - 0x2f859083, 0x4f8d898b, 0x6fce99ac, 0x0f41800f, - 0x4f6b8949, 0x0f8d818b, 0x4f838841, 0x0e343672, - 0x4e223420, 0x0e7b3759, 0x4e7c377a, 0x0eb93717, - 0x4ea43462, 0x4ef43672, 0x0e2e3dac, 0x4e263ca4, - 0x0e7e3fbc, 0x4e603ffe, 0x0ebf3fdd, 0x4eb23e30, - 0x4efd3f9b, 0x2e288ce6, 0x6e2b8d49, 0x2e7f8fdd, - 0x6e748e72, 0x2ea98d07, 0x6ea68ca4, 0x6ee98d07, - 0x2e31360f, 0x6e2b3549, 0x2e793717, 0x6e6a3528, - 0x2ea43462, 0x6ebe37bc, 0x6ef736d5, 0x2e213c1f, - 0x6e273cc5, 0x2e7d3f9b, 0x6e623c20, 0x2eb33e51, - 0x6eb13e0f, 0x6ee63ca4, 0x0e3ce77a, 0x4e2ae528, - 0x4e7ee7bc, 0x2eb8e6f6, 0x6ebde79b, 0x6efde79b, - 0x2e3be759, 0x6e39e717, 0x6e62e420, 0x65d220c2, - 0x659029f2, 0x65903ca3, 0x65d13683, 0x65913173, - 0x65d320cf, 0x25cb83c6, 0x254c0fbb, 0x251e0308, - 0x25473a05, 0x25943096, 0x259d9270, 0x2423d187, - 0x242786fa, 0x24f0f1b9, 0x24e42043, 0xba5fd3e3, - 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff, - 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff, - 0x4e010fe0, 0x5e040420, 0x4e081fe1, 0x4e0c1fe1, - 0x4e0a1fe1, 0x4e071fe1, 0x4e042c20, 0x4e062c20, - 0x4e052c20, 0x4e083c20, 0x0e0c3c20, 0x0e0a3c20, - 0x0e073c20, 0x9eae0020, 0x0f03f409, 0x6f03f40e, - 0x4cc0ac3f, 0x0ea1b820, 0x0ef9b820, 0x4ef9b820, - 0x4e21c862, 0x0e79c862, 0x4e79c862, 0x4e61b8a4, - 0x0e79b8a4, 0x4e79b8a4, 0x05a08020, 0x05104fe0, - 0x05505001, 0x05906fe2, 0x05d03005, 0x05101fea, - 0x05901feb, 0x0590cc0b, 0x0590de0b, 0x04b0e3e0, - 0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x047f9c20, - 0x04ff9c20, 0x04299420, 0x04319160, 0x0461943e, - 0x04a19020, 0x04038100, 0x040381a0, 0x040387e1, - 0x04438be2, 0x04c38fe3, 0x040181e0, 0x04018100, - 0x04018621, 0x04418b22, 0x04418822, 0x04818c23, - 0x040081e0, 0x04008120, 0x04008761, 0x04008621, - 0x04408822, 0x04808c23, 0x042053ff, 0x047f5401, - 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2, - 0x25f8f007, 0x2538dfea, 0x25b8dfeb, 0xa400a3e0, - 0xa420a7e0, 0xa4484be0, 0xa467afe0, 0xa4a8a7ea, - 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, - 0xe400fbf6, 0xe408ffff, 0xe420e7e0, 0xe4484be0, - 0xe460efe0, 0xe547e400, 0xe4014be0, 0xe4a84fe0, - 0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08, - 0x0420e3e9, 0x0460e3ea, 0x04a0e3eb, 0x04e0e3ec, - 0x25104042, 0x25104871, 0x25904861, 0x25904c92, - 0x05344020, 0x05744041, 0x05b44062, 0x05f44083, - 0x252c8840, 0x253c1420, 0x25681572, 0x25a21ce3, - 0x25ea1e34, 0x253c0421, 0x25680572, 0x25a20ce3, - 0x25ea0e34, 0x0522c020, 0x05e6c0a4, 0x2401a001, - 0x2443a051, 0x24858881, 0x24c78cd1, 0x24850891, - 0x24c70cc1, 0x250f9001, 0x25508051, 0x25802491, - 0x25df28c1, 0x25850c81, 0x251e10d1, 0x65816001, - 0x65c36051, 0x65854891, 0x65c74cc1, 0x05733820, - 0x05b238a4, 0x05f138e6, 0x0570396a, 0x65d0a001, - 0x65d6a443, 0x65d4a826, 0x6594ac26, 0x6554ac26, - 0x6556ac26, 0x6552ac26, 0x65cbac85, 0x65caac01, - 0x6589ac85, 0x6588ac01, 0x65c9ac85, 0x65c8ac01, - 0x65dea833, 0x659ca509, 0x65d8a801, 0x65dcac01, - 0x655cb241, 0x0520a1e0, 0x0521a601, 0x052281e0, - 0x05238601, 0x04a14026, 0x042244a6, 0x046344a6, - 0x04a444a6, 0x04e544a7, 0x0568aca7, 0x05b23230, - 0x05302a30, 0x05702a30, 0x05b02a30, 0x05f02a30, - 0x853040af, 0xc5b040af, 0xe57080af, 0xe5b080af, - 0x25034440, 0x254054c4, 0x25034640, 0x25415a05, - 0x25834440, 0x25c54489, 0x250b5d3a, 0x2550dc20, - 0x2518e3e1, 0x2518e021, 0x2518e0a1, 0x2518e121, - 0x2518e1a1, 0x2558e3e2, 0x2558e042, 0x2558e0c2, - 0x2558e142, 0x2598e3e3, 0x2598e063, 0x2598e0e3, - 0x2598e163, 0x25d8e3e4, 0x25d8e084, 0x25d8e104, - 0x25d8e184, 0x2518e407, 0x05214800, 0x05614800, - 0x05a14800, 0x05e14800, 0x05214c00, 0x05614c00, - 0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001, - 0x05a18610, 0x05e18610, 0x0420bc31, 0x05271e11, - 0x6545e891, 0x6585e891, 0x65c5e891, 0x6545c891, - 0x6585c891, 0x65c5c891, 0x052c8020, 0x056c8020, - 0x05ac8020, 0x05ec8020, 0x45b0c210, 0x45f1c231, - 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, - 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, - 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, - 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, - 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, - 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, - 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, - 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, - 0xf8268290, 0xf82d028c, 0xf8281299, 0xf8332160, - 0xf8383286, 0xf83f520e, 0xf82640e0, 0xf82f7353, - 0xf82962ea, 0xf8b58396, 0xf8a201e3, 0xf8b310f4, - 0xf8a420fd, 0xf8a03209, 0xf8b45097, 0xf8b042ea, - 0xf8ab70d9, 0xf8bf61b0, 0xf8f7802c, 0xf8ee02a9, - 0xf8f011fa, 0xf8e421e4, 0xf8e833c6, 0xf8e4523d, - 0xf8fd413a, 0xf8ef7162, 0xf8fd60e3, 0xf86182bb, - 0xf870010e, 0xf8701336, 0xf86522b4, 0xf8703217, - 0xf87e5294, 0xf8604264, 0xf8787284, 0xf8646358, - 0xb8338102, 0xb828030e, 0xb83012df, 0xb824203f, - 0xb82a3194, 0xb82050e9, 0xb8384090, 0xb83b7146, - 0xb83b61b8, 0xb8b082df, 0xb8b603f4, 0xb8bd11c9, - 0xb8b42287, 0xb8bc3169, 0xb8ae528c, 0xb8a14138, - 0xb8b3726d, 0xb8b060b0, 0xb8e08183, 0xb8e801ef, - 0xb8f011e4, 0xb8fe2005, 0xb8ea3376, 0xb8e35120, - 0xb8f3415d, 0xb8f87284, 0xb8e763b8, 0xb86e8175, - 0xb87b01ed, 0xb87f11d1, 0xb863221e, 0xb87630f4, - 0xb8745023, 0xb87a4133, 0xb87072b1, 0xb8606044, - 0xce262dd8, 0xce1179d5, 0xce638c6c, 0xce83c537, - 0xce73807c, 0xce7a84f7, 0xcec081d5, 0xce7a8905, - 0x25a0d245, 0x2521c3f1, 0x05808909, 0x05400d2c, - 0x0500154b, 0x25a0c91f, 0x2561db50, 0x0582d157, - 0x054026a8, 0x05001eca, 0x25a0c0b6, 0x25a1da23, - 0x05822dc5, 0x05400e11, 0x0500155e, 0x25e0d502, - 0x25a1de17, 0x0580040c, 0x0540000f, 0x050287b3, - 0x25a0d66d, 0x2521cb02, 0x05801d74, 0x05400d38, - 0x05007854, 0x2560def5, 0x25e1dfb6, 0x05801c3a, - 0x0540158b, 0x05000e05, 0x046a00e2, 0x047a0493, - 0x659e0062, 0x65d408b4, 0x658d05bd, 0x046113ce, - 0x04e3187c, 0x04291729, 0x042e1d5a, 0x04d6b8f4, - 0x04c010d4, 0x045a03ad, 0x04108029, 0x041b19fb, - 0x04d9be24, 0x041aa302, 0x04191dba, 0x04d38e16, - 0x04d18571, 0x04100210, 0x04d7a6fc, 0x049eb15c, - 0x04981cf1, 0x05678f04, 0x05648969, 0x048816c4, - 0x044a01e4, 0x04c91f44, 0x044b1745, 0x0401033f, - 0x04dca468, 0x65c09b07, 0x658d9e38, 0x65868fca, - 0x65879ba8, 0x65c297ff, 0x04ddb4e0, 0x6582badd, - 0x6580ba9d, 0x6581b246, 0x658db51a, 0x65818b93, - 0x65f487d1, 0x65ee0e3c, 0x65b8396a, 0x65f7af8b, - 0x65f4def4, 0x65e6e378, 0x65ed4c8d, 0x65a6769a, - 0x049d5c1d, 0x04c864a3, 0x042d322d, 0x04a83148, - 0x047d3013, 0x04f731b0, 0x052d6bd7, 0x05606ed9, - 0x65889d79, 0x4556b2ee, 0x4540b645, 0x04223869, - 0x445887ae, 0x44da948e, 0x44998edb, 0x449b997f, - 0x041a338c, 0x04d8309c, 0x049921e6, 0x04883641, - 0x044a2882, 0x04892b8b, 0x044b37e3, 0x658735f8, - 0x65862d06, 0x65d83c95, 0x040134d8, 0x0e2610a4, - 0x4e2c116a, 0x0e6b1149, 0x4e7b1359, 0x0eac116a, - 0x4ea710c5, 0x2e21101f, 0x6e3812f6, 0x2e7b1359, - 0x6e71120f, 0x2ea51083, 0x6eb41272, + 0xd5033fdf, 0xd50330ff, 0xd503101a, 0xd503207f, + 0xd50320ff, 0xd503329f, 0xd50339bf, 0xd61f0280, + 0xd63f0140, 0xdac123fb, 0xdac127e8, 0xdac12be0, + 0xdac12fe1, 0xdac133f5, 0xdac137f1, 0xdac13bfd, + 0xdac13ffd, 0xdac147fc, 0xd61f083f, 0xd61f0eff, + 0xd63f0abf, 0xd63f0e9f, 0xd51b4436, 0xd51b421b, + 0xd53b4433, 0xd53b420b, 0xd53b00f0, 0xd53b0026, + 0xd53be0d1, 0xdac143e0, 0xc8047f0a, 0xc816fe6a, + 0xc85f7ca1, 0xc85ffd1e, 0xc89ffe2c, 0xc8dffdc9, + 0x88077ca1, 0x8810fd82, 0x885f7d8a, 0x885fff83, + 0x889fff4e, 0x88dffd5e, 0x480e7db5, 0x4809ff76, + 0x485f7e7c, 0x485fffcb, 0x489ffc53, 0x48dffee2, + 0x08017d80, 0x0810fded, 0x085f7eb1, 0x085ffd6d, + 0x089ffd1e, 0x08dffdb8, 0xc87f074b, 0xc87fedb5, + 0xc83430c3, 0xc821989d, 0x887f2e06, 0x887ff984, + 0x88353f9b, 0x8829d0cf, 0xf8183238, 0xb800a341, + 0x381ef00f, 0x7800d231, 0xf8431325, 0xb858c1f3, + 0x39401c74, 0x785e7093, 0x389fa26e, 0x789cb27c, + 0x78c0d36e, 0xb8988306, 0xfc457052, 0xbc5d0380, + 0xfc01739f, 0xbc02b32b, 0xf8147d0e, 0xb81a3d39, + 0x381f4ef0, 0x781c8d36, 0xf846df0b, 0xb8434e75, + 0x385feee6, 0x78415d07, 0x389e1d84, 0x789fcd68, + 0x78de4eae, 0xb89b9de0, 0xfc41dc5b, 0xbc410ead, + 0xfc010f6c, 0xbc1a5f22, 0xf81eb5aa, 0xb8032427, + 0x3800546a, 0x781df4c5, 0xf85626cd, 0xb859644d, + 0x385e860d, 0x785ea458, 0x389ff6e3, 0x789d5513, + 0x78df86e2, 0xb89a9793, 0xfc5af46f, 0xbc5b17ac, + 0xfc184435, 0xbc19d527, 0xf8226a85, 0xb83ed94e, + 0x383cd824, 0x783af945, 0xf87c7856, 0xb87c780c, + 0x386ff908, 0x786e5ab1, 0x38a8fb11, 0x78bef94a, + 0x78e07af1, 0xb8a07bd9, 0xfc775b05, 0xbc6f7876, + 0xfc30fba4, 0xbc3cda87, 0xf91ff41b, 0xb91f3115, + 0x391c0fb1, 0x791c685b, 0xf9586021, 0xb958b23d, + 0x39598921, 0x795d3077, 0x399d0675, 0x7998d8f3, + 0x79dbd02a, 0xb99d068a, 0xfd5d11a0, 0xbd58d76b, + 0xfd1ac72d, 0xbd1d9c14, 0x5800001a, 0x18ffda33, + 0xf8991100, 0xd8007880, 0xf8a758e0, 0xf9989d80, + 0x1a0b0298, 0x3a1c01a0, 0x5a0400ea, 0x7a02020f, + 0x9a1d028c, 0xba0e01ad, 0xda140186, 0xfa19022c, + 0x0b2b877e, 0x2b21c8ee, 0xcb3ba47d, 0x6b3ae9a0, + 0x8b256a36, 0xab28efd1, 0xcb37ce6a, 0xeb254fa6, + 0x3a498264, 0x7a4a72c2, 0xba4c91aa, 0xfa502303, + 0x3a4b68ed, 0x7a4a9b6b, 0xba4dd86d, 0xfa50cb45, + 0x1a9cd2aa, 0x1a9bd537, 0x5a8fd3aa, 0x5a95273e, + 0x9a95c220, 0x9a9426b0, 0xda8393d3, 0xda980573, + 0x5ac0033b, 0x5ac0046e, 0x5ac00a2e, 0x5ac011e7, + 0x5ac01798, 0xdac00331, 0xdac00742, 0xdac008bc, + 0xdac00f59, 0xdac0121b, 0xdac014d1, 0xdac10195, + 0xdac10480, 0xdac10b6c, 0xdac10f91, 0xdac1105c, + 0xdac11551, 0xdac119cf, 0xdac11c6e, 0xd71f0b2f, + 0xd71f0e6e, 0xd73f08b0, 0xd73f0c9a, 0x1ac20899, + 0x1add0d82, 0x1ac72111, 0x1ad92483, 0x1ad92b44, + 0x1ac02e24, 0x9ad70a3a, 0x9adc0eaf, 0x9aca2371, + 0x9ac72403, 0x9ac62939, 0x9acf2faf, 0x9bd17c4a, + 0x9b4b7d67, 0x1b1d5cf7, 0x1b0baf6e, 0x9b0c3f04, + 0x9b0bf28e, 0x9b2c5d6d, 0x9b2ea75e, 0x9ba7154d, + 0x9ba3adfd, 0x7edf160c, 0x1ef30a5f, 0x1eef1a15, + 0x1ee22aaa, 0x1ee73b8a, 0x1ef24bfe, 0x1ee65841, + 0x1ef9886a, 0x7ea1d4eb, 0x1e23080c, 0x1e261bb3, + 0x1e2028d7, 0x1e223b7c, 0x1e3d48e5, 0x1e2d5b2c, + 0x1e338b0c, 0x7ef6d648, 0x1e740aba, 0x1e7e1853, + 0x1e762916, 0x1e6c3ab3, 0x1e664ab2, 0x1e635870, + 0x1e7c887d, 0x1fca35cf, 0x1f0a6a4c, 0x1f0ff4e7, + 0x1f2032e0, 0x1f2d21d8, 0x1f4950ef, 0x1f5fc3b3, + 0x1f705522, 0x1f616c9e, 0x1e204319, 0x1e20c2ae, + 0x1e2140cd, 0x1e21c32c, 0x1e22c3d9, 0x1e23c2bc, + 0x1ee242f0, 0x1e6043a5, 0x1e60c276, 0x1e61428d, + 0x1e61c393, 0x1e6240d2, 0x1ee1c0ee, 0x1e38035c, + 0x9e3800d1, 0x1e780081, 0x9e7802ad, 0x1e2202a7, + 0x9e2202fb, 0x1e62028d, 0x9e62037f, 0x1e2402aa, + 0x9e640225, 0x1e3001cb, 0x9e7002ad, 0x1e2601da, + 0x9e660304, 0x1e2703b8, 0x9e6701cc, 0x1e3c2220, + 0x1e6022c0, 0x1e2020c8, 0x1e602368, 0x293c30db, + 0x29602e6e, 0x697a31e0, 0xa9025ee9, 0xa975134f, + 0x29ac20d1, 0x29f20887, 0x69fe26ce, 0xa9b0530d, + 0xa9c62d48, 0x28b21618, 0x28f06920, 0x68f00a38, + 0xa8bd45da, 0xa8c357be, 0x28325d51, 0x286c1bda, + 0xa804229e, 0xa8437536, 0x0c40702d, 0x4cdfa201, + 0x0cd36f36, 0x4cdf2759, 0x0d40c1e9, 0x4ddfcb2c, + 0x0dddcdc4, 0x4c408e31, 0x0cdf8777, 0x4d60c302, + 0x0dffc80b, 0x4df6cd82, 0x4ccd49b5, 0x0c40496c, + 0x4d40e5d2, 0x4ddfeab9, 0x0ddbee14, 0x4cdf0420, + 0x0cdd0360, 0x0d60e232, 0x0dffe705, 0x0df4e8bc, + 0x0e31b841, 0x4e31bab4, 0x0e71bbdd, 0x4e71ba30, + 0x4eb1b9cd, 0x0e30a96a, 0x4e30abdd, 0x0e70abdd, + 0x4e70aa93, 0x4eb0aaf6, 0x6e30f96a, 0x0e31a8a4, + 0x2e31a81f, 0x4e31aad5, 0x6e31a928, 0x0e71a81f, + 0x2e71aa93, 0x4e71a96a, 0x6e71abbc, 0x4eb1a862, + 0x6eb1ab59, 0x6eb0f8c5, 0x7e30f883, 0x7e70f928, + 0x7eb0faf6, 0x7ef0fa93, 0x0ea0c9cd, 0x4ea0c8c5, + 0x4ee0cbdd, 0x2ea0cb38, 0x6ea0cad5, 0x6ee0cb7a, + 0x0ea0db38, 0x4ea0d883, 0x4ee0db38, 0x0ea0eb7a, + 0x4ea0eb17, 0x4ee0ea0f, 0x2ea0dad5, 0x6ea0d883, + 0x6ee0db38, 0x0e20b928, 0x4e20bb59, 0x0e60bab4, + 0x4e60ba30, 0x0ea0ba51, 0x4ea0b862, 0x4ee0b841, + 0x0ea0f820, 0x4ea0fb38, 0x4ee0f8a4, 0x0ef8f883, + 0x4ef8f9ac, 0x2ea0f81f, 0x6ea0fbbc, 0x6ee0f96a, + 0x2ef8fb7a, 0x6ef8f862, 0x2ea1f9ac, 0x6ea1fa72, + 0x6ee1f81f, 0x2ef9f841, 0x6ef9f9cd, 0x2e205bdd, + 0x6e205820, 0x0e351e93, 0x4e2e1dac, 0x0eb31e51, + 0x4eb81ef6, 0x2e2f1dcd, 0x6e3e1fbc, 0x0e2087fe, + 0x4e21841f, 0x0e638441, 0x4e7c877a, 0x0ebe87bc, + 0x4ea684a4, 0x4ee087fe, 0x0e260ca4, 0x4e280ce6, + 0x0e600ffe, 0x4e7c0f7a, 0x0eb40e72, 0x4eab0d49, + 0x4eea0d28, 0x2e2e0dac, 0x6e220c20, 0x2e760eb4, + 0x6e630c41, 0x2eba0f38, 0x6ea40c62, 0x6ee20c20, + 0x0e2bd549, 0x4e3ad738, 0x4e7cd77a, 0x0e521630, + 0x4e4017fe, 0x2e258483, 0x6e2c856a, 0x2e798717, + 0x6e6c856a, 0x2ea684a4, 0x6eb48672, 0x6ee48462, + 0x0e2d2d8b, 0x4e2a2d28, 0x0e6c2d6a, 0x4e712e0f, + 0x0eb32e51, 0x4ea42c62, 0x4eec2d6a, 0x2e2e2dac, + 0x6e2e2dac, 0x2e712e0f, 0x6e6f2dcd, 0x2ea42c62, + 0x6ea92d07, 0x6ef62eb4, 0x0ebcd77a, 0x4eb2d630, + 0x4ee6d4a4, 0x0ec41462, 0x4ec614a4, 0x0e2e9dac, + 0x4e349e72, 0x0e779ed5, 0x4e729e30, 0x0eb49e72, + 0x4ead9d8b, 0x2eb7d6d5, 0x6eb9d717, 0x6eeed5ac, + 0x2edc177a, 0x6ed91717, 0x2e3ed7bc, 0x6e30d5ee, + 0x6e6dd58b, 0x2e5a1738, 0x6e431441, 0x2e2eddac, + 0x6e21dc1f, 0x6e6cdd6a, 0x2e521e30, 0x6e491d07, + 0x0e649462, 0x4e659483, 0x0eaf95cd, 0x4eb59693, + 0x0e33ce51, 0x4e32ce30, 0x4e65cc83, 0x0e430c41, + 0x4e4d0d8b, 0x2e6097fe, 0x6e6794c5, 0x2eaa9528, + 0x6eb1960f, 0x0ebfcfdd, 0x4ea0cffe, 0x4ee2cc20, + 0x0ed60eb4, 0x4ec90d07, 0x2e36feb4, 0x6e39ff17, + 0x6e7effbc, 0x2e573ed5, 0x6e5d3f9b, 0x0e3b6759, + 0x4e2764c5, 0x0e636441, 0x4e796717, 0x0eb26630, + 0x4ea1641f, 0x2e2764c5, 0x6e2e65ac, 0x2e6b6549, + 0x6e7e67bc, 0x2eb1660f, 0x6ebf67dd, 0x0e38a6f6, + 0x4e21a41f, 0x0e75a693, 0x4e61a41f, 0x0ea7a4c5, + 0x4eb0a5ee, 0x0e34f672, 0x4e21f41f, 0x4e74f672, + 0x0e5d379b, 0x4e5636b4, 0x0e326e30, 0x4e2e6dac, + 0x0e6d6d8b, 0x4e6b6d49, 0x0ea86ce6, 0x4ea06ffe, + 0x2e336e51, 0x6e3d6f9b, 0x2e7e6fbc, 0x6e606ffe, + 0x2ea96d07, 0x6eac6d6a, 0x0e36aeb4, 0x4e2cad6a, + 0x0e66aca4, 0x4e7aaf38, 0x0eb3ae51, 0x4eb3ae51, + 0x2e38aef6, 0x6e25ac83, 0x2e7fafdd, 0x6e71ae0f, + 0x2eb8aef6, 0x6eb5ae93, 0x2e35a693, 0x6e38a6f6, + 0x2e64a462, 0x6e71a60f, 0x2ea8a4e6, 0x6eaea5ac, + 0x0e72b630, 0x4e6db58b, 0x0eafb5cd, 0x4eb9b717, + 0x0e232441, 0x4e2027fe, 0x0e752693, 0x4e6724c5, + 0x0eb32651, 0x4ea42462, 0x0eb2f630, 0x4eb8f6f6, + 0x4eeff5cd, 0x0ecc356a, 0x4ed736d5, 0x2ebfefdd, + 0x6ebdef9b, 0x6eeeedac, 0x2edd2f9b, 0x6ec52c83, + 0x0f81100f, 0x4fab8949, 0x4fc618a4, 0x0faf51cd, + 0x4fa58083, 0x4fcd598b, 0x2fae91ac, 0x4f81800f, + 0x6fcb9949, 0x0f5d818b, 0x4f438841, 0x0fa28020, + 0x4faf89cd, 0x0e393717, 0x4e243462, 0x0e743672, + 0x4e6e35ac, 0x0ea634a4, 0x4ebe37bc, 0x4ee037fe, + 0x0e3f3fdd, 0x4e323e30, 0x0e7d3f9b, 0x4e683ce6, + 0x0eab3d49, 0x4ebf3fdd, 0x4ef43e72, 0x2e298d07, + 0x6e268ca4, 0x2e698d07, 0x6e718e0f, 0x2eab8d49, + 0x6eb98f17, 0x6eea8d28, 0x2e243462, 0x6e3e37bc, + 0x2e7736d5, 0x6e61341f, 0x2ea734c5, 0x6ebd379b, + 0x6ee23420, 0x2e333e51, 0x6e313e0f, 0x2e663ca4, + 0x6e7c3f7a, 0x2eaa3d28, 0x6ebe3fbc, 0x6ef83ef6, + 0x0e3de79b, 0x4e3de79b, 0x4e7be759, 0x2eb9e717, + 0x6ea2e420, 0x6ee6e4a4, 0x2e28e4e6, 0x6e34e672, + 0x6e62e420, 0x659229e2, 0x65903cb3, 0x65d03683, + 0x65913163, 0x65d120df, 0x65d323c6, 0x25dd8ecd, + 0x25d2063e, 0x258b0947, 0x2501358b, 0x25823032, + 0x254181d6, 0x24a646a1, 0x24719e73, 0x24233cd8, + 0x24adf4c2, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, + 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f, + 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x5e040420, + 0x4e081fe1, 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, + 0x4e042c20, 0x4e062c20, 0x4e052c20, 0x4e083c20, + 0x0e0c3c20, 0x0e0a3c20, 0x0e073c20, 0x9eae0020, + 0x0f03f409, 0x6f03f40e, 0x4cc0ac3f, 0x0ea1b820, + 0x0ef9b820, 0x4ef9b820, 0x4e21c862, 0x0e79c862, + 0x4e79c862, 0x4e61b8a4, 0x0e79b8a4, 0x4e79b8a4, + 0x05a08020, 0x05104fe0, 0x05505001, 0x05906fe2, + 0x05d03005, 0x05101fea, 0x05901feb, 0x0590cc0b, + 0x0590de0b, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, + 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420, + 0x04319160, 0x0461943e, 0x04a19020, 0x04038100, + 0x040381a0, 0x040387e1, 0x04438be2, 0x04c38fe3, + 0x040181e0, 0x04018100, 0x04018621, 0x04418b22, + 0x04418822, 0x04818c23, 0x040081e0, 0x04008120, + 0x04008761, 0x04008621, 0x04408822, 0x04808c23, + 0x042053ff, 0x047f5401, 0x25208028, 0x2538cfe0, + 0x2578d001, 0x25b8efe2, 0x25f8f007, 0x2538dfea, + 0x25b8dfeb, 0xa400a3e0, 0xa420a7e0, 0xa4484be0, + 0xa467afe0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe, + 0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff, + 0xe420e7e0, 0xe4484be0, 0xe460efe0, 0xe547e400, + 0xe4014be0, 0xe4a84fe0, 0xe5f15000, 0x858043e0, + 0x85a043ff, 0xe59f5d08, 0x0420e3e9, 0x0460e3ea, + 0x04a0e3eb, 0x04e0e3ec, 0x25104042, 0x25104871, + 0x25904861, 0x25904c92, 0x05344020, 0x05744041, + 0x05b44062, 0x05f44083, 0x252c8840, 0x253c1420, + 0x25681572, 0x25a21ce3, 0x25ea1e34, 0x253c0421, + 0x25680572, 0x25a20ce3, 0x25ea0e34, 0x0522c020, + 0x05e6c0a4, 0x2401a001, 0x2443a051, 0x24858881, + 0x24c78cd1, 0x24850891, 0x24c70cc1, 0x250f9001, + 0x25508051, 0x25802491, 0x25df28c1, 0x25850c81, + 0x251e10d1, 0x65816001, 0x65c36051, 0x65854891, + 0x65c74cc1, 0x05733820, 0x05b238a4, 0x05f138e6, + 0x0570396a, 0x65d0a001, 0x65d6a443, 0x65d4a826, + 0x6594ac26, 0x6554ac26, 0x6556ac26, 0x6552ac26, + 0x65cbac85, 0x65caac01, 0x6589ac85, 0x6588ac01, + 0x65c9ac85, 0x65c8ac01, 0x65dea833, 0x659ca509, + 0x65d8a801, 0x65dcac01, 0x655cb241, 0x0520a1e0, + 0x0521a601, 0x052281e0, 0x05238601, 0x04a14026, + 0x042244a6, 0x046344a6, 0x04a444a6, 0x04e544a7, + 0x0568aca7, 0x05b23230, 0x05302a30, 0x05702a30, + 0x05b02a30, 0x05f02a30, 0x853040af, 0xc5b040af, + 0xe57080af, 0xe5b080af, 0x25034440, 0x254054c4, + 0x25034640, 0x25415a05, 0x25834440, 0x25c54489, + 0x250b5d3a, 0x2550dc20, 0x2518e3e1, 0x2518e021, + 0x2518e0a1, 0x2518e121, 0x2518e1a1, 0x2558e3e2, + 0x2558e042, 0x2558e0c2, 0x2558e142, 0x2598e3e3, + 0x2598e063, 0x2598e0e3, 0x2598e163, 0x25d8e3e4, + 0x25d8e084, 0x25d8e104, 0x25d8e184, 0x2518e407, + 0x05214800, 0x05614800, 0x05a14800, 0x05e14800, + 0x05214c00, 0x05614c00, 0x05a14c00, 0x05e14c00, + 0x05304001, 0x05314001, 0x05a18610, 0x05e18610, + 0x0420bc31, 0x05271e11, 0x6545e891, 0x6585e891, + 0x65c5e891, 0x6545c891, 0x6585c891, 0x65c5c891, + 0x052c8020, 0x056c8020, 0x05ac8020, 0x05ec8020, + 0x45b0c210, 0x45f1c231, 0x1e601000, 0x1e603000, + 0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000, + 0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000, + 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, + 0x1e6e1000, 0x1e6e3000, 0x1e701000, 0x1e703000, + 0x1e721000, 0x1e723000, 0x1e741000, 0x1e743000, + 0x1e761000, 0x1e763000, 0x1e781000, 0x1e783000, + 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, + 0x1e7e1000, 0x1e7e3000, 0xf82c8114, 0xf8390274, + 0xf820130b, 0xf82623f4, 0xf82e30d0, 0xf82051e7, + 0xf833413a, 0xf82a72b7, 0xf836605c, 0xf8a3826f, + 0xf8b40087, 0xf8bd1007, 0xf8a92290, 0xf8b73204, + 0xf8aa5177, 0xf8b943e6, 0xf8b072ed, 0xf8ac61c1, + 0xf8e98215, 0xf8fa008f, 0xf8e41110, 0xf8e6209e, + 0xf8fd33b1, 0xf8fa51e9, 0xf8e243ab, 0xf8e37027, + 0xf8fb61f5, 0xf86e81e8, 0xf87600b9, 0xf87411f5, + 0xf87723d0, 0xf8743014, 0xf8645313, 0xf8644094, + 0xf878727a, 0xf8626108, 0xb82e81f8, 0xb83f0096, + 0xb83f1141, 0xb834200c, 0xb8293307, 0xb8305364, + 0xb826436a, 0xb838720d, 0xb83f62d6, 0xb8b483bf, + 0xb8a9028e, 0xb8a71394, 0xb8a921cb, 0xb8ac3034, + 0xb8b85269, 0xb8ad41f3, 0xb8b07005, 0xb8a3610c, + 0xb8ef820f, 0xb8e403cf, 0xb8e51140, 0xb8f6207b, + 0xb8e03269, 0xb8fd530a, 0xb8e440f4, 0xb8f871dd, + 0xb8f5636b, 0xb86d83f0, 0xb871006e, 0xb87e12d0, + 0xb8742287, 0xb8633341, 0xb8735209, 0xb8714015, + 0xb8647302, 0xb86e6166, 0xce3179d5, 0xce035c6c, + 0xce788c69, 0xce93bc7c, 0xce758347, 0xce6884ae, + 0xcec080ba, 0xce718a56, 0x2520c9e0, 0x2561c834, + 0x05800d2c, 0x0540154b, 0x0500093f, 0x25e0d78f, + 0x25a1c81c, 0x058026a8, 0x05401eca, 0x05008816, + 0x25e0c5d7, 0x25e1d35a, 0x05800e11, 0x0540155e, + 0x05026c22, 0x25e0ccf6, 0x2521c0a8, 0x0580000f, + 0x054287b3, 0x0500b04d, 0x2560d481, 0x25a1d84c, + 0x05800d38, 0x05407854, 0x050015d5, 0x25e0dabf, + 0x25a1cf12, 0x0580158b, 0x05400e05, 0x05003682, + 0x047a0093, 0x043e0462, 0x65d400b4, 0x658d09bd, + 0x658107ce, 0x04e3107c, 0x04291b29, 0x042e155a, + 0x04e71f54, 0x04d6b0d4, 0x044003ad, 0x041a0029, + 0x041099fb, 0x04db1e24, 0x0419a302, 0x041abdba, + 0x04d90e16, 0x04d38571, 0x04118210, 0x04d006fc, + 0x0497b15c, 0x049ebcf1, 0x04580f04, 0x05278969, + 0x05a496c4, 0x044801e4, 0x04ca1f44, 0x04491745, + 0x040b033f, 0x04810468, 0x04dcbb07, 0x65809e38, + 0x658d8fca, 0x65869ba8, 0x65c797ff, 0x65c294e0, + 0x049dbadd, 0x6582ba9d, 0x6580b246, 0x6581b51a, + 0x658dab93, 0x65c187d1, 0x65b19dd8, 0x65eb0b53, + 0x65fc29e0, 0x65f7b797, 0x65bbd81d, 0x65a4ed97, + 0x65b45aff, 0x65e07fa2, 0x04454097, 0x044d6e3c, + 0x04283148, 0x04bd3013, 0x047731b0, 0x04ed33d7, + 0x05606ad9, 0x056b6fd9, 0x658896ce, 0x4540b245, + 0x45c3b449, 0x04243bae, 0x44d8948e, 0x449a8edb, + 0x4499997f, 0x441b938c, 0x04da309c, 0x049821e6, + 0x04993641, 0x04482882, 0x048a2b8b, 0x044937e3, + 0x044b35f8, 0x65872d06, 0x65c63c95, 0x659834d8, + 0x04c12924, 0x0e2c116a, 0x4e2710c5, 0x0e61101f, + 0x4e7812f6, 0x0ebb1359, 0x4eb1120f, 0x2e251083, + 0x6e341272, 0x2e7011ee, 0x6e6c116a, 0x2ea41062, + 0x6eac116a, }; // END Generated code -- do not edit diff --git a/test/hotspot/jtreg/compiler/onSpinWait/TestOnSpinWaitAArch64.java b/test/hotspot/jtreg/compiler/onSpinWait/TestOnSpinWaitAArch64.java index 3bcd60ed50c..e487e921dd2 100644 --- a/test/hotspot/jtreg/compiler/onSpinWait/TestOnSpinWaitAArch64.java +++ b/test/hotspot/jtreg/compiler/onSpinWait/TestOnSpinWaitAArch64.java @@ -22,7 +22,7 @@ */ /** - * @test TestOnSpinWaitAArch64 + * @test id=TestOnSpinWaitAArch64 * @summary Checks that java.lang.Thread.onSpinWait is intrinsified with instructions specified with '-XX:OnSpinWaitInst' and '-XX:OnSpinWaitInstCount' * @bug 8186670 * @library /test/lib @@ -41,6 +41,22 @@ * @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 sb 1 */ +/** + * @test id=TestOnSpinWaitAArch64-wfet + * @summary Checks that java.lang.Thread.onSpinWait is intrinsified when -XX:OnSpinWaitInst=wfet is used + * @bug 8366441 + * @library /test/lib + * + * @requires vm.flagless + * @requires (os.arch=="aarch64" & os.family=="linux") + * @requires vm.debug + * + * @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c2 wfet 1 1 + * @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c2 wfet 1 1000 + * @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 wfet 1 1 + * @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 wfet 1 1000 + */ + package compiler.onSpinWait; import java.util.Arrays; @@ -56,6 +72,7 @@ public class TestOnSpinWaitAArch64 { String compiler = args[0]; String spinWaitInst = args[1]; String spinWaitInstCount = args[2]; + String spinWaitDelay = (args.length >= 4 ? args[3] : ""); ArrayList command = new ArrayList(); command.add("-XX:+IgnoreUnrecognizedVMOptions"); command.add("-showversion"); @@ -70,8 +87,14 @@ public class TestOnSpinWaitAArch64 { throw new RuntimeException("Unknown compiler: " + compiler); } command.add("-Xbatch"); + if ("wfet".equals(spinWaitInst)) { + command.add("-XX:+UnlockExperimentalVMOptions"); + } command.add("-XX:OnSpinWaitInst=" + spinWaitInst); command.add("-XX:OnSpinWaitInstCount=" + spinWaitInstCount); + if (!spinWaitDelay.isEmpty()) { + command.add("-XX:OnSpinWaitDelay=" + spinWaitDelay); + } command.add("-XX:CompileCommand=compileonly," + Launcher.class.getName() + "::" + "test"); command.add("-XX:CompileCommand=print," + Launcher.class.getName() + "::" + "test"); command.add(Launcher.class.getName()); @@ -85,6 +108,14 @@ public class TestOnSpinWaitAArch64 { return; } + if ("wfet".equals(spinWaitInst) && + (analyzer.contains("CPU does not support the SB instruction") || + analyzer.contains("CPU does not support the FEAT_ECV") || + analyzer.contains("CPU does not support the WFET instruction"))) { + System.out.println("Skipping the test. The CPU does not support SB or WFET instruction, or FEAT_ECV."); + return; + } + analyzer.shouldHaveExitValue(0); System.out.println(analyzer.getOutput()); @@ -101,6 +132,9 @@ public class TestOnSpinWaitAArch64 { return "3f2003d5"; } else if ("sb".equals(spinWaitInst)) { return "ff3003d5"; + } else if ("wfet".equals(spinWaitInst)) { + // This assumes rscratch1 is r8. + return "081003d5"; } else { throw new RuntimeException("Unknown spin wait instruction: " + spinWaitInst); } @@ -166,7 +200,7 @@ public class TestOnSpinWaitAArch64 { // When code is disassembled, we have one instruction per line. // Otherwise, there can be multiple hex instructions separated by '|'. foundCount += (int)Arrays.stream(line.split("\\|")) - .takeWhile(i -> i.startsWith(expectedInst)) + .filter(i -> i.startsWith(expectedInst)) .count(); }