From 58b6fc5baa0931fa6f2aa37bf0bb125497cf6cc9 Mon Sep 17 00:00:00 2001 From: Tobias Hartmann Date: Mon, 30 Sep 2024 10:56:52 +0000 Subject: [PATCH] 8341197: [BACKOUT] 8322770: Implement C2 VectorizedHashCode on AArch64 Reviewed-by: shade, jpai --- src/hotspot/cpu/aarch64/aarch64.ad | 78 -- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 68 +- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 91 -- .../cpu/aarch64/c2_MacroAssembler_aarch64.hpp | 3 - .../cpu/aarch64/stubGenerator_aarch64.cpp | 312 ----- .../cpu/aarch64/stubRoutines_aarch64.cpp | 7 +- .../cpu/aarch64/stubRoutines_aarch64.hpp | 26 +- .../cpu/aarch64/vm_version_aarch64.cpp | 4 - src/hotspot/share/utilities/intpow.hpp | 46 - test/hotspot/gtest/aarch64/aarch64-asmtest.py | 111 -- test/hotspot/gtest/aarch64/asmtest.out.h | 1189 ++++++++--------- 11 files changed, 580 insertions(+), 1355 deletions(-) delete mode 100644 src/hotspot/share/utilities/intpow.hpp diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 0a93c27c268..39eae43a287 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -4931,60 +4931,6 @@ operand vRegD_V7() interface(REG_INTER); %} -operand vRegD_V12() -%{ - constraint(ALLOC_IN_RC(v12_reg)); - match(RegD); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - -operand vRegD_V13() -%{ - constraint(ALLOC_IN_RC(v13_reg)); - match(RegD); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - -operand vRegD_V14() -%{ - constraint(ALLOC_IN_RC(v14_reg)); - match(RegD); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - -operand vRegD_V15() -%{ - constraint(ALLOC_IN_RC(v15_reg)); - match(RegD); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - -operand vRegD_V16() -%{ - constraint(ALLOC_IN_RC(v16_reg)); - match(RegD); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - -operand vRegD_V17() -%{ - constraint(ALLOC_IN_RC(v17_reg)); - match(RegD); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - operand pReg() %{ constraint(ALLOC_IN_RC(pr_reg)); @@ -16605,30 +16551,6 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, ins_pipe(pipe_class_memory); %} -instruct arrays_hashcode(iRegP_R1 ary, iRegI_R2 cnt, iRegI_R0 result, immI basic_type, - vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3, - vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7, - vRegD_V12 vtmp8, vRegD_V13 vtmp9, vRegD_V14 vtmp10, - vRegD_V15 vtmp11, vRegD_V16 vtmp12, vRegD_V17 vtmp13, - rFlagsReg cr) -%{ - match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type))); - effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, - TEMP vtmp7, TEMP vtmp8, TEMP vtmp9, TEMP vtmp10, TEMP vtmp11, TEMP vtmp12, TEMP vtmp13, - USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr); - - format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %} - ins_encode %{ - address tpc = __ arrays_hashcode($ary$$Register, $cnt$$Register, $result$$Register, - (BasicType)$basic_type$$constant); - if (tpc == nullptr) { - ciEnv::current()->record_failure("CodeCache is full"); - return; - } - %} - ins_pipe(pipe_class_memory); -%} - instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr) %{ match(Set result (CountPositives ary1 len)); diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index a5e0e2665af..28a0cc2c7d9 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -287,11 +287,6 @@ public: f(r->raw_encoding(), lsb + 4, lsb); } - //<0-15>reg: As `rf(FloatRegister)`, but only the lower 16 FloatRegisters are allowed. - void lrf(FloatRegister r, int lsb) { - f(r->raw_encoding(), lsb + 3, lsb); - } - void prf(PRegister r, int lsb) { f(r->raw_encoding(), lsb + 3, lsb); } @@ -770,7 +765,6 @@ public: #define f current_insn.f #define sf current_insn.sf #define rf current_insn.rf -#define lrf current_insn.lrf #define srf current_insn.srf #define zrf current_insn.zrf #define prf current_insn.prf @@ -1596,16 +1590,6 @@ public: #undef INSN - // Load/store a register, but with a BasicType parameter. Loaded signed integer values are - // extended to 64 bits. - void load(Register Rt, const Address &adr, BasicType bt) { - int op = (is_signed_subword_type(bt) || bt == T_INT) ? 0b10 : 0b01; - ld_st2(Rt, adr, exact_log2(type2aelembytes(bt)), op); - } - void store(Register Rt, const Address &adr, BasicType bt) { - ld_st2(Rt, adr, exact_log2(type2aelembytes(bt)), 0b00); - } - /* SIMD extensions * * We just use FloatRegister in the following. They are exactly the same @@ -2603,7 +2587,6 @@ template INSN(addpv, 0, 0b101111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S - INSN(smlalv, 0, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S @@ -2877,28 +2860,6 @@ template // FMULX - Vector - Scalar INSN(fmulxvs, 1, 0b1001); -#undef INSN - -#define INSN(NAME, op1, op2) \ - void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) { \ - starti; \ - assert(T == T4H || T == T8H || T == T2S || T == T4S, "invalid arrangement"); \ - assert(index >= 0 && \ - ((T == T2S && index <= 1) || (T != T2S && index <= 3) || (T == T8H && index <= 7)), \ - "invalid index"); \ - assert((T != T4H && T != T8H) || Vm->encoding() < 16, "invalid source SIMD&FP register"); \ - f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01111, 28, 24); \ - if (T == T4H || T == T8H) { \ - f(0b01, 23, 22), f(index & 0b11, 21, 20), lrf(Vm, 16), f(index >> 2 & 1, 11); \ - } else { \ - f(0b10, 23, 22), f(index & 1, 21), rf(Vm, 16), f(index >> 1, 11); \ - } \ - f(op2, 15, 12), f(0, 10), rf(Vn, 5), rf(Vd, 0); \ - } - - // MUL - Vector - Scalar - INSN(mulvs, 0, 0b1000); - #undef INSN // Floating-point Reciprocal Estimate @@ -3062,33 +3023,6 @@ public: umov(Xd, Vn, T, index); } - protected: - void _xaddwv(bool is_unsigned, FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement Ta, - FloatRegister Vm, SIMD_Arrangement Tb) { - starti; - assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement"); - f(0, 31), f((int)Tb & 1, 30), f(is_unsigned ? 1 : 0, 29), f(0b01110, 28, 24); - f((int)(Ta >> 1) - 1, 23, 22), f(1, 21), rf(Vm, 16), f(0b000100, 15, 10), rf(Vn, 5), rf(Vd, 0); - } - - public: -#define INSN(NAME, assertion, is_unsigned) \ - void NAME(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement Ta, FloatRegister Vm, \ - SIMD_Arrangement Tb) { \ - assert((assertion), "invalid arrangement"); \ - _xaddwv(is_unsigned, Vd, Vn, Ta, Vm, Tb); \ - } - -public: - - INSN(uaddwv, Tb == T8B || Tb == T4H || Tb == T2S, /*is_unsigned*/true) - INSN(uaddwv2, Tb == T16B || Tb == T8H || Tb == T4S, /*is_unsigned*/true) - INSN(saddwv, Tb == T8B || Tb == T4H || Tb == T2S, /*is_unsigned*/false) - INSN(saddwv2, Tb == T16B || Tb == T8H || Tb == T4S, /*is_unsigned*/false) - -#undef INSN - - private: void _pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { starti; diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index ab2bd7d782c..b4c12ecd4a8 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -33,7 +33,6 @@ #include "opto/subnode.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/globalDefinitions.hpp" -#include "utilities/powerOfTwo.hpp" #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ @@ -47,96 +46,6 @@ typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); -// jdk.internal.util.ArraysSupport.vectorizedHashCode -address C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register result, - BasicType eltype) { - assert_different_registers(ary, cnt, result, rscratch1, rscratch2); - - Register tmp1 = rscratch1, tmp2 = rscratch2; - - Label TAIL, STUB_SWITCH, STUB_SWITCH_OUT, LOOP, BR_BASE, LARGE, DONE; - - // Vectorization factor. Number of array elements loaded to one SIMD&FP registers by the stubs. We - // use 8H load arrangements for chars and shorts and 8B for booleans and bytes. It's possible to - // use 4H for chars and shorts instead, but using 8H gives better performance. - const size_t vf = eltype == T_BOOLEAN || eltype == T_BYTE ? 8 - : eltype == T_CHAR || eltype == T_SHORT ? 8 - : eltype == T_INT ? 4 - : 0; - guarantee(vf, "unsupported eltype"); - - // Unroll factor for the scalar loop below. The value is chosen based on performance analysis. - const size_t unroll_factor = 4; - - switch (eltype) { - case T_BOOLEAN: - BLOCK_COMMENT("arrays_hashcode(unsigned byte) {"); - break; - case T_CHAR: - BLOCK_COMMENT("arrays_hashcode(char) {"); - break; - case T_BYTE: - BLOCK_COMMENT("arrays_hashcode(byte) {"); - break; - case T_SHORT: - BLOCK_COMMENT("arrays_hashcode(short) {"); - break; - case T_INT: - BLOCK_COMMENT("arrays_hashcode(int) {"); - break; - default: - ShouldNotReachHere(); - } - - // large_arrays_hashcode(T_INT) performs worse than the scalar loop below when the Neon loop - // implemented by the stub executes just once. Call the stub only if at least two iterations will - // be executed. - const size_t large_threshold = eltype == T_INT ? vf * 2 : vf; - cmpw(cnt, large_threshold); - br(Assembler::HS, LARGE); - - bind(TAIL); - - // The andr performs cnt % uf where uf = unroll_factor. The subtract shifted by 3 offsets past - // uf - (cnt % uf) pairs of load + madd insns i.e. it only executes cnt % uf load + madd pairs. - // Iteration eats up the remainder, uf elements at a time. - assert(is_power_of_2(unroll_factor), "can't use this value to calculate the jump target PC"); - andr(tmp2, cnt, unroll_factor - 1); - adr(tmp1, BR_BASE); - sub(tmp1, tmp1, tmp2, ext::sxtw, 3); - movw(tmp2, 0x1f); - br(tmp1); - - bind(LOOP); - for (size_t i = 0; i < unroll_factor; ++i) { - load(tmp1, Address(post(ary, type2aelembytes(eltype))), eltype); - maddw(result, result, tmp2, tmp1); - } - bind(BR_BASE); - subsw(cnt, cnt, unroll_factor); - br(Assembler::HS, LOOP); - - b(DONE); - - bind(LARGE); - - RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_arrays_hashcode(eltype)); - assert(stub.target() != nullptr, "array_hashcode stub has not been generated"); - address tpc = trampoline_call(stub); - if (tpc == nullptr) { - DEBUG_ONLY(reset_labels(TAIL, BR_BASE)); - postcond(pc() == badAddress); - return nullptr; - } - - bind(DONE); - - BLOCK_COMMENT("} // arrays_hashcode"); - - postcond(pc() != badAddress); - return pc(); -} - void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register tmpReg, Register tmp2Reg, Register tmp3Reg) { Register oop = objectReg; diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index 28cc401a1b2..43e60ae5a48 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -35,9 +35,6 @@ enum shift_kind kind = Assembler::LSL, unsigned shift = 0); public: - // jdk.internal.util.ArraysSupport.vectorizedHashCode - address arrays_hashcode(Register ary, Register cnt, Register result, BasicType eltype); - // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3); void fast_unlock(Register object, Register box, Register tmp, Register tmp2); diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 3dbde1ae824..eb235f8472c 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -53,9 +53,7 @@ #include "runtime/stubRoutines.hpp" #include "utilities/align.hpp" #include "utilities/checkedCast.hpp" -#include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" -#include "utilities/intpow.hpp" #include "utilities/powerOfTwo.hpp" #ifdef COMPILER2 #include "opto/runtime.hpp" @@ -5313,309 +5311,6 @@ class StubGenerator: public StubCodeGenerator { return entry; } - // result = r0 - return value. Contains initial hashcode value on entry. - // ary = r1 - array address - // cnt = r2 - elements count - // Clobbers: v0-v13, rscratch1, rscratch2 - address generate_large_arrays_hashcode(BasicType eltype) { - const Register result = r0, ary = r1, cnt = r2; - const FloatRegister vdata0 = v3, vdata1 = v2, vdata2 = v1, vdata3 = v0; - const FloatRegister vmul0 = v4, vmul1 = v5, vmul2 = v6, vmul3 = v7; - const FloatRegister vpow = v8; // powers of 31: <31^3, ..., 31^0> - const FloatRegister vpowm = v9; - - assert_different_registers(ary, cnt, result); - assert_different_registers(vdata0, vdata1, vdata2, vdata3, vmul0, vmul1, vmul2, vmul3, vpow, - vpowm); - - Label SMALL_LOOP, LARGE_LOOP_PREHEADER, LARGE_LOOP, TAIL, TAIL_SHORTCUT, BR_BASE; - - unsigned int vf; // vectorization factor - bool multiply_by_halves; - Assembler::SIMD_Arrangement load_arrangement; - switch (eltype) { - case T_BOOLEAN: - case T_BYTE: - load_arrangement = Assembler::T8B; - multiply_by_halves = true; - vf = 8; - break; - case T_CHAR: - case T_SHORT: - load_arrangement = Assembler::T8H; - multiply_by_halves = true; - vf = 8; - break; - case T_INT: - load_arrangement = Assembler::T4S; - multiply_by_halves = false; - vf = 4; - break; - default: - ShouldNotReachHere(); - } - - // Unroll factor - const unsigned uf = 4; - - // Effective vectorization factor - const unsigned evf = vf * uf; - - __ align(CodeEntryAlignment); - - const char *mark_name = ""; - switch (eltype) { - case T_BOOLEAN: - mark_name = "_large_arrays_hashcode_boolean"; - break; - case T_BYTE: - mark_name = "_large_arrays_hashcode_byte"; - break; - case T_CHAR: - mark_name = "_large_arrays_hashcode_char"; - break; - case T_SHORT: - mark_name = "_large_arrays_hashcode_short"; - break; - case T_INT: - mark_name = "_large_arrays_hashcode_int"; - break; - default: - mark_name = "_large_arrays_hashcode_incorrect_type"; - __ should_not_reach_here(); - }; - - StubCodeMark mark(this, "StubRoutines", mark_name); - - address entry = __ pc(); - __ enter(); - - // Put 0-3'th powers of 31 into a single SIMD register together. The register will be used in - // the SMALL and LARGE LOOPS' epilogues. The initialization is hoisted here and the register's - // value shouldn't change throughout both loops. - __ movw(rscratch1, intpow(31U, 3)); - __ mov(vpow, Assembler::S, 0, rscratch1); - __ movw(rscratch1, intpow(31U, 2)); - __ mov(vpow, Assembler::S, 1, rscratch1); - __ movw(rscratch1, intpow(31U, 1)); - __ mov(vpow, Assembler::S, 2, rscratch1); - __ movw(rscratch1, intpow(31U, 0)); - __ mov(vpow, Assembler::S, 3, rscratch1); - - __ mov(vmul0, Assembler::T16B, 0); - __ mov(vmul0, Assembler::S, 3, result); - - __ andr(rscratch2, cnt, (uf - 1) * vf); - __ cbz(rscratch2, LARGE_LOOP_PREHEADER); - - __ movw(rscratch1, intpow(31U, multiply_by_halves ? vf / 2 : vf)); - __ mov(vpowm, Assembler::S, 0, rscratch1); - - // SMALL LOOP - __ bind(SMALL_LOOP); - - __ ld1(vdata0, load_arrangement, Address(__ post(ary, vf * type2aelembytes(eltype)))); - __ mulvs(vmul0, Assembler::T4S, vmul0, vpowm, 0); - __ subsw(rscratch2, rscratch2, vf); - - if (load_arrangement == Assembler::T8B) { - // Extend 8B to 8H to be able to use vector multiply - // instructions - assert(load_arrangement == Assembler::T8B, "expected to extend 8B to 8H"); - if (is_signed_subword_type(eltype)) { - __ sxtl(vdata0, Assembler::T8H, vdata0, load_arrangement); - } else { - __ uxtl(vdata0, Assembler::T8H, vdata0, load_arrangement); - } - } - - switch (load_arrangement) { - case Assembler::T4S: - __ addv(vmul0, load_arrangement, vmul0, vdata0); - break; - case Assembler::T8B: - case Assembler::T8H: - assert(is_subword_type(eltype), "subword type expected"); - if (is_signed_subword_type(eltype)) { - __ saddwv(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T4H); - } else { - __ uaddwv(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T4H); - } - break; - default: - __ should_not_reach_here(); - } - - // Process the upper half of a vector - if (load_arrangement == Assembler::T8B || load_arrangement == Assembler::T8H) { - __ mulvs(vmul0, Assembler::T4S, vmul0, vpowm, 0); - if (is_signed_subword_type(eltype)) { - __ saddwv2(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T8H); - } else { - __ uaddwv2(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T8H); - } - } - - __ br(Assembler::HI, SMALL_LOOP); - - // SMALL LOOP'S EPILOQUE - __ lsr(rscratch2, cnt, exact_log2(evf)); - __ cbnz(rscratch2, LARGE_LOOP_PREHEADER); - - __ mulv(vmul0, Assembler::T4S, vmul0, vpow); - __ addv(vmul0, Assembler::T4S, vmul0); - __ umov(result, vmul0, Assembler::S, 0); - - // TAIL - __ bind(TAIL); - - // The andr performs cnt % vf. The subtract shifted by 3 offsets past vf - 1 - (cnt % vf) pairs - // of load + madd insns i.e. it only executes cnt % vf load + madd pairs. - assert(is_power_of_2(vf), "can't use this value to calculate the jump target PC"); - __ andr(rscratch2, cnt, vf - 1); - __ bind(TAIL_SHORTCUT); - __ adr(rscratch1, BR_BASE); - __ sub(rscratch1, rscratch1, rscratch2, ext::uxtw, 3); - __ movw(rscratch2, 0x1f); - __ br(rscratch1); - - for (size_t i = 0; i < vf - 1; ++i) { - __ load(rscratch1, Address(__ post(ary, type2aelembytes(eltype))), - eltype); - __ maddw(result, result, rscratch2, rscratch1); - } - __ bind(BR_BASE); - - __ leave(); - __ ret(lr); - - // LARGE LOOP - __ bind(LARGE_LOOP_PREHEADER); - - __ lsr(rscratch2, cnt, exact_log2(evf)); - - if (multiply_by_halves) { - // 31^4 - multiplier between lower and upper parts of a register - __ movw(rscratch1, intpow(31U, vf / 2)); - __ mov(vpowm, Assembler::S, 1, rscratch1); - // 31^28 - remainder of the iteraion multiplier, 28 = 32 - 4 - __ movw(rscratch1, intpow(31U, evf - vf / 2)); - __ mov(vpowm, Assembler::S, 0, rscratch1); - } else { - // 31^16 - __ movw(rscratch1, intpow(31U, evf)); - __ mov(vpowm, Assembler::S, 0, rscratch1); - } - - __ mov(vmul3, Assembler::T16B, 0); - __ mov(vmul2, Assembler::T16B, 0); - __ mov(vmul1, Assembler::T16B, 0); - - __ bind(LARGE_LOOP); - - __ mulvs(vmul3, Assembler::T4S, vmul3, vpowm, 0); - __ mulvs(vmul2, Assembler::T4S, vmul2, vpowm, 0); - __ mulvs(vmul1, Assembler::T4S, vmul1, vpowm, 0); - __ mulvs(vmul0, Assembler::T4S, vmul0, vpowm, 0); - - __ ld1(vdata3, vdata2, vdata1, vdata0, load_arrangement, - Address(__ post(ary, evf * type2aelembytes(eltype)))); - - if (load_arrangement == Assembler::T8B) { - // Extend 8B to 8H to be able to use vector multiply - // instructions - assert(load_arrangement == Assembler::T8B, "expected to extend 8B to 8H"); - if (is_signed_subword_type(eltype)) { - __ sxtl(vdata3, Assembler::T8H, vdata3, load_arrangement); - __ sxtl(vdata2, Assembler::T8H, vdata2, load_arrangement); - __ sxtl(vdata1, Assembler::T8H, vdata1, load_arrangement); - __ sxtl(vdata0, Assembler::T8H, vdata0, load_arrangement); - } else { - __ uxtl(vdata3, Assembler::T8H, vdata3, load_arrangement); - __ uxtl(vdata2, Assembler::T8H, vdata2, load_arrangement); - __ uxtl(vdata1, Assembler::T8H, vdata1, load_arrangement); - __ uxtl(vdata0, Assembler::T8H, vdata0, load_arrangement); - } - } - - switch (load_arrangement) { - case Assembler::T4S: - __ addv(vmul3, load_arrangement, vmul3, vdata3); - __ addv(vmul2, load_arrangement, vmul2, vdata2); - __ addv(vmul1, load_arrangement, vmul1, vdata1); - __ addv(vmul0, load_arrangement, vmul0, vdata0); - break; - case Assembler::T8B: - case Assembler::T8H: - assert(is_subword_type(eltype), "subword type expected"); - if (is_signed_subword_type(eltype)) { - __ saddwv(vmul3, vmul3, Assembler::T4S, vdata3, Assembler::T4H); - __ saddwv(vmul2, vmul2, Assembler::T4S, vdata2, Assembler::T4H); - __ saddwv(vmul1, vmul1, Assembler::T4S, vdata1, Assembler::T4H); - __ saddwv(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T4H); - } else { - __ uaddwv(vmul3, vmul3, Assembler::T4S, vdata3, Assembler::T4H); - __ uaddwv(vmul2, vmul2, Assembler::T4S, vdata2, Assembler::T4H); - __ uaddwv(vmul1, vmul1, Assembler::T4S, vdata1, Assembler::T4H); - __ uaddwv(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T4H); - } - break; - default: - __ should_not_reach_here(); - } - - // Process the upper half of a vector - if (load_arrangement == Assembler::T8B || load_arrangement == Assembler::T8H) { - __ mulvs(vmul3, Assembler::T4S, vmul3, vpowm, 1); - __ mulvs(vmul2, Assembler::T4S, vmul2, vpowm, 1); - __ mulvs(vmul1, Assembler::T4S, vmul1, vpowm, 1); - __ mulvs(vmul0, Assembler::T4S, vmul0, vpowm, 1); - if (is_signed_subword_type(eltype)) { - __ saddwv2(vmul3, vmul3, Assembler::T4S, vdata3, Assembler::T8H); - __ saddwv2(vmul2, vmul2, Assembler::T4S, vdata2, Assembler::T8H); - __ saddwv2(vmul1, vmul1, Assembler::T4S, vdata1, Assembler::T8H); - __ saddwv2(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T8H); - } else { - __ uaddwv2(vmul3, vmul3, Assembler::T4S, vdata3, Assembler::T8H); - __ uaddwv2(vmul2, vmul2, Assembler::T4S, vdata2, Assembler::T8H); - __ uaddwv2(vmul1, vmul1, Assembler::T4S, vdata1, Assembler::T8H); - __ uaddwv2(vmul0, vmul0, Assembler::T4S, vdata0, Assembler::T8H); - } - } - - __ subsw(rscratch2, rscratch2, 1); - __ br(Assembler::HI, LARGE_LOOP); - - __ mulv(vmul3, Assembler::T4S, vmul3, vpow); - __ addv(vmul3, Assembler::T4S, vmul3); - __ umov(result, vmul3, Assembler::S, 0); - - __ mov(rscratch2, intpow(31U, vf)); - - __ mulv(vmul2, Assembler::T4S, vmul2, vpow); - __ addv(vmul2, Assembler::T4S, vmul2); - __ umov(rscratch1, vmul2, Assembler::S, 0); - __ maddw(result, result, rscratch2, rscratch1); - - __ mulv(vmul1, Assembler::T4S, vmul1, vpow); - __ addv(vmul1, Assembler::T4S, vmul1); - __ umov(rscratch1, vmul1, Assembler::S, 0); - __ maddw(result, result, rscratch2, rscratch1); - - __ mulv(vmul0, Assembler::T4S, vmul0, vpow); - __ addv(vmul0, Assembler::T4S, vmul0); - __ umov(rscratch1, vmul0, Assembler::S, 0); - __ maddw(result, result, rscratch2, rscratch1); - - __ andr(rscratch2, cnt, vf - 1); - __ cbnz(rscratch2, TAIL_SHORTCUT); - - __ leave(); - __ ret(lr); - - return entry; - } - address generate_dsin_dcos(bool isCos) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); @@ -8562,13 +8257,6 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::aarch64::_large_array_equals = generate_large_array_equals(); } - // arrays_hascode stub for large arrays. - StubRoutines::aarch64::_large_arrays_hashcode_boolean = generate_large_arrays_hashcode(T_BOOLEAN); - StubRoutines::aarch64::_large_arrays_hashcode_byte = generate_large_arrays_hashcode(T_BYTE); - StubRoutines::aarch64::_large_arrays_hashcode_char = generate_large_arrays_hashcode(T_CHAR); - StubRoutines::aarch64::_large_arrays_hashcode_int = generate_large_arrays_hashcode(T_INT); - StubRoutines::aarch64::_large_arrays_hashcode_short = generate_large_arrays_hashcode(T_SHORT); - // byte_array_inflate stub for large arrays. StubRoutines::aarch64::_large_byte_array_inflate = generate_large_byte_array_inflate(); diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp index dee615df5a5..80875a3b3cd 100644 --- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -48,11 +48,6 @@ address StubRoutines::aarch64::_zero_blocks = nullptr; address StubRoutines::aarch64::_count_positives = nullptr; address StubRoutines::aarch64::_count_positives_long = nullptr; address StubRoutines::aarch64::_large_array_equals = nullptr; -address StubRoutines::aarch64::_large_arrays_hashcode_boolean = nullptr; -address StubRoutines::aarch64::_large_arrays_hashcode_byte = nullptr; -address StubRoutines::aarch64::_large_arrays_hashcode_char = nullptr; -address StubRoutines::aarch64::_large_arrays_hashcode_int = nullptr; -address StubRoutines::aarch64::_large_arrays_hashcode_short = nullptr; address StubRoutines::aarch64::_compare_long_string_LL = nullptr; address StubRoutines::aarch64::_compare_long_string_UU = nullptr; address StubRoutines::aarch64::_compare_long_string_LU = nullptr; diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp index 7d3b72a8836..e6438908ce4 100644 --- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -62,11 +62,6 @@ class aarch64 { static address _zero_blocks; static address _large_array_equals; - static address _large_arrays_hashcode_boolean; - static address _large_arrays_hashcode_byte; - static address _large_arrays_hashcode_char; - static address _large_arrays_hashcode_int; - static address _large_arrays_hashcode_short; static address _compare_long_string_LL; static address _compare_long_string_LU; static address _compare_long_string_UL; @@ -150,25 +145,6 @@ class aarch64 { return _large_array_equals; } - static address large_arrays_hashcode(BasicType eltype) { - switch (eltype) { - case T_BOOLEAN: - return _large_arrays_hashcode_boolean; - case T_BYTE: - return _large_arrays_hashcode_byte; - case T_CHAR: - return _large_arrays_hashcode_char; - case T_SHORT: - return _large_arrays_hashcode_short; - case T_INT: - return _large_arrays_hashcode_int; - default: - ShouldNotReachHere(); - } - - return nullptr; - } - static address compare_long_string_LL() { return _compare_long_string_LL; } diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index 81e39113afa..d71162ac568 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -574,10 +574,6 @@ void VM_Version::initialize() { if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); } - - if (FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { - FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true); - } #endif _spin_wait = get_spin_wait_desc(); diff --git a/src/hotspot/share/utilities/intpow.hpp b/src/hotspot/share/utilities/intpow.hpp deleted file mode 100644 index 0b441a55c4c..00000000000 --- a/src/hotspot/share/utilities/intpow.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2024, Arm Limited. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef SHARE_UTILITIES_INTPOW_HPP -#define SHARE_UTILITIES_INTPOW_HPP - -#include "metaprogramming/enableIf.hpp" -#include -#include - -// Raise v to the power p mod 2**N, where N is the width of the type T. -template ::value && std::is_unsigned::value)> -static constexpr T intpow(T v, unsigned p) { - if (p == 0) { - return 1; - } - - // We use exponentiation by squaring to calculate the required power. - T a = intpow(v, p / 2); - T b = (p % 2) ? v : 1; - - return a * a * b; -} - -#endif // SHARE_UTILITIES_INTPOW_HPP diff --git a/test/hotspot/gtest/aarch64/aarch64-asmtest.py b/test/hotspot/gtest/aarch64/aarch64-asmtest.py index 64f3e787356..7e9d557d11c 100644 --- a/test/hotspot/gtest/aarch64/aarch64-asmtest.py +++ b/test/hotspot/gtest/aarch64/aarch64-asmtest.py @@ -77,29 +77,11 @@ class FloatRegister(Register): def __str__(self): return self.astr("v") - def generate(self): - self.number = random.randint(0, 31) - return self - def nextReg(self): next = FloatRegister() next.number = (self.number + 1) % 32 return next -class LowFloatRegister(Register): - - def __str__(self): - return self.astr("v") - - def generate(self): - self.number = random.randint(0, 15) - return self - - def nextReg(self): - next = FloatRegister() - next.number = (self.number + 1) % 16 - return next - class GeneralRegister(Register): def __str__(self): @@ -1289,75 +1271,6 @@ class CommonNEONInstruction(Instruction): def aname(self): return self._name -class VectorScalarNEONInstruction(Instruction): - def __init__(self, args): - self._name, self.insname, self.arrangement = args - - def generate(self): - vectorLength = {"8B" : 8, "16B" : 16, "4H" : 4, "8H" : 8, "2S" : 2, "4S" : 4, "1D" : 1, "2D" : 2} [self.arrangement] - self.elemIndex = random.randrange(0, vectorLength) - self.elemSizeSpecifier = self.arrangement[len(self.arrangement) - 1:] - self._firstSIMDreg = LowFloatRegister().generate() - self.numRegs = 3 - return self - - def cstr(self): - buf = Instruction.cstr(self) + str(self._firstSIMDreg) - buf = '%s, __ T%s' % (buf, self.arrangement) - current = self._firstSIMDreg - for cnt in range(1, self.numRegs - 1): - buf = '%s, %s' % (buf, current.nextReg()) - current = current.nextReg() - buf = '%s, %s, %d' % (buf, current.nextReg(), self.elemIndex) - return '%s);' % (buf) - - def astr(self): - buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement) - current = self._firstSIMDreg - for cnt in range(1, self.numRegs - 1): - buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement) - current = current.nextReg() - buf = '%s, %s.%s[%d]' % (buf, current.nextReg(), self.elemSizeSpecifier, self.elemIndex) - return buf - - def aname(self): - return self._name - -class WideningNEONInstruction(Instruction): - def __init__(self, args): - self._name, self.insname, self.widerArrangement, self.narrowerArrangement = args - - def generate(self): - self._firstSIMDreg = FloatRegister().generate() - return self - - def cstr(self): - buf = Instruction.cstr(self) + str(self._firstSIMDreg) - current = self._firstSIMDreg - for cnt in range(1, self.numWiderRegs): - buf = '%s, %s' % (buf, current.nextReg()) - current = current.nextReg() - buf = '%s, __ T%s' % (buf, self.widerArrangement) - for cnt in range(0, self.numNarrowerRegs): - buf = '%s, %s' % (buf, current.nextReg()) - current = current.nextReg() - buf = '%s, __ T%s' % (buf, self.narrowerArrangement) - return '%s);' % (buf) - - def astr(self): - buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.widerArrangement) - current = self._firstSIMDreg - for cnt in range(1, self.numWiderRegs): - buf = '%s, %s.%s' % (buf, current.nextReg(), self.widerArrangement) - current = current.nextReg() - for cnt in range(0, self.numNarrowerRegs): - buf = '%s, %s.%s' % (buf, current.nextReg(), self.narrowerArrangement) - current = current.nextReg() - return buf - - def aname(self): - return self._name - class SHA512SIMDOp(Instruction): def generate(self): @@ -1477,10 +1390,6 @@ class TwoRegNEONOp(CommonNEONInstruction): class ThreeRegNEONOp(TwoRegNEONOp): numRegs = 3 -class AddWideNEONOp(WideningNEONInstruction): - numWiderRegs = 2 - numNarrowerRegs = 1 - class NEONFloatCompareWithZero(TwoRegNEONOp): def __init__(self, args): self._name = 'fcm' @@ -1839,17 +1748,6 @@ generate(ThreeRegNEONOp, ["facgt", "facgt", "2D"], ]) -generate(VectorScalarNEONInstruction, - [["fmlavs", "fmla", "2S"], ["mulvs", "mul", "4S"], - ["fmlavs", "fmla", "2D"], - ["fmlsvs", "fmls", "2S"], ["mulvs", "mul", "4S"], - ["fmlsvs", "fmls", "2D"], - ["fmulxvs", "fmulx", "2S"], ["mulvs", "mul", "4S"], - ["fmulxvs", "fmulx", "2D"], - ["mulvs", "mul", "4H"], ["mulvs", "mul", "8H"], - ["mulvs", "mul", "2S"], ["mulvs", "mul", "4S"], - ]) - neonVectorCompareInstructionPrefix = ['cm', 'fcm'] neonIntegerVectorCompareConditions = ['GT', 'GE', 'EQ', 'HI', 'HS'] neonFloatVectorCompareConditions = ['EQ', 'GT', 'GE'] @@ -2183,15 +2081,6 @@ generate(SVEVectorOp, [["add", "ZZZ"], generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0], ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]]) -generate(AddWideNEONOp, - [["saddwv", "saddw", "8H", "8B"], ["saddwv2", "saddw2", "8H", "16B"], - ["saddwv", "saddw", "4S", "4H"], ["saddwv2", "saddw2", "4S", "8H"], - ["saddwv", "saddw", "2D", "2S"], ["saddwv2", "saddw2", "2D", "4S"], - ["uaddwv", "uaddw", "8H", "8B"], ["uaddwv2", "uaddw2", "8H", "16B"], - ["uaddwv", "uaddw", "4S", "4H"], ["uaddwv2", "uaddw2", "4S", "8H"], - ["uaddwv", "uaddw", "2D", "2S"], ["uaddwv2", "uaddw2", "2D", "4S"], - ]) - print "\n __ bind(forth);" outfile.write("forth:\n") diff --git a/test/hotspot/gtest/aarch64/asmtest.out.h b/test/hotspot/gtest/aarch64/asmtest.out.h index 9805a05c5c1..b8260aaf932 100644 --- a/test/hotspot/gtest/aarch64/asmtest.out.h +++ b/test/hotspot/gtest/aarch64/asmtest.out.h @@ -293,9 +293,9 @@ __ ldrshw(r5, Address(r3, 12)); // ldrsh w5, [x3, 12] __ ldrsw(r27, Address(r24, 17)); // ldrsw x27, [x24, 17] __ ldrd(v13, Address(r29, -35)); // ldr d13, [x29, -35] - __ ldrs(v23, Address(r9, -47)); // ldr s23, [x9, -47] + __ ldrs(v22, Address(r9, -47)); // ldr s22, [x9, -47] __ strd(v11, Address(r0, 9)); // str d11, [x0, 9] - __ strs(v21, Address(r0, -127)); // str s21, [x0, -127] + __ strs(v20, Address(r0, -127)); // str s20, [x0, -127] // pre // LoadStoreOp @@ -314,7 +314,7 @@ __ ldrd(v0, Address(__ pre(r14, -54))); // ldr d0, [x14, -54]! __ ldrs(v3, Address(__ pre(r1, 40))); // ldr s3, [x1, 40]! __ strd(v4, Address(__ pre(r14, -94))); // str d4, [x14, -94]! - __ strs(v18, Address(__ pre(r28, -54))); // str s18, [x28, -54]! + __ strs(v17, Address(__ pre(r28, -54))); // str s17, [x28, -54]! // post // LoadStoreOp @@ -331,8 +331,8 @@ __ ldrshw(r3, Address(__ post(r11, -48))); // ldrsh w3, [x11], -48 __ ldrsw(r25, Address(__ post(r23, 22))); // ldrsw x25, [x23], 22 __ ldrd(v0, Address(__ post(r10, -215))); // ldr d0, [x10], -215 - __ ldrs(v19, Address(__ post(r6, 55))); // ldr s19, [x6], 55 - __ strd(v14, Address(__ post(r21, -234))); // str d14, [x21], -234 + __ ldrs(v17, Address(__ post(r6, 55))); // ldr s17, [x6], 55 + __ strd(v13, Address(__ post(r21, -234))); // str d13, [x21], -234 __ strs(v0, Address(__ post(r22, -70))); // str s0, [x22], -70 // base_plus_reg @@ -349,9 +349,9 @@ __ ldrsh(r21, Address(r30, r30, Address::sxtw(1))); // ldrsh x21, [x30, w30, sxtw #1] __ ldrshw(r11, Address(r10, r28, Address::sxtw(1))); // ldrsh w11, [x10, w28, sxtw #1] __ ldrsw(r28, Address(r19, r10, Address::uxtw(0))); // ldrsw x28, [x19, w10, uxtw #0] - __ ldrd(v30, Address(r29, r14, Address::sxtw(0))); // ldr d30, [x29, w14, sxtw #0] + __ ldrd(v29, Address(r29, r14, Address::sxtw(0))); // ldr d29, [x29, w14, sxtw #0] __ ldrs(v8, Address(r5, r5, Address::sxtw(2))); // ldr s8, [x5, w5, sxtw #2] - __ strd(v25, Address(r8, r13, Address::sxtx(0))); // str d25, [x8, x13, sxtx #0] + __ strd(v24, Address(r8, r13, Address::sxtx(0))); // str d24, [x8, x13, sxtx #0] __ strs(v17, Address(r24, r26, Address::lsl(2))); // str s17, [x24, x26, lsl #2] // base_plus_scaled_offset @@ -370,7 +370,7 @@ __ ldrsw(r10, Address(r7, 6372)); // ldrsw x10, [x7, 6372] __ ldrd(v3, Address(r25, 12392)); // ldr d3, [x25, 12392] __ ldrs(v12, Address(r9, 7840)); // ldr s12, [x9, 7840] - __ strd(v24, Address(r1, 12728)); // str d24, [x1, 12728] + __ strd(v23, Address(r1, 12728)); // str d23, [x1, 12728] __ strs(v3, Address(r20, 6924)); // str s3, [x20, 6924] // pcrel @@ -484,63 +484,63 @@ __ umsubl(r13, r10, r7, r5); // umsubl x13, w10, w7, x5 // ThreeRegFloatOp - __ fabds(v30, v15, v3); // fabd s30, s15, s3 - __ fmuls(v12, v12, v16); // fmul s12, s12, s16 - __ fdivs(v31, v31, v18); // fdiv s31, s31, s18 - __ fadds(v19, v21, v16); // fadd s19, s21, s16 - __ fsubs(v15, v10, v21); // fsub s15, s10, s21 - __ fabdd(v2, v10, v28); // fabd d2, d10, d28 - __ fmuld(v7, v30, v31); // fmul d7, d30, d31 - __ fdivd(v18, v1, v2); // fdiv d18, d1, d2 + __ fabds(v29, v15, v3); // fabd s29, s15, s3 + __ fmuls(v11, v12, v15); // fmul s11, s12, s15 + __ fdivs(v30, v30, v17); // fdiv s30, s30, s17 + __ fadds(v19, v20, v15); // fadd s19, s20, s15 + __ fsubs(v15, v9, v21); // fsub s15, s9, s21 + __ fabdd(v2, v9, v27); // fabd d2, d9, d27 + __ fmuld(v7, v29, v30); // fmul d7, d29, d30 + __ fdivd(v17, v1, v2); // fdiv d17, d1, d2 __ faddd(v6, v10, v3); // fadd d6, d10, d3 - __ fsubd(v25, v11, v7); // fsub d25, d11, d7 + __ fsubd(v24, v11, v7); // fsub d24, d11, d7 // FourRegFloatOp - __ fmadds(v1, v12, v0, v3); // fmadd s1, s12, s0, s3 - __ fmsubs(v19, v29, v6, v23); // fmsub s19, s29, s6, s23 - __ fnmadds(v6, v0, v28, v27); // fnmadd s6, s0, s28, s27 - __ fnmadds(v2, v5, v7, v29); // fnmadd s2, s5, s7, s29 - __ fmaddd(v12, v25, v13, v12); // fmadd d12, d25, d13, d12 - __ fmsubd(v24, v19, v8, v18); // fmsub d24, d19, d8, d18 - __ fnmaddd(v22, v26, v21, v20); // fnmadd d22, d26, d21, d20 - __ fnmaddd(v19, v2, v30, v22); // fnmadd d19, d2, d30, d22 + __ fmadds(v1, v11, v0, v3); // fmadd s1, s11, s0, s3 + __ fmsubs(v17, v28, v6, v22); // fmsub s17, s28, s6, s22 + __ fnmadds(v6, v0, v27, v26); // fnmadd s6, s0, s27, s26 + __ fnmadds(v2, v5, v7, v28); // fnmadd s2, s5, s7, s28 + __ fmaddd(v11, v25, v13, v11); // fmadd d11, d25, d13, d11 + __ fmsubd(v23, v19, v8, v17); // fmsub d23, d19, d8, d17 + __ fnmaddd(v21, v25, v20, v19); // fnmadd d21, d25, d20, d19 + __ fnmaddd(v17, v2, v29, v22); // fnmadd d17, d2, d29, d22 // TwoRegFloatOp - __ fmovs(v8, v22); // fmov s8, s22 - __ fabss(v19, v21); // fabs s19, s21 - __ fnegs(v12, v18); // fneg s12, s18 - __ fsqrts(v21, v6); // fsqrt s21, s6 - __ fcvts(v16, v3); // fcvt d16, s3 - __ fcvtsh(v3, v29); // fcvt h3, s29 - __ fcvths(v3, v28); // fcvt s3, h28 - __ fmovd(v15, v14); // fmov d15, d14 - __ fabsd(v10, v13); // fabs d10, d13 - __ fnegd(v12, v18); // fneg d12, d18 - __ fsqrtd(v10, v26); // fsqrt d10, d26 + __ fmovs(v8, v21); // fmov s8, s21 + __ fabss(v19, v20); // fabs s19, s20 + __ fnegs(v11, v17); // fneg s11, s17 + __ fsqrts(v20, v6); // fsqrt s20, s6 + __ fcvts(v15, v3); // fcvt d15, s3 + __ fcvtsh(v3, v28); // fcvt h3, s28 + __ fcvths(v3, v27); // fcvt s3, h27 + __ fmovd(v14, v14); // fmov d14, d14 + __ fabsd(v10, v12); // fabs d10, d12 + __ fnegd(v11, v17); // fneg d11, d17 + __ fsqrtd(v10, v25); // fsqrt d10, d25 __ fcvtd(v7, v7); // fcvt s7, d7 // FloatConvertOp - __ fcvtzsw(r14, v29); // fcvtzs w14, s29 - __ fcvtzs(r0, v23); // fcvtzs x0, s23 + __ fcvtzsw(r14, v28); // fcvtzs w14, s28 + __ fcvtzs(r0, v22); // fcvtzs x0, s22 __ fcvtzdw(r0, v12); // fcvtzs w0, d12 - __ fcvtzd(r23, v14); // fcvtzs x23, d14 + __ fcvtzd(r23, v13); // fcvtzs x23, d13 __ scvtfws(v13, r7); // scvtf s13, w7 - __ scvtfs(v15, r7); // scvtf s15, x7 - __ scvtfwd(v9, r20); // scvtf d9, w20 - __ scvtfd(v19, r28); // scvtf d19, x28 + __ scvtfs(v14, r7); // scvtf s14, x7 + __ scvtfwd(v8, r20); // scvtf d8, w20 + __ scvtfd(v17, r28); // scvtf d17, x28 __ fcvtassw(r30, v16); // fcvtas w30, s16 __ fcvtasd(r2, v9); // fcvtas x2, d9 - __ fcvtmssw(r16, v21); // fcvtms w16, s21 + __ fcvtmssw(r16, v20); // fcvtms w16, s20 __ fcvtmsd(r29, v4); // fcvtms x29, d4 - __ fmovs(r1, v27); // fmov w1, s27 - __ fmovd(r24, v24); // fmov x24, d24 + __ fmovs(r1, v26); // fmov w1, s26 + __ fmovd(r24, v23); // fmov x24, d23 __ fmovs(v14, r21); // fmov s14, w21 - __ fmovd(v13, r5); // fmov d13, x5 + __ fmovd(v12, r5); // fmov d12, x5 // TwoRegFloatOp - __ fcmps(v12, v25); // fcmp s12, s25 - __ fcmpd(v25, v30); // fcmp d25, d30 - __ fcmps(v28, 0.0); // fcmp s28, #0.0 + __ fcmps(v12, v24); // fcmp s12, s24 + __ fcmpd(v24, v29); // fcmp d24, d29 + __ fcmps(v27, 0.0); // fcmp s27, #0.0 __ fcmpd(v21, 0.0); // fcmp d21, #0.0 // LoadStorePairOp @@ -573,265 +573,250 @@ // LdStNEONOp __ ld1(v0, __ T8B, Address(r11)); // ld1 {v0.8B}, [x11] __ ld1(v16, v17, __ T16B, Address(__ post(r26, 32))); // ld1 {v16.16B, v17.16B}, [x26], 32 - __ ld1(v22, v23, v24, __ T1D, Address(__ post(r26, r17))); // ld1 {v22.1D, v23.1D, v24.1D}, [x26], x17 - __ ld1(v27, v28, v29, v30, __ T8H, Address(__ post(r29, 64))); // ld1 {v27.8H, v28.8H, v29.8H, v30.8H}, [x29], 64 - __ ld1r(v22, __ T8B, Address(r6)); // ld1r {v22.8B}, [x6] - __ ld1r(v14, __ T4S, Address(__ post(r29, 4))); // ld1r {v14.4S}, [x29], 4 - __ ld1r(v22, __ T1D, Address(__ post(r12, r16))); // ld1r {v22.1D}, [x12], x16 + __ ld1(v21, v22, v23, __ T1D, Address(__ post(r26, r17))); // ld1 {v21.1D, v22.1D, v23.1D}, [x26], x17 + __ ld1(v26, v27, v28, v29, __ T8H, Address(__ post(r29, 64))); // ld1 {v26.8H, v27.8H, v28.8H, v29.8H}, [x29], 64 + __ ld1r(v21, __ T8B, Address(r6)); // ld1r {v21.8B}, [x6] + __ ld1r(v13, __ T4S, Address(__ post(r29, 4))); // ld1r {v13.4S}, [x29], 4 + __ ld1r(v21, __ T1D, Address(__ post(r12, r16))); // ld1r {v21.1D}, [x12], x16 __ ld2(v1, v2, __ T2D, Address(r0)); // ld2 {v1.2D, v2.2D}, [x0] - __ ld2(v10, v11, __ T4H, Address(__ post(r21, 16))); // ld2 {v10.4H, v11.4H}, [x21], 16 + __ ld2(v9, v10, __ T4H, Address(__ post(r21, 16))); // ld2 {v9.4H, v10.4H}, [x21], 16 __ ld2r(v7, v8, __ T16B, Address(r25)); // ld2r {v7.16B, v8.16B}, [x25] - __ ld2r(v9, v10, __ T2S, Address(__ post(r9, 8))); // ld2r {v9.2S, v10.2S}, [x9], 8 + __ ld2r(v8, v9, __ T2S, Address(__ post(r9, 8))); // ld2r {v8.2S, v9.2S}, [x9], 8 __ ld2r(v9, v10, __ T2D, Address(__ post(r12, r14))); // ld2r {v9.2D, v10.2D}, [x12], x14 __ ld3(v7, v8, v9, __ T4S, Address(__ post(r4, r17))); // ld3 {v7.4S, v8.4S, v9.4S}, [x4], x17 __ ld3(v23, v24, v25, __ T2S, Address(r17)); // ld3 {v23.2S, v24.2S, v25.2S}, [x17] - __ ld3r(v4, v5, v6, __ T8H, Address(r22)); // ld3r {v4.8H, v5.8H, v6.8H}, [x22] - __ ld3r(v13, v14, v15, __ T4S, Address(__ post(r2, 12))); // ld3r {v13.4S, v14.4S, v15.4S}, [x2], 12 - __ ld3r(v16, v17, v18, __ T1D, Address(__ post(r10, r12))); // ld3r {v16.1D, v17.1D, v18.1D}, [x10], x12 + __ ld3r(v3, v4, v5, __ T8H, Address(r22)); // ld3r {v3.8H, v4.8H, v5.8H}, [x22] + __ ld3r(v12, v13, v14, __ T4S, Address(__ post(r2, 12))); // ld3r {v12.4S, v13.4S, v14.4S}, [x2], 12 + __ ld3r(v15, v16, v17, __ T1D, Address(__ post(r10, r12))); // ld3r {v15.1D, v16.1D, v17.1D}, [x10], x12 __ ld4(v4, v5, v6, v7, __ T8H, Address(__ post(r2, 64))); // ld4 {v4.8H, v5.8H, v6.8H, v7.8H}, [x2], 64 __ ld4(v6, v7, v8, v9, __ T8B, Address(__ post(r20, r11))); // ld4 {v6.8B, v7.8B, v8.8B, v9.8B}, [x20], x11 - __ ld4r(v12, v13, v14, v15, __ T8B, Address(r12)); // ld4r {v12.8B, v13.8B, v14.8B, v15.8B}, [x12] - __ ld4r(v16, v17, v18, v19, __ T4H, Address(__ post(r17, 8))); // ld4r {v16.4H, v17.4H, v18.4H, v19.4H}, [x17], 8 + __ ld4r(v11, v12, v13, v14, __ T8B, Address(r12)); // ld4r {v11.8B, v12.8B, v13.8B, v14.8B}, [x12] + __ ld4r(v15, v16, v17, v18, __ T4H, Address(__ post(r17, 8))); // ld4r {v15.4H, v16.4H, v17.4H, v18.4H}, [x17], 8 __ ld4r(v14, v15, v16, v17, __ T2S, Address(__ post(r25, r16))); // ld4r {v14.2S, v15.2S, v16.2S, v17.2S}, [x25], x16 // NEONReduceInstruction __ addv(v20, __ T8B, v21); // addv b20, v21.8B __ addv(v1, __ T16B, v2); // addv b1, v2.16B - __ addv(v23, __ T4H, v24); // addv h23, v24.4H + __ addv(v22, __ T4H, v23); // addv h22, v23.4H __ addv(v30, __ T8H, v31); // addv h30, v31.8H __ addv(v14, __ T4S, v15); // addv s14, v15.4S __ smaxv(v2, __ T8B, v3); // smaxv b2, v3.8B __ smaxv(v6, __ T16B, v7); // smaxv b6, v7.16B __ smaxv(v3, __ T4H, v4); // smaxv h3, v4.4H - __ smaxv(v8, __ T8H, v9); // smaxv h8, v9.8H - __ smaxv(v25, __ T4S, v26); // smaxv s25, v26.4S + __ smaxv(v7, __ T8H, v8); // smaxv h7, v8.8H + __ smaxv(v24, __ T4S, v25); // smaxv s24, v25.4S __ fmaxv(v0, __ T4S, v1); // fmaxv s0, v1.4S __ sminv(v27, __ T8B, v28); // sminv b27, v28.8B - __ uminv(v30, __ T8B, v31); // uminv b30, v31.8B + __ uminv(v29, __ T8B, v30); // uminv b29, v30.8B __ sminv(v5, __ T16B, v6); // sminv b5, v6.16B __ uminv(v5, __ T16B, v6); // uminv b5, v6.16B - __ sminv(v30, __ T4H, v31); // sminv h30, v31.4H + __ sminv(v29, __ T4H, v30); // sminv h29, v30.4H __ uminv(v11, __ T4H, v12); // uminv h11, v12.4H __ sminv(v25, __ T8H, v26); // sminv h25, v26.8H __ uminv(v0, __ T8H, v1); // uminv h0, v1.8H - __ sminv(v31, __ T4S, v0); // sminv s31, v0.4S + __ sminv(v30, __ T4S, v31); // sminv s30, v31.4S __ uminv(v0, __ T4S, v1); // uminv s0, v1.4S - __ fminv(v19, __ T4S, v20); // fminv s19, v20.4S - __ fmaxp(v29, v30, __ S); // fmaxp s29, v30.2S - __ fmaxp(v26, v27, __ D); // fmaxp d26, v27.2D + __ fminv(v17, __ T4S, v18); // fminv s17, v18.4S + __ fmaxp(v28, v29, __ S); // fmaxp s28, v29.2S + __ fmaxp(v25, v26, __ D); // fmaxp d25, v26.2D __ fminp(v9, v10, __ S); // fminp s9, v10.2S - __ fminp(v26, v27, __ D); // fminp d26, v27.2D + __ fminp(v25, v26, __ D); // fminp d25, v26.2D // NEONFloatCompareWithZero __ fcm(Assembler::GT, v12, __ T2S, v13); // fcmgt v12.2S, v13.2S, #0.0 __ fcm(Assembler::GT, v15, __ T4S, v16); // fcmgt v15.4S, v16.4S, #0.0 __ fcm(Assembler::GT, v11, __ T2D, v12); // fcmgt v11.2D, v12.2D, #0.0 - __ fcm(Assembler::GE, v11, __ T2S, v12); // fcmge v11.2S, v12.2S, #0.0 - __ fcm(Assembler::GE, v18, __ T4S, v19); // fcmge v18.4S, v19.4S, #0.0 - __ fcm(Assembler::GE, v25, __ T2D, v26); // fcmge v25.2D, v26.2D, #0.0 - __ fcm(Assembler::EQ, v22, __ T2S, v23); // fcmeq v22.2S, v23.2S, #0.0 - __ fcm(Assembler::EQ, v24, __ T4S, v25); // fcmeq v24.4S, v25.4S, #0.0 + __ fcm(Assembler::GE, v10, __ T2S, v11); // fcmge v10.2S, v11.2S, #0.0 + __ fcm(Assembler::GE, v17, __ T4S, v18); // fcmge v17.4S, v18.4S, #0.0 + __ fcm(Assembler::GE, v24, __ T2D, v25); // fcmge v24.2D, v25.2D, #0.0 + __ fcm(Assembler::EQ, v21, __ T2S, v22); // fcmeq v21.2S, v22.2S, #0.0 + __ fcm(Assembler::EQ, v23, __ T4S, v24); // fcmeq v23.4S, v24.4S, #0.0 __ fcm(Assembler::EQ, v0, __ T2D, v1); // fcmeq v0.2D, v1.2D, #0.0 - __ fcm(Assembler::LT, v17, __ T2S, v18); // fcmlt v17.2S, v18.2S, #0.0 - __ fcm(Assembler::LT, v11, __ T4S, v12); // fcmlt v11.4S, v12.4S, #0.0 + __ fcm(Assembler::LT, v16, __ T2S, v17); // fcmlt v16.2S, v17.2S, #0.0 + __ fcm(Assembler::LT, v10, __ T4S, v11); // fcmlt v10.4S, v11.4S, #0.0 __ fcm(Assembler::LT, v6, __ T2D, v7); // fcmlt v6.2D, v7.2D, #0.0 - __ fcm(Assembler::LE, v29, __ T2S, v30); // fcmle v29.2S, v30.2S, #0.0 + __ fcm(Assembler::LE, v28, __ T2S, v29); // fcmle v28.2S, v29.2S, #0.0 __ fcm(Assembler::LE, v6, __ T4S, v7); // fcmle v6.4S, v7.4S, #0.0 __ fcm(Assembler::LE, v5, __ T2D, v6); // fcmle v5.2D, v6.2D, #0.0 // TwoRegNEONOp __ absr(v5, __ T8B, v6); // abs v5.8B, v6.8B - __ absr(v21, __ T16B, v22); // abs v21.16B, v22.16B - __ absr(v19, __ T4H, v20); // abs v19.4H, v20.4H - __ absr(v16, __ T8H, v17); // abs v16.8H, v17.8H - __ absr(v18, __ T2S, v19); // abs v18.2S, v19.2S - __ absr(v30, __ T4S, v31); // abs v30.4S, v31.4S - __ absr(v27, __ T2D, v28); // abs v27.2D, v28.2D + __ absr(v20, __ T16B, v21); // abs v20.16B, v21.16B + __ absr(v17, __ T4H, v18); // abs v17.4H, v18.4H + __ absr(v15, __ T8H, v16); // abs v15.8H, v16.8H + __ absr(v17, __ T2S, v18); // abs v17.2S, v18.2S + __ absr(v29, __ T4S, v30); // abs v29.4S, v30.4S + __ absr(v26, __ T2D, v27); // abs v26.2D, v27.2D __ fabs(v28, __ T2S, v29); // fabs v28.2S, v29.2S __ fabs(v1, __ T4S, v2); // fabs v1.4S, v2.4S - __ fabs(v28, __ T2D, v29); // fabs v28.2D, v29.2D - __ fneg(v1, __ T2S, v2); // fneg v1.2S, v2.2S + __ fabs(v27, __ T2D, v28); // fabs v27.2D, v28.2D + __ fneg(v0, __ T2S, v1); // fneg v0.2S, v1.2S __ fneg(v20, __ T4S, v21); // fneg v20.4S, v21.4S - __ fneg(v29, __ T2D, v30); // fneg v29.2D, v30.2D - __ fsqrt(v16, __ T2S, v17); // fsqrt v16.2S, v17.2S - __ fsqrt(v13, __ T4S, v14); // fsqrt v13.4S, v14.4S + __ fneg(v28, __ T2D, v29); // fneg v28.2D, v29.2D + __ fsqrt(v15, __ T2S, v16); // fsqrt v15.2S, v16.2S + __ fsqrt(v12, __ T4S, v13); // fsqrt v12.4S, v13.4S __ fsqrt(v10, __ T2D, v11); // fsqrt v10.2D, v11.2D - __ notr(v29, __ T8B, v30); // not v29.8B, v30.8B - __ notr(v29, __ T16B, v30); // not v29.16B, v30.16B + __ notr(v28, __ T8B, v29); // not v28.8B, v29.8B + __ notr(v28, __ T16B, v29); // not v28.16B, v29.16B // ThreeRegNEONOp __ andr(v19, __ T8B, v20, v21); // and v19.8B, v20.8B, v21.8B __ andr(v22, __ T16B, v23, v24); // and v22.16B, v23.16B, v24.16B __ orr(v10, __ T8B, v11, v12); // orr v10.8B, v11.8B, v12.8B __ orr(v4, __ T16B, v5, v6); // orr v4.16B, v5.16B, v6.16B - __ eor(v31, __ T8B, v0, v1); // eor v31.8B, v0.8B, v1.8B - __ eor(v21, __ T16B, v22, v23); // eor v21.16B, v22.16B, v23.16B + __ eor(v30, __ T8B, v31, v0); // eor v30.8B, v31.8B, v0.8B + __ eor(v20, __ T16B, v21, v22); // eor v20.16B, v21.16B, v22.16B __ addv(v8, __ T8B, v9, v10); // add v8.8B, v9.8B, v10.8B - __ addv(v31, __ T16B, v0, v1); // add v31.16B, v0.16B, v1.16B - __ addv(v19, __ T4H, v20, v21); // add v19.4H, v20.4H, v21.4H + __ addv(v30, __ T16B, v31, v0); // add v30.16B, v31.16B, v0.16B + __ addv(v17, __ T4H, v18, v19); // add v17.4H, v18.4H, v19.4H __ addv(v10, __ T8H, v11, v12); // add v10.8H, v11.8H, v12.8H - __ addv(v28, __ T2S, v29, v30); // add v28.2S, v29.2S, v30.2S + __ addv(v27, __ T2S, v28, v29); // add v27.2S, v28.2S, v29.2S __ addv(v2, __ T4S, v3, v4); // add v2.4S, v3.4S, v4.4S - __ addv(v25, __ T2D, v26, v27); // add v25.2D, v26.2D, v27.2D - __ fadd(v5, __ T2S, v6, v7); // fadd v5.2S, v6.2S, v7.2S + __ addv(v24, __ T2D, v25, v26); // add v24.2D, v25.2D, v26.2D + __ fadd(v4, __ T2S, v5, v6); // fadd v4.2S, v5.2S, v6.2S __ fadd(v3, __ T4S, v4, v5); // fadd v3.4S, v4.4S, v5.4S __ fadd(v8, __ T2D, v9, v10); // fadd v8.2D, v9.2D, v10.2D __ subv(v22, __ T8B, v23, v24); // sub v22.8B, v23.8B, v24.8B - __ subv(v19, __ T16B, v20, v21); // sub v19.16B, v20.16B, v21.16B + __ subv(v17, __ T16B, v18, v19); // sub v17.16B, v18.16B, v19.16B __ subv(v13, __ T4H, v14, v15); // sub v13.4H, v14.4H, v15.4H - __ subv(v5, __ T8H, v6, v7); // sub v5.8H, v6.8H, v7.8H - __ subv(v29, __ T2S, v30, v31); // sub v29.2S, v30.2S, v31.2S - __ subv(v24, __ T4S, v25, v26); // sub v24.4S, v25.4S, v26.4S + __ subv(v4, __ T8H, v5, v6); // sub v4.8H, v5.8H, v6.8H + __ subv(v28, __ T2S, v29, v30); // sub v28.2S, v29.2S, v30.2S + __ subv(v23, __ T4S, v24, v25); // sub v23.4S, v24.4S, v25.4S __ subv(v21, __ T2D, v22, v23); // sub v21.2D, v22.2D, v23.2D - __ fsub(v26, __ T2S, v27, v28); // fsub v26.2S, v27.2S, v28.2S + __ fsub(v25, __ T2S, v26, v27); // fsub v25.2S, v26.2S, v27.2S __ fsub(v24, __ T4S, v25, v26); // fsub v24.4S, v25.4S, v26.4S __ fsub(v3, __ T2D, v4, v5); // fsub v3.2D, v4.2D, v5.2D - __ mulv(v24, __ T8B, v25, v26); // mul v24.8B, v25.8B, v26.8B + __ mulv(v23, __ T8B, v24, v25); // mul v23.8B, v24.8B, v25.8B __ mulv(v26, __ T16B, v27, v28); // mul v26.16B, v27.16B, v28.16B __ mulv(v23, __ T4H, v24, v25); // mul v23.4H, v24.4H, v25.4H - __ mulv(v15, __ T8H, v16, v17); // mul v15.8H, v16.8H, v17.8H + __ mulv(v14, __ T8H, v15, v16); // mul v14.8H, v15.8H, v16.8H __ mulv(v21, __ T2S, v22, v23); // mul v21.2S, v22.2S, v23.2S __ mulv(v3, __ T4S, v4, v5); // mul v3.4S, v4.4S, v5.4S - __ fabd(v24, __ T2S, v25, v26); // fabd v24.2S, v25.2S, v26.2S + __ fabd(v23, __ T2S, v24, v25); // fabd v23.2S, v24.2S, v25.2S __ fabd(v8, __ T4S, v9, v10); // fabd v8.4S, v9.4S, v10.4S - __ fabd(v25, __ T2D, v26, v27); // fabd v25.2D, v26.2D, v27.2D - __ faddp(v20, __ T2S, v21, v22); // faddp v20.2S, v21.2S, v22.2S - __ faddp(v16, __ T4S, v17, v18); // faddp v16.4S, v17.4S, v18.4S - __ faddp(v17, __ T2D, v18, v19); // faddp v17.2D, v18.2D, v19.2D + __ fabd(v24, __ T2D, v25, v26); // fabd v24.2D, v25.2D, v26.2D + __ faddp(v19, __ T2S, v20, v21); // faddp v19.2S, v20.2S, v21.2S + __ faddp(v15, __ T4S, v16, v17); // faddp v15.4S, v16.4S, v17.4S + __ faddp(v16, __ T2D, v17, v18); // faddp v16.2D, v17.2D, v18.2D __ fmul(v2, __ T2S, v3, v4); // fmul v2.2S, v3.2S, v4.2S __ fmul(v1, __ T4S, v2, v3); // fmul v1.4S, v2.4S, v3.4S __ fmul(v0, __ T2D, v1, v2); // fmul v0.2D, v1.2D, v2.2D __ mlav(v24, __ T4H, v25, v26); // mla v24.4H, v25.4H, v26.4H __ mlav(v4, __ T8H, v5, v6); // mla v4.8H, v5.8H, v6.8H __ mlav(v3, __ T2S, v4, v5); // mla v3.2S, v4.2S, v5.2S - __ mlav(v12, __ T4S, v13, v14); // mla v12.4S, v13.4S, v14.4S - __ fmla(v31, __ T2S, v0, v1); // fmla v31.2S, v0.2S, v1.2S - __ fmla(v28, __ T4S, v29, v30); // fmla v28.4S, v29.4S, v30.4S - __ fmla(v10, __ T2D, v11, v12); // fmla v10.2D, v11.2D, v12.2D - __ mlsv(v26, __ T4H, v27, v28); // mls v26.4H, v27.4H, v28.4H + __ mlav(v11, __ T4S, v12, v13); // mla v11.4S, v12.4S, v13.4S + __ fmla(v30, __ T2S, v31, v0); // fmla v30.2S, v31.2S, v0.2S + __ fmla(v27, __ T4S, v28, v29); // fmla v27.4S, v28.4S, v29.4S + __ fmla(v9, __ T2D, v10, v11); // fmla v9.2D, v10.2D, v11.2D + __ mlsv(v25, __ T4H, v26, v27); // mls v25.4H, v26.4H, v27.4H __ mlsv(v2, __ T8H, v3, v4); // mls v2.8H, v3.8H, v4.8H __ mlsv(v12, __ T2S, v13, v14); // mls v12.2S, v13.2S, v14.2S - __ mlsv(v18, __ T4S, v19, v20); // mls v18.4S, v19.4S, v20.4S - __ fmls(v31, __ T2S, v0, v1); // fmls v31.2S, v0.2S, v1.2S + __ mlsv(v17, __ T4S, v18, v19); // mls v17.4S, v18.4S, v19.4S + __ fmls(v30, __ T2S, v31, v0); // fmls v30.2S, v31.2S, v0.2S __ fmls(v1, __ T4S, v2, v3); // fmls v1.4S, v2.4S, v3.4S - __ fmls(v13, __ T2D, v14, v15); // fmls v13.2D, v14.2D, v15.2D - __ fdiv(v29, __ T2S, v30, v31); // fdiv v29.2S, v30.2S, v31.2S + __ fmls(v12, __ T2D, v13, v14); // fmls v12.2D, v13.2D, v14.2D + __ fdiv(v28, __ T2S, v29, v30); // fdiv v28.2S, v29.2S, v30.2S __ fdiv(v0, __ T4S, v1, v2); // fdiv v0.4S, v1.4S, v2.4S - __ fdiv(v19, __ T2D, v20, v21); // fdiv v19.2D, v20.2D, v21.2D + __ fdiv(v17, __ T2D, v18, v19); // fdiv v17.2D, v18.2D, v19.2D __ maxv(v12, __ T8B, v13, v14); // smax v12.8B, v13.8B, v14.8B __ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B - __ maxv(v22, __ T4H, v23, v24); // smax v22.4H, v23.4H, v24.4H - __ maxv(v13, __ T8H, v14, v15); // smax v13.8H, v14.8H, v15.8H - __ maxv(v28, __ T2S, v29, v30); // smax v28.2S, v29.2S, v30.2S - __ maxv(v30, __ T4S, v31, v0); // smax v30.4S, v31.4S, v0.4S - __ smaxp(v31, __ T8B, v0, v1); // smaxp v31.8B, v0.8B, v1.8B + __ maxv(v21, __ T4H, v22, v23); // smax v21.4H, v22.4H, v23.4H + __ maxv(v12, __ T8H, v13, v14); // smax v12.8H, v13.8H, v14.8H + __ maxv(v27, __ T2S, v28, v29); // smax v27.2S, v28.2S, v29.2S + __ maxv(v29, __ T4S, v30, v31); // smax v29.4S, v30.4S, v31.4S + __ smaxp(v30, __ T8B, v31, v0); // smaxp v30.8B, v31.8B, v0.8B __ smaxp(v1, __ T16B, v2, v3); // smaxp v1.16B, v2.16B, v3.16B - __ smaxp(v26, __ T4H, v27, v28); // smaxp v26.4H, v27.4H, v28.4H - __ smaxp(v28, __ T8H, v29, v30); // smaxp v28.8H, v29.8H, v30.8H + __ smaxp(v25, __ T4H, v26, v27); // smaxp v25.4H, v26.4H, v27.4H + __ smaxp(v27, __ T8H, v28, v29); // smaxp v27.8H, v28.8H, v29.8H __ smaxp(v4, __ T2S, v5, v6); // smaxp v4.2S, v5.2S, v6.2S - __ smaxp(v30, __ T4S, v31, v0); // smaxp v30.4S, v31.4S, v0.4S - __ fmax(v4, __ T2S, v5, v6); // fmax v4.2S, v5.2S, v6.2S + __ smaxp(v29, __ T4S, v30, v31); // smaxp v29.4S, v30.4S, v31.4S + __ fmax(v3, __ T2S, v4, v5); // fmax v3.2S, v4.2S, v5.2S __ fmax(v6, __ T4S, v7, v8); // fmax v6.4S, v7.4S, v8.4S - __ fmax(v30, __ T2D, v31, v0); // fmax v30.2D, v31.2D, v0.2D - __ minv(v26, __ T8B, v27, v28); // smin v26.8B, v27.8B, v28.8B - __ minv(v18, __ T16B, v19, v20); // smin v18.16B, v19.16B, v20.16B - __ minv(v9, __ T4H, v10, v11); // smin v9.4H, v10.4H, v11.4H - __ minv(v8, __ T8H, v9, v10); // smin v8.8H, v9.8H, v10.8H + __ fmax(v29, __ T2D, v30, v31); // fmax v29.2D, v30.2D, v31.2D + __ minv(v25, __ T8B, v26, v27); // smin v25.8B, v26.8B, v27.8B + __ minv(v17, __ T16B, v18, v19); // smin v17.16B, v18.16B, v19.16B + __ minv(v8, __ T4H, v9, v10); // smin v8.4H, v9.4H, v10.4H + __ minv(v7, __ T8H, v8, v9); // smin v7.8H, v8.8H, v9.8H __ minv(v12, __ T2S, v13, v14); // smin v12.2S, v13.2S, v14.2S __ minv(v0, __ T4S, v1, v2); // smin v0.4S, v1.4S, v2.4S - __ sminp(v20, __ T8B, v21, v22); // sminp v20.8B, v21.8B, v22.8B + __ sminp(v19, __ T8B, v20, v21); // sminp v19.8B, v20.8B, v21.8B __ sminp(v1, __ T16B, v2, v3); // sminp v1.16B, v2.16B, v3.16B - __ sminp(v24, __ T4H, v25, v26); // sminp v24.4H, v25.4H, v26.4H + __ sminp(v23, __ T4H, v24, v25); // sminp v23.4H, v24.4H, v25.4H __ sminp(v2, __ T8H, v3, v4); // sminp v2.8H, v3.8H, v4.8H __ sminp(v0, __ T2S, v1, v2); // sminp v0.2S, v1.2S, v2.2S - __ sminp(v9, __ T4S, v10, v11); // sminp v9.4S, v10.4S, v11.4S - __ fmin(v24, __ T2S, v25, v26); // fmin v24.2S, v25.2S, v26.2S - __ fmin(v26, __ T4S, v27, v28); // fmin v26.4S, v27.4S, v28.4S - __ fmin(v16, __ T2D, v17, v18); // fmin v16.2D, v17.2D, v18.2D - __ facgt(v30, __ T2S, v31, v0); // facgt v30.2S, v31.2S, v0.2S + __ sminp(v8, __ T4S, v9, v10); // sminp v8.4S, v9.4S, v10.4S + __ fmin(v23, __ T2S, v24, v25); // fmin v23.2S, v24.2S, v25.2S + __ fmin(v25, __ T4S, v26, v27); // fmin v25.4S, v26.4S, v27.4S + __ fmin(v15, __ T2D, v16, v17); // fmin v15.2D, v16.2D, v17.2D + __ facgt(v29, __ T2S, v30, v31); // facgt v29.2S, v30.2S, v31.2S __ facgt(v3, __ T4S, v4, v5); // facgt v3.4S, v4.4S, v5.4S __ facgt(v10, __ T2D, v11, v12); // facgt v10.2D, v11.2D, v12.2D -// VectorScalarNEONInstruction - __ fmlavs(v5, __ T2S, v6, v7, 1); // fmla v5.2S, v6.2S, v7.S[1] - __ mulvs(v9, __ T4S, v10, v11, 0); // mul v9.4S, v10.4S, v11.S[0] - __ fmlavs(v5, __ T2D, v6, v7, 0); // fmla v5.2D, v6.2D, v7.D[0] - __ fmlsvs(v5, __ T2S, v6, v7, 0); // fmls v5.2S, v6.2S, v7.S[0] - __ mulvs(v8, __ T4S, v9, v10, 1); // mul v8.4S, v9.4S, v10.S[1] - __ fmlsvs(v5, __ T2D, v6, v7, 0); // fmls v5.2D, v6.2D, v7.D[0] - __ fmulxvs(v6, __ T2S, v7, v8, 0); // fmulx v6.2S, v7.2S, v8.S[0] - __ mulvs(v6, __ T4S, v7, v8, 1); // mul v6.4S, v7.4S, v8.S[1] - __ fmulxvs(v3, __ T2D, v4, v5, 0); // fmulx v3.2D, v4.2D, v5.D[0] - __ mulvs(v13, __ T4H, v14, v15, 2); // mul v13.4H, v14.4H, v15.H[2] - __ mulvs(v2, __ T8H, v3, v4, 4); // mul v2.8H, v3.8H, v4.H[4] - __ mulvs(v2, __ T2S, v3, v4, 0); // mul v2.2S, v3.2S, v4.S[0] - __ mulvs(v9, __ T4S, v10, v11, 1); // mul v9.4S, v10.4S, v11.S[1] - // NEONVectorCompare - __ cm(Assembler::GT, v21, __ T8B, v22, v23); // cmgt v21.8B, v22.8B, v23.8B - __ cm(Assembler::GT, v16, __ T16B, v17, v18); // cmgt v16.16B, v17.16B, v18.16B - __ cm(Assembler::GT, v18, __ T4H, v19, v20); // cmgt v18.4H, v19.4H, v20.4H - __ cm(Assembler::GT, v11, __ T8H, v12, v13); // cmgt v11.8H, v12.8H, v13.8H - __ cm(Assembler::GT, v21, __ T2S, v22, v23); // cmgt v21.2S, v22.2S, v23.2S - __ cm(Assembler::GT, v23, __ T4S, v24, v25); // cmgt v23.4S, v24.4S, v25.4S - __ cm(Assembler::GT, v12, __ T2D, v13, v14); // cmgt v12.2D, v13.2D, v14.2D - __ cm(Assembler::GE, v26, __ T8B, v27, v28); // cmge v26.8B, v27.8B, v28.8B - __ cm(Assembler::GE, v23, __ T16B, v24, v25); // cmge v23.16B, v24.16B, v25.16B - __ cm(Assembler::GE, v28, __ T4H, v29, v30); // cmge v28.4H, v29.4H, v30.4H - __ cm(Assembler::GE, v14, __ T8H, v15, v16); // cmge v14.8H, v15.8H, v16.8H - __ cm(Assembler::GE, v11, __ T2S, v12, v13); // cmge v11.2S, v12.2S, v13.2S - __ cm(Assembler::GE, v24, __ T4S, v25, v26); // cmge v24.4S, v25.4S, v26.4S - __ cm(Assembler::GE, v1, __ T2D, v2, v3); // cmge v1.2D, v2.2D, v3.2D - __ cm(Assembler::EQ, v12, __ T8B, v13, v14); // cmeq v12.8B, v13.8B, v14.8B - __ cm(Assembler::EQ, v31, __ T16B, v0, v1); // cmeq v31.16B, v0.16B, v1.16B - __ cm(Assembler::EQ, v10, __ T4H, v11, v12); // cmeq v10.4H, v11.4H, v12.4H - __ cm(Assembler::EQ, v16, __ T8H, v17, v18); // cmeq v16.8H, v17.8H, v18.8H - __ cm(Assembler::EQ, v7, __ T2S, v8, v9); // cmeq v7.2S, v8.2S, v9.2S - __ cm(Assembler::EQ, v2, __ T4S, v3, v4); // cmeq v2.4S, v3.4S, v4.4S - __ cm(Assembler::EQ, v3, __ T2D, v4, v5); // cmeq v3.2D, v4.2D, v5.2D - __ cm(Assembler::HI, v13, __ T8B, v14, v15); // cmhi v13.8B, v14.8B, v15.8B - __ cm(Assembler::HI, v19, __ T16B, v20, v21); // cmhi v19.16B, v20.16B, v21.16B - __ cm(Assembler::HI, v17, __ T4H, v18, v19); // cmhi v17.4H, v18.4H, v19.4H - __ cm(Assembler::HI, v16, __ T8H, v17, v18); // cmhi v16.8H, v17.8H, v18.8H - __ cm(Assembler::HI, v3, __ T2S, v4, v5); // cmhi v3.2S, v4.2S, v5.2S - __ cm(Assembler::HI, v1, __ T4S, v2, v3); // cmhi v1.4S, v2.4S, v3.4S - __ cm(Assembler::HI, v11, __ T2D, v12, v13); // cmhi v11.2D, v12.2D, v13.2D - __ cm(Assembler::HS, v30, __ T8B, v31, v0); // cmhs v30.8B, v31.8B, v0.8B - __ cm(Assembler::HS, v5, __ T16B, v6, v7); // cmhs v5.16B, v6.16B, v7.16B - __ cm(Assembler::HS, v8, __ T4H, v9, v10); // cmhs v8.4H, v9.4H, v10.4H - __ cm(Assembler::HS, v15, __ T8H, v16, v17); // cmhs v15.8H, v16.8H, v17.8H - __ cm(Assembler::HS, v29, __ T2S, v30, v31); // cmhs v29.2S, v30.2S, v31.2S - __ cm(Assembler::HS, v30, __ T4S, v31, v0); // cmhs v30.4S, v31.4S, v0.4S - __ cm(Assembler::HS, v0, __ T2D, v1, v2); // cmhs v0.2D, v1.2D, v2.2D - __ fcm(Assembler::EQ, v20, __ T2S, v21, v22); // fcmeq v20.2S, v21.2S, v22.2S - __ fcm(Assembler::EQ, v7, __ T4S, v8, v9); // fcmeq v7.4S, v8.4S, v9.4S - __ fcm(Assembler::EQ, v20, __ T2D, v21, v22); // fcmeq v20.2D, v21.2D, v22.2D - __ fcm(Assembler::GT, v23, __ T2S, v24, v25); // fcmgt v23.2S, v24.2S, v25.2S - __ fcm(Assembler::GT, v28, __ T4S, v29, v30); // fcmgt v28.4S, v29.4S, v30.4S - __ fcm(Assembler::GT, v21, __ T2D, v22, v23); // fcmgt v21.2D, v22.2D, v23.2D - __ fcm(Assembler::GE, v27, __ T2S, v28, v29); // fcmge v27.2S, v28.2S, v29.2S - __ fcm(Assembler::GE, v25, __ T4S, v26, v27); // fcmge v25.4S, v26.4S, v27.4S - __ fcm(Assembler::GE, v5, __ T2D, v6, v7); // fcmge v5.2D, v6.2D, v7.2D + __ cm(Assembler::GT, v22, __ T8B, v23, v24); // cmgt v22.8B, v23.8B, v24.8B + __ cm(Assembler::GT, v10, __ T16B, v11, v12); // cmgt v10.16B, v11.16B, v12.16B + __ cm(Assembler::GT, v4, __ T4H, v5, v6); // cmgt v4.4H, v5.4H, v6.4H + __ cm(Assembler::GT, v17, __ T8H, v18, v19); // cmgt v17.8H, v18.8H, v19.8H + __ cm(Assembler::GT, v1, __ T2S, v2, v3); // cmgt v1.2S, v2.2S, v3.2S + __ cm(Assembler::GT, v11, __ T4S, v12, v13); // cmgt v11.4S, v12.4S, v13.4S + __ cm(Assembler::GT, v7, __ T2D, v8, v9); // cmgt v7.2D, v8.2D, v9.2D + __ cm(Assembler::GE, v10, __ T8B, v11, v12); // cmge v10.8B, v11.8B, v12.8B + __ cm(Assembler::GE, v15, __ T16B, v16, v17); // cmge v15.16B, v16.16B, v17.16B + __ cm(Assembler::GE, v16, __ T4H, v17, v18); // cmge v16.4H, v17.4H, v18.4H + __ cm(Assembler::GE, v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H + __ cm(Assembler::GE, v9, __ T2S, v10, v11); // cmge v9.2S, v10.2S, v11.2S + __ cm(Assembler::GE, v11, __ T4S, v12, v13); // cmge v11.4S, v12.4S, v13.4S + __ cm(Assembler::GE, v12, __ T2D, v13, v14); // cmge v12.2D, v13.2D, v14.2D + __ cm(Assembler::EQ, v14, __ T8B, v15, v16); // cmeq v14.8B, v15.8B, v16.8B + __ cm(Assembler::EQ, v13, __ T16B, v14, v15); // cmeq v13.16B, v14.16B, v15.16B + __ cm(Assembler::EQ, v2, __ T4H, v3, v4); // cmeq v2.4H, v3.4H, v4.4H + __ cm(Assembler::EQ, v6, __ T8H, v7, v8); // cmeq v6.8H, v7.8H, v8.8H + __ cm(Assembler::EQ, v19, __ T2S, v20, v21); // cmeq v19.2S, v20.2S, v21.2S + __ cm(Assembler::EQ, v25, __ T4S, v26, v27); // cmeq v25.4S, v26.4S, v27.4S + __ cm(Assembler::EQ, v15, __ T2D, v16, v17); // cmeq v15.2D, v16.2D, v17.2D + __ cm(Assembler::HI, v4, __ T8B, v5, v6); // cmhi v4.8B, v5.8B, v6.8B + __ cm(Assembler::HI, v2, __ T16B, v3, v4); // cmhi v2.16B, v3.16B, v4.16B + __ cm(Assembler::HI, v4, __ T4H, v5, v6); // cmhi v4.4H, v5.4H, v6.4H + __ cm(Assembler::HI, v11, __ T8H, v12, v13); // cmhi v11.8H, v12.8H, v13.8H + __ cm(Assembler::HI, v17, __ T2S, v18, v19); // cmhi v17.2S, v18.2S, v19.2S + __ cm(Assembler::HI, v20, __ T4S, v21, v22); // cmhi v20.4S, v21.4S, v22.4S + __ cm(Assembler::HI, v16, __ T2D, v17, v18); // cmhi v16.2D, v17.2D, v18.2D + __ cm(Assembler::HS, v17, __ T8B, v18, v19); // cmhs v17.8B, v18.8B, v19.8B + __ cm(Assembler::HS, v10, __ T16B, v11, v12); // cmhs v10.16B, v11.16B, v12.16B + __ cm(Assembler::HS, v20, __ T4H, v21, v22); // cmhs v20.4H, v21.4H, v22.4H + __ cm(Assembler::HS, v22, __ T8H, v23, v24); // cmhs v22.8H, v23.8H, v24.8H + __ cm(Assembler::HS, v12, __ T2S, v13, v14); // cmhs v12.2S, v13.2S, v14.2S + __ cm(Assembler::HS, v25, __ T4S, v26, v27); // cmhs v25.4S, v26.4S, v27.4S + __ cm(Assembler::HS, v23, __ T2D, v24, v25); // cmhs v23.2D, v24.2D, v25.2D + __ fcm(Assembler::EQ, v28, __ T2S, v29, v30); // fcmeq v28.2S, v29.2S, v30.2S + __ fcm(Assembler::EQ, v14, __ T4S, v15, v16); // fcmeq v14.4S, v15.4S, v16.4S + __ fcm(Assembler::EQ, v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D + __ fcm(Assembler::GT, v24, __ T2S, v25, v26); // fcmgt v24.2S, v25.2S, v26.2S + __ fcm(Assembler::GT, v1, __ T4S, v2, v3); // fcmgt v1.4S, v2.4S, v3.4S + __ fcm(Assembler::GT, v11, __ T2D, v12, v13); // fcmgt v11.2D, v12.2D, v13.2D + __ fcm(Assembler::GE, v30, __ T2S, v31, v0); // fcmge v30.2S, v31.2S, v0.2S + __ fcm(Assembler::GE, v10, __ T4S, v11, v12); // fcmge v10.4S, v11.4S, v12.4S + __ fcm(Assembler::GE, v15, __ T2D, v16, v17); // fcmge v15.2D, v16.2D, v17.2D // SVEComparisonWithZero - __ sve_fcm(Assembler::EQ, p0, __ D, p7, z23, 0.0); // fcmeq p0.d, p7/z, z23.d, #0.0 - __ sve_fcm(Assembler::GT, p2, __ S, p7, z12, 0.0); // fcmgt p2.s, p7/z, z12.s, #0.0 - __ sve_fcm(Assembler::GE, p7, __ D, p7, z29, 0.0); // fcmge p7.d, p7/z, z29.d, #0.0 - __ sve_fcm(Assembler::LT, p9, __ S, p3, z31, 0.0); // fcmlt p9.s, p3/z, z31.s, #0.0 - __ sve_fcm(Assembler::LE, p9, __ D, p6, z31, 0.0); // fcmle p9.d, p6/z, z31.d, #0.0 - __ sve_fcm(Assembler::NE, p10, __ S, p2, z16, 0.0); // fcmne p10.s, p2/z, z16.s, #0.0 + __ sve_fcm(Assembler::EQ, p3, __ S, p3, z2, 0.0); // fcmeq p3.s, p3/z, z2.s, #0.0 + __ sve_fcm(Assembler::GT, p9, __ D, p0, z16, 0.0); // fcmgt p9.d, p0/z, z16.d, #0.0 + __ sve_fcm(Assembler::GE, p0, __ D, p1, z11, 0.0); // fcmge p0.d, p1/z, z11.d, #0.0 + __ sve_fcm(Assembler::LT, p4, __ D, p7, z14, 0.0); // fcmlt p4.d, p7/z, z14.d, #0.0 + __ sve_fcm(Assembler::LE, p0, __ S, p5, z20, 0.0); // fcmle p0.s, p5/z, z20.s, #0.0 + __ sve_fcm(Assembler::NE, p11, __ D, p6, z27, 0.0); // fcmne p11.d, p6/z, z27.d, #0.0 // SVEComparisonWithImm - __ sve_cmp(Assembler::EQ, p4, __ D, p4, z6, 11); // cmpeq p4.d, p4/z, z6.d, #11 - __ sve_cmp(Assembler::GT, p14, __ B, p2, z30, 4); // cmpgt p14.b, p2/z, z30.b, #4 - __ sve_cmp(Assembler::GE, p5, __ D, p4, z4, 1); // cmpge p5.d, p4/z, z4.d, #1 - __ sve_cmp(Assembler::LT, p11, __ D, p3, z3, 6); // cmplt p11.d, p3/z, z3.d, #6 - __ sve_cmp(Assembler::LE, p9, __ S, p0, z19, -1); // cmple p9.s, p0/z, z19.s, #-1 - __ sve_cmp(Assembler::NE, p3, __ S, p2, z12, -3); // cmpne p3.s, p2/z, z12.s, #-3 - __ sve_cmp(Assembler::HS, p11, __ D, p4, z1, 20); // cmphs p11.d, p4/z, z1.d, #20 - __ sve_cmp(Assembler::HI, p8, __ S, p5, z2, 53); // cmphi p8.s, p5/z, z2.s, #53 - __ sve_cmp(Assembler::LS, p5, __ D, p6, z21, 49); // cmpls p5.d, p6/z, z21.d, #49 - __ sve_cmp(Assembler::LO, p13, __ B, p7, z3, 97); // cmplo p13.b, p7/z, z3.b, #97 + __ sve_cmp(Assembler::EQ, p12, __ B, p5, z4, 0); // cmpeq p12.b, p5/z, z4.b, #0 + __ sve_cmp(Assembler::GT, p15, __ H, p2, z5, 12); // cmpgt p15.h, p2/z, z5.h, #12 + __ sve_cmp(Assembler::GE, p7, __ S, p7, z28, 3); // cmpge p7.s, p7/z, z28.s, #3 + __ sve_cmp(Assembler::LT, p15, __ H, p4, z5, 15); // cmplt p15.h, p4/z, z5.h, #15 + __ sve_cmp(Assembler::LE, p9, __ S, p4, z26, -4); // cmple p9.s, p4/z, z26.s, #-4 + __ sve_cmp(Assembler::NE, p5, __ B, p7, z9, 1); // cmpne p5.b, p7/z, z9.b, #1 + __ sve_cmp(Assembler::HS, p13, __ D, p1, z27, 43); // cmphs p13.d, p1/z, z27.d, #43 + __ sve_cmp(Assembler::HI, p10, __ B, p6, z9, 70); // cmphi p10.b, p6/z, z9.b, #70 + __ sve_cmp(Assembler::LS, p8, __ B, p7, z22, 61); // cmpls p8.b, p7/z, z22.b, #61 + __ sve_cmp(Assembler::LO, p11, __ S, p5, z17, 11); // cmplo p11.s, p5/z, z17.s, #11 // SpecialCases __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE @@ -1086,229 +1071,215 @@ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 // LSEOp - __ swp(Assembler::xword, r19, r17, r9); // swp x19, x17, [x9] - __ ldadd(Assembler::xword, r28, r27, r15); // ldadd x28, x27, [x15] - __ ldbic(Assembler::xword, r7, r21, r23); // ldclr x7, x21, [x23] - __ ldeor(Assembler::xword, zr, r25, r2); // ldeor xzr, x25, [x2] - __ ldorr(Assembler::xword, zr, r27, r15); // ldset xzr, x27, [x15] - __ ldsmin(Assembler::xword, r10, r23, r19); // ldsmin x10, x23, [x19] - __ ldsmax(Assembler::xword, r3, r16, r0); // ldsmax x3, x16, [x0] - __ ldumin(Assembler::xword, r25, r26, r23); // ldumin x25, x26, [x23] - __ ldumax(Assembler::xword, r2, r16, r12); // ldumax x2, x16, [x12] + __ swp(Assembler::xword, r15, r6, r12); // swp x15, x6, [x12] + __ ldadd(Assembler::xword, r16, r11, r13); // ldadd x16, x11, [x13] + __ ldbic(Assembler::xword, r23, r1, r30); // ldclr x23, x1, [x30] + __ ldeor(Assembler::xword, r19, r5, r17); // ldeor x19, x5, [x17] + __ ldorr(Assembler::xword, r2, r16, r22); // ldset x2, x16, [x22] + __ ldsmin(Assembler::xword, r13, r10, r21); // ldsmin x13, x10, [x21] + __ ldsmax(Assembler::xword, r29, r27, r12); // ldsmax x29, x27, [x12] + __ ldumin(Assembler::xword, r27, r3, r1); // ldumin x27, x3, [x1] + __ ldumax(Assembler::xword, zr, r24, r19); // ldumax xzr, x24, [x19] // LSEOp - __ swpa(Assembler::xword, r4, r28, r30); // swpa x4, x28, [x30] - __ ldadda(Assembler::xword, r29, r16, r27); // ldadda x29, x16, [x27] - __ ldbica(Assembler::xword, r6, r9, r29); // ldclra x6, x9, [x29] - __ ldeora(Assembler::xword, r16, r7, r4); // ldeora x16, x7, [x4] - __ ldorra(Assembler::xword, r7, r15, r9); // ldseta x7, x15, [x9] - __ ldsmina(Assembler::xword, r23, r8, r2); // ldsmina x23, x8, [x2] - __ ldsmaxa(Assembler::xword, r28, r21, sp); // ldsmaxa x28, x21, [sp] - __ ldumina(Assembler::xword, r5, r27, r0); // ldumina x5, x27, [x0] - __ ldumaxa(Assembler::xword, r17, r15, r4); // ldumaxa x17, x15, [x4] + __ swpa(Assembler::xword, r17, r9, r28); // swpa x17, x9, [x28] + __ ldadda(Assembler::xword, r27, r15, r7); // ldadda x27, x15, [x7] + __ ldbica(Assembler::xword, r21, r23, sp); // ldclra x21, x23, [sp] + __ ldeora(Assembler::xword, r25, r2, sp); // ldeora x25, x2, [sp] + __ ldorra(Assembler::xword, r27, r16, r10); // ldseta x27, x16, [x10] + __ ldsmina(Assembler::xword, r23, r19, r3); // ldsmina x23, x19, [x3] + __ ldsmaxa(Assembler::xword, r16, r0, r25); // ldsmaxa x16, x0, [x25] + __ ldumina(Assembler::xword, r26, r23, r2); // ldumina x26, x23, [x2] + __ ldumaxa(Assembler::xword, r16, r12, r4); // ldumaxa x16, x12, [x4] // LSEOp - __ swpal(Assembler::xword, r26, r8, r28); // swpal x26, x8, [x28] - __ ldaddal(Assembler::xword, r22, r27, r27); // ldaddal x22, x27, [x27] - __ ldbical(Assembler::xword, r25, r23, r0); // ldclral x25, x23, [x0] - __ ldeoral(Assembler::xword, r4, r6, r15); // ldeoral x4, x6, [x15] - __ ldorral(Assembler::xword, r0, r4, r15); // ldsetal x0, x4, [x15] - __ ldsminal(Assembler::xword, r1, r10, r7); // ldsminal x1, x10, [x7] - __ ldsmaxal(Assembler::xword, r5, r10, r28); // ldsmaxal x5, x10, [x28] - __ lduminal(Assembler::xword, r7, r20, r23); // lduminal x7, x20, [x23] - __ ldumaxal(Assembler::xword, r21, r6, r11); // ldumaxal x21, x6, [x11] + __ swpal(Assembler::xword, r28, r30, r29); // swpal x28, x30, [x29] + __ ldaddal(Assembler::xword, r16, r27, r6); // ldaddal x16, x27, [x6] + __ ldbical(Assembler::xword, r9, r29, r15); // ldclral x9, x29, [x15] + __ ldeoral(Assembler::xword, r7, r4, r7); // ldeoral x7, x4, [x7] + __ ldorral(Assembler::xword, r15, r9, r23); // ldsetal x15, x9, [x23] + __ ldsminal(Assembler::xword, r8, r2, r28); // ldsminal x8, x2, [x28] + __ ldsmaxal(Assembler::xword, r21, zr, r5); // ldsmaxal x21, xzr, [x5] + __ lduminal(Assembler::xword, r27, r0, r17); // lduminal x27, x0, [x17] + __ ldumaxal(Assembler::xword, r15, r4, r26); // ldumaxal x15, x4, [x26] // LSEOp - __ swpl(Assembler::xword, r8, r17, sp); // swpl x8, x17, [sp] - __ ldaddl(Assembler::xword, r6, r17, r2); // ldaddl x6, x17, [x2] - __ ldbicl(Assembler::xword, r12, r30, r29); // ldclrl x12, x30, [x29] - __ ldeorl(Assembler::xword, r3, r27, r22); // ldeorl x3, x27, [x22] - __ ldorrl(Assembler::xword, r29, r14, r13); // ldsetl x29, x14, [x13] - __ ldsminl(Assembler::xword, r28, r17, r24); // ldsminl x28, x17, [x24] - __ ldsmaxl(Assembler::xword, r5, r2, r14); // ldsmaxl x5, x2, [x14] - __ lduminl(Assembler::xword, r10, r16, r11); // lduminl x10, x16, [x11] - __ ldumaxl(Assembler::xword, r27, r23, r12); // ldumaxl x27, x23, [x12] + __ swpl(Assembler::xword, r8, r28, r22); // swpl x8, x28, [x22] + __ ldaddl(Assembler::xword, r27, r27, r25); // ldaddl x27, x27, [x25] + __ ldbicl(Assembler::xword, r23, r0, r4); // ldclrl x23, x0, [x4] + __ ldeorl(Assembler::xword, r6, r16, r0); // ldeorl x6, x16, [x0] + __ ldorrl(Assembler::xword, r4, r15, r1); // ldsetl x4, x15, [x1] + __ ldsminl(Assembler::xword, r10, r7, r5); // ldsminl x10, x7, [x5] + __ ldsmaxl(Assembler::xword, r10, r28, r7); // ldsmaxl x10, x28, [x7] + __ lduminl(Assembler::xword, r20, r23, r21); // lduminl x20, x23, [x21] + __ ldumaxl(Assembler::xword, r6, r11, r8); // ldumaxl x6, x11, [x8] // LSEOp - __ swp(Assembler::word, r4, r22, r17); // swp w4, w22, [x17] - __ ldadd(Assembler::word, r4, r1, r19); // ldadd w4, w1, [x19] - __ ldbic(Assembler::word, r16, r16, r13); // ldclr w16, w16, [x13] - __ ldeor(Assembler::word, r14, r12, r2); // ldeor w14, w12, [x2] - __ ldorr(Assembler::word, r17, r3, r21); // ldset w17, w3, [x21] - __ ldsmin(Assembler::word, r23, r5, r6); // ldsmin w23, w5, [x6] - __ ldsmax(Assembler::word, r7, r19, r13); // ldsmax w7, w19, [x13] - __ ldumin(Assembler::word, r28, r17, r16); // ldumin w28, w17, [x16] - __ ldumax(Assembler::word, r6, r2, r29); // ldumax w6, w2, [x29] + __ swp(Assembler::word, r17, zr, r6); // swp w17, wzr, [x6] + __ ldadd(Assembler::word, r17, r2, r12); // ldadd w17, w2, [x12] + __ ldbic(Assembler::word, r30, r29, r3); // ldclr w30, w29, [x3] + __ ldeor(Assembler::word, r27, r22, r29); // ldeor w27, w22, [x29] + __ ldorr(Assembler::word, r14, r13, r28); // ldset w14, w13, [x28] + __ ldsmin(Assembler::word, r17, r24, r5); // ldsmin w17, w24, [x5] + __ ldsmax(Assembler::word, r2, r14, r10); // ldsmax w2, w14, [x10] + __ ldumin(Assembler::word, r16, r11, r27); // ldumin w16, w11, [x27] + __ ldumax(Assembler::word, r23, r12, r4); // ldumax w23, w12, [x4] // LSEOp - __ swpa(Assembler::word, r3, r4, r6); // swpa w3, w4, [x6] - __ ldadda(Assembler::word, r16, r20, r13); // ldadda w16, w20, [x13] - __ ldbica(Assembler::word, r12, r20, r8); // ldclra w12, w20, [x8] - __ ldeora(Assembler::word, r25, r20, r19); // ldeora w25, w20, [x19] - __ ldorra(Assembler::word, r0, r11, r24); // ldseta w0, w11, [x24] - __ ldsmina(Assembler::word, r6, r20, sp); // ldsmina w6, w20, [sp] - __ ldsmaxa(Assembler::word, r14, r16, r6); // ldsmaxa w14, w16, [x6] - __ ldumina(Assembler::word, r0, r7, r15); // ldumina w0, w7, [x15] - __ ldumaxa(Assembler::word, r19, r26, r9); // ldumaxa w19, w26, [x9] + __ swpa(Assembler::word, r22, r17, r4); // swpa w22, w17, [x4] + __ ldadda(Assembler::word, r1, r19, r16); // ldadda w1, w19, [x16] + __ ldbica(Assembler::word, r16, r13, r14); // ldclra w16, w13, [x14] + __ ldeora(Assembler::word, r12, r2, r17); // ldeora w12, w2, [x17] + __ ldorra(Assembler::word, r3, r21, r23); // ldseta w3, w21, [x23] + __ ldsmina(Assembler::word, r5, r6, r7); // ldsmina w5, w6, [x7] + __ ldsmaxa(Assembler::word, r19, r13, r28); // ldsmaxa w19, w13, [x28] + __ ldumina(Assembler::word, r17, r16, r6); // ldumina w17, w16, [x6] + __ ldumaxa(Assembler::word, r2, r29, r3); // ldumaxa w2, w29, [x3] // LSEOp - __ swpal(Assembler::word, r10, r23, r21); // swpal w10, w23, [x21] - __ ldaddal(Assembler::word, r22, r28, r2); // ldaddal w22, w28, [x2] - __ ldbical(Assembler::word, r3, r15, r19); // ldclral w3, w15, [x19] - __ ldeoral(Assembler::word, r20, r7, r4); // ldeoral w20, w7, [x4] - __ ldorral(Assembler::word, r29, r7, r0); // ldsetal w29, w7, [x0] - __ ldsminal(Assembler::word, r9, r16, r20); // ldsminal w9, w16, [x20] - __ ldsmaxal(Assembler::word, r23, r4, r16); // ldsmaxal w23, w4, [x16] - __ lduminal(Assembler::word, r10, r23, r11); // lduminal w10, w23, [x11] - __ ldumaxal(Assembler::word, r25, r6, sp); // ldumaxal w25, w6, [sp] + __ swpal(Assembler::word, r4, r6, r15); // swpal w4, w6, [x15] + __ ldaddal(Assembler::word, r20, r13, r12); // ldaddal w20, w13, [x12] + __ ldbical(Assembler::word, r20, r8, r25); // ldclral w20, w8, [x25] + __ ldeoral(Assembler::word, r20, r19, r0); // ldeoral w20, w19, [x0] + __ ldorral(Assembler::word, r11, r24, r6); // ldsetal w11, w24, [x6] + __ ldsminal(Assembler::word, r20, zr, r14); // ldsminal w20, wzr, [x14] + __ ldsmaxal(Assembler::word, r16, r6, r0); // ldsmaxal w16, w6, [x0] + __ lduminal(Assembler::word, r7, r15, r19); // lduminal w7, w15, [x19] + __ ldumaxal(Assembler::word, r26, r9, r10); // ldumaxal w26, w9, [x10] // LSEOp - __ swpl(Assembler::word, r16, r13, r23); // swpl w16, w13, [x23] - __ ldaddl(Assembler::word, r12, r1, r14); // ldaddl w12, w1, [x14] - __ ldbicl(Assembler::word, r9, r21, r16); // ldclrl w9, w21, [x16] - __ ldeorl(Assembler::word, r26, r15, r4); // ldeorl w26, w15, [x4] - __ ldorrl(Assembler::word, r4, r16, r8); // ldsetl w4, w16, [x8] - __ ldsminl(Assembler::word, r6, r30, r4); // ldsminl w6, w30, [x4] - __ ldsmaxl(Assembler::word, r29, r17, r29); // ldsmaxl w29, w17, [x29] - __ lduminl(Assembler::word, r26, r9, r15); // lduminl w26, w9, [x15] - __ ldumaxl(Assembler::word, r2, r11, r29); // ldumaxl w2, w11, [x29] + __ swpl(Assembler::word, r23, r21, r22); // swpl w23, w21, [x22] + __ ldaddl(Assembler::word, r28, r2, r3); // ldaddl w28, w2, [x3] + __ ldbicl(Assembler::word, r15, r19, r20); // ldclrl w15, w19, [x20] + __ ldeorl(Assembler::word, r7, r4, r29); // ldeorl w7, w4, [x29] + __ ldorrl(Assembler::word, r7, r0, r9); // ldsetl w7, w0, [x9] + __ ldsminl(Assembler::word, r16, r20, r23); // ldsminl w16, w20, [x23] + __ ldsmaxl(Assembler::word, r4, r16, r10); // ldsmaxl w4, w16, [x10] + __ lduminl(Assembler::word, r23, r11, r25); // lduminl w23, w11, [x25] + __ ldumaxl(Assembler::word, r6, zr, r16); // ldumaxl w6, wzr, [x16] // SHA3SIMDOp - __ bcax(v3, __ T16B, v7, v1, v27); // bcax v3.16B, v7.16B, v1.16B, v27.16B - __ eor3(v21, __ T16B, v18, v14, v8); // eor3 v21.16B, v18.16B, v14.16B, v8.16B - __ rax1(v18, __ T2D, v22, v25); // rax1 v18.2D, v22.2D, v25.2D - __ xar(v5, __ T2D, v20, v21, 37); // xar v5.2D, v20.2D, v21.2D, #37 + __ bcax(v13, __ T16B, v22, v11, v1); // bcax v13.16B, v22.16B, v11.16B, v1.16B + __ eor3(v13, __ T16B, v8, v20, v16); // eor3 v13.16B, v8.16B, v20.16B, v16.16B + __ rax1(v25, __ T2D, v15, v4); // rax1 v25.2D, v15.2D, v4.2D + __ xar(v4, __ T2D, v17, v8, 13); // xar v4.2D, v17.2D, v8.2D, #13 // SHA512SIMDOp - __ sha512h(v23, __ T2D, v16, v30); // sha512h q23, q16, v30.2D - __ sha512h2(v20, __ T2D, v20, v0); // sha512h2 q20, q20, v0.2D - __ sha512su0(v4, __ T2D, v19); // sha512su0 v4.2D, v19.2D - __ sha512su1(v24, __ T2D, v4, v20); // sha512su1 v24.2D, v4.2D, v20.2D + __ sha512h(v29, __ T2D, v4, v28); // sha512h q29, q4, v28.2D + __ sha512h2(v16, __ T2D, v29, v26); // sha512h2 q16, q29, v26.2D + __ sha512su0(v9, __ T2D, v14); // sha512su0 v9.2D, v14.2D + __ sha512su1(v2, __ T2D, v11, v28); // sha512su1 v2.2D, v11.2D, v28.2D // SVEBinaryImmOp - __ sve_add(z4, __ D, 210u); // add z4.d, z4.d, #0xd2 - __ sve_sub(z19, __ B, 71u); // sub z19.b, z19.b, #0x47 - __ sve_and(z8, __ H, 49663u); // and z8.h, z8.h, #0xc1ff - __ sve_eor(z31, __ S, 4294967231u); // eor z31.s, z31.s, #0xffffffbf - __ sve_orr(z1, __ H, 16368u); // orr z1.h, z1.h, #0x3ff0 + __ sve_add(z3, __ B, 10u); // add z3.b, z3.b, #0xa + __ sve_sub(z26, __ S, 150u); // sub z26.s, z26.s, #0x96 + __ sve_and(z14, __ H, 57343u); // and z14.h, z14.h, #0xdfff + __ sve_eor(z24, __ B, 191u); // eor z24.b, z24.b, #0xbf + __ sve_orr(z17, __ S, 4294966791u); // orr z17.s, z17.s, #0xfffffe07 // SVEBinaryImmOp - __ sve_add(z0, __ H, 61u); // add z0.h, z0.h, #0x3d - __ sve_sub(z24, __ S, 36u); // sub z24.s, z24.s, #0x24 - __ sve_and(z27, __ B, 243u); // and z27.b, z27.b, #0xf3 - __ sve_eor(z24, __ H, 65534u); // eor z24.h, z24.h, #0xfffe - __ sve_orr(z22, __ S, 4294967293u); // orr z22.s, z22.s, #0xfffffffd + __ sve_add(z20, __ S, 3u); // add z20.s, z20.s, #0x3 + __ sve_sub(z4, __ S, 196u); // sub z4.s, z4.s, #0xc4 + __ sve_and(z4, __ S, 4286578691u); // and z4.s, z4.s, #0xff800003 + __ sve_eor(z25, __ S, 33553408u); // eor z25.s, z25.s, #0x1fffc00 + __ sve_orr(z8, __ H, 49663u); // orr z8.h, z8.h, #0xc1ff // SVEBinaryImmOp - __ sve_add(z29, __ H, 113u); // add z29.h, z29.h, #0x71 - __ sve_sub(z20, __ B, 165u); // sub z20.b, z20.b, #0xa5 - __ sve_and(z28, __ H, 32256u); // and z28.h, z28.h, #0x7e00 - __ sve_eor(z12, __ S, 4287102855u); // eor z12.s, z12.s, #0xff87ff87 - __ sve_orr(z9, __ S, 3825205247u); // orr z9.s, z9.s, #0xe3ffffff + __ sve_add(z30, __ S, 36u); // add z30.s, z30.s, #0x24 + __ sve_sub(z30, __ B, 85u); // sub z30.b, z30.b, #0x55 + __ sve_and(z19, __ H, 4032u); // and z19.h, z19.h, #0xfc0 + __ sve_eor(z7, __ D, 274877904896u); // eor z7.d, z7.d, #0x3ffffff800 + __ sve_orr(z27, __ B, 243u); // orr z27.b, z27.b, #0xf3 // SVEBinaryImmOp - __ sve_add(z18, __ S, 41u); // add z18.s, z18.s, #0x29 - __ sve_sub(z0, __ B, 98u); // sub z0.b, z0.b, #0x62 - __ sve_and(z8, __ H, 32768u); // and z8.h, z8.h, #0x8000 - __ sve_eor(z4, __ H, 508u); // eor z4.h, z4.h, #0x1fc - __ sve_orr(z0, __ H, 64512u); // orr z0.h, z0.h, #0xfc00 + __ sve_add(z23, __ H, 132u); // add z23.h, z23.h, #0x84 + __ sve_sub(z30, __ S, 183u); // sub z30.s, z30.s, #0xb7 + __ sve_and(z20, __ D, 4503599627354112u); // and z20.d, z20.d, #0xfffffffffc000 + __ sve_eor(z13, __ S, 4042322160u); // eor z13.s, z13.s, #0xf0f0f0f0 + __ sve_orr(z28, __ H, 32256u); // orr z28.h, z28.h, #0x7e00 // SVEBinaryImmOp - __ sve_add(z3, __ B, 79u); // add z3.b, z3.b, #0x4f - __ sve_sub(z19, __ D, 84u); // sub z19.d, z19.d, #0x54 - __ sve_and(z24, __ B, 62u); // and z24.b, z24.b, #0x3e - __ sve_eor(z24, __ D, 18428729675200069887u); // eor z24.d, z24.d, #0xffc00000000000ff - __ sve_orr(z11, __ D, 17296056810822168583u); // orr z11.d, z11.d, #0xf007f007f007f007 + __ sve_add(z11, __ S, 13u); // add z11.s, z11.s, #0xd + __ sve_sub(z24, __ H, 159u); // sub z24.h, z24.h, #0x9f + __ sve_and(z13, __ S, 2151677951u); // and z13.s, z13.s, #0x803fffff + __ sve_eor(z4, __ B, 124u); // eor z4.b, z4.b, #0x7c + __ sve_orr(z7, __ H, 32768u); // orr z7.h, z7.h, #0x8000 // SVEBinaryImmOp - __ sve_add(z31, __ S, 115u); // add z31.s, z31.s, #0x73 - __ sve_sub(z3, __ D, 134u); // sub z3.d, z3.d, #0x86 - __ sve_and(z22, __ S, 4042322160u); // and z22.s, z22.s, #0xf0f0f0f0 - __ sve_eor(z3, __ B, 225u); // eor z3.b, z3.b, #0xe1 - __ sve_orr(z9, __ S, 4164941887u); // orr z9.s, z9.s, #0xf83ff83f + __ sve_add(z4, __ H, 243u); // add z4.h, z4.h, #0xf3 + __ sve_sub(z5, __ B, 86u); // sub z5.b, z5.b, #0x56 + __ sve_and(z21, __ D, 8064u); // and z21.d, z21.d, #0x1f80 + __ sve_eor(z9, __ S, 130023424u); // eor z9.s, z9.s, #0x7c00000 + __ sve_orr(z24, __ B, 62u); // orr z24.b, z24.b, #0x3e // SVEVectorOp - __ sve_add(z0, __ D, z4, z2); // add z0.d, z4.d, z2.d - __ sve_sub(z14, __ S, z6, z11); // sub z14.s, z6.s, z11.s - __ sve_fadd(z14, __ S, z17, z30); // fadd z14.s, z17.s, z30.s - __ sve_fmul(z3, __ S, z3, z23); // fmul z3.s, z3.s, z23.s - __ sve_fsub(z3, __ S, z24, z28); // fsub z3.s, z24.s, z28.s - __ sve_abs(z19, __ D, p5, z7); // abs z19.d, p5/m, z7.d - __ sve_add(z21, __ H, p3, z5); // add z21.h, p3/m, z21.h, z5.h - __ sve_and(z26, __ S, p1, z22); // and z26.s, p1/m, z26.s, z22.s - __ sve_asr(z17, __ H, p0, z3); // asr z17.h, p0/m, z17.h, z3.h - __ sve_bic(z20, __ H, p3, z8); // bic z20.h, p3/m, z20.h, z8.h - __ sve_clz(z14, __ H, p4, z17); // clz z14.h, p4/m, z17.h - __ sve_cnt(z13, __ D, p6, z18); // cnt z13.d, p6/m, z18.d - __ sve_eor(z19, __ H, p2, z16); // eor z19.h, p2/m, z19.h, z16.h - __ sve_lsl(z27, __ S, p5, z28); // lsl z27.s, p5/m, z27.s, z28.s - __ sve_lsr(z8, __ D, p2, z5); // lsr z8.d, p2/m, z8.d, z5.d - __ sve_mul(z28, __ H, p2, z0); // mul z28.h, p2/m, z28.h, z0.h - __ sve_neg(z25, __ B, p5, z21); // neg z25.b, p5/m, z21.b - __ sve_not(z3, __ B, p5, z26); // not z3.b, p5/m, z26.b - __ sve_orr(z26, __ S, p7, z19); // orr z26.s, p7/m, z26.s, z19.s - __ sve_rbit(z1, __ D, p3, z14); // rbit z1.d, p3/m, z14.d - __ sve_revb(z14, __ H, p0, z18); // revb z14.h, p0/m, z18.h - __ sve_smax(z31, __ S, p5, z23); // smax z31.s, p5/m, z31.s, z23.s - __ sve_smin(z30, __ B, p3, z8); // smin z30.b, p3/m, z30.b, z8.b - __ sve_sub(z0, __ S, p3, z23); // sub z0.s, p3/m, z0.s, z23.s - __ sve_fabs(z0, __ D, p4, z26); // fabs z0.d, p4/m, z26.d - __ sve_fadd(z24, __ D, p3, z22); // fadd z24.d, p3/m, z24.d, z22.d - __ sve_fdiv(z2, __ D, p0, z11); // fdiv z2.d, p0/m, z2.d, z11.d - __ sve_fmax(z12, __ D, p5, z24); // fmax z12.d, p5/m, z12.d, z24.d - __ sve_fmin(z9, __ D, p7, z17); // fmin z9.d, p7/m, z9.d, z17.d - __ sve_fmul(z20, __ D, p5, z4); // fmul z20.d, p5/m, z20.d, z4.d - __ sve_fneg(z13, __ D, p7, z22); // fneg z13.d, p7/m, z22.d - __ sve_frintm(z31, __ D, p6, z18); // frintm z31.d, p6/m, z18.d - __ sve_frintn(z15, __ D, p2, z13); // frintn z15.d, p2/m, z13.d - __ sve_frintp(z20, __ S, p1, z1); // frintp z20.s, p1/m, z1.s - __ sve_fsqrt(z14, __ S, p0, z7); // fsqrt z14.s, p0/m, z7.s - __ sve_fsub(z12, __ D, p4, z4); // fsub z12.d, p4/m, z12.d, z4.d - __ sve_fmad(z15, __ S, p0, z3, z30); // fmad z15.s, p0/m, z3.s, z30.s - __ sve_fmla(z20, __ D, p1, z20, z31); // fmla z20.d, p1/m, z20.d, z31.d - __ sve_fmls(z13, __ D, p3, z9, z14); // fmls z13.d, p3/m, z9.d, z14.d - __ sve_fmsb(z1, __ S, p3, z28, z3); // fmsb z1.s, p3/m, z28.s, z3.s - __ sve_fnmad(z26, __ S, p2, z25, z9); // fnmad z26.s, p2/m, z25.s, z9.s - __ sve_fnmsb(z26, __ D, p2, z14, z1); // fnmsb z26.d, p2/m, z14.d, z1.d - __ sve_fnmla(z26, __ D, p1, z29, z20); // fnmla z26.d, p1/m, z29.d, z20.d - __ sve_fnmls(z6, __ D, p7, z13, z1); // fnmls z6.d, p7/m, z13.d, z1.d - __ sve_mla(z11, __ B, p2, z1, z1); // mla z11.b, p2/m, z1.b, z1.b - __ sve_mls(z27, __ B, p6, z15, z2); // mls z27.b, p6/m, z15.b, z2.b - __ sve_and(z30, z17, z25); // and z30.d, z17.d, z25.d - __ sve_eor(z2, z24, z3); // eor z2.d, z24.d, z3.d - __ sve_orr(z29, z13, z3); // orr z29.d, z13.d, z3.d - __ sve_bic(z14, z16, z28); // bic z14.d, z16.d, z28.d - __ sve_uzp1(z4, __ S, z11, z27); // uzp1 z4.s, z11.s, z27.s - __ sve_uzp2(z2, __ D, z16, z1); // uzp2 z2.d, z16.d, z1.d - __ sve_fabd(z7, __ D, p5, z31); // fabd z7.d, p5/m, z7.d, z31.d - __ sve_bext(z16, __ S, z10, z22); // bext z16.s, z10.s, z22.s - __ sve_bdep(z29, __ B, z7, z22); // bdep z29.b, z7.b, z22.b - __ sve_eor3(z12, z24, z11); // eor3 z12.d, z12.d, z24.d, z11.d + __ sve_add(z23, __ S, z28, z13); // add z23.s, z28.s, z13.s + __ sve_sub(z10, __ S, z26, z12); // sub z10.s, z26.s, z12.s + __ sve_fadd(z30, __ S, z17, z14); // fadd z30.s, z17.s, z14.s + __ sve_fmul(z29, __ D, z16, z21); // fmul z29.d, z16.d, z21.d + __ sve_fsub(z7, __ S, z19, z2); // fsub z7.s, z19.s, z2.s + __ sve_abs(z26, __ S, p4, z9); // abs z26.s, p4/m, z9.s + __ sve_add(z17, __ B, p5, z0); // add z17.b, p5/m, z17.b, z0.b + __ sve_and(z2, __ B, p6, z14); // and z2.b, p6/m, z2.b, z14.b + __ sve_asr(z11, __ S, p5, z14); // asr z11.s, p5/m, z11.s, z14.s + __ sve_bic(z29, __ B, p3, z3); // bic z29.b, p3/m, z29.b, z3.b + __ sve_clz(z22, __ D, p2, z3); // clz z22.d, p2/m, z3.d + __ sve_cnt(z27, __ S, p0, z19); // cnt z27.s, p0/m, z19.s + __ sve_eor(z7, __ H, p6, z21); // eor z7.h, p6/m, z7.h, z21.h + __ sve_lsl(z5, __ B, p2, z25); // lsl z5.b, p2/m, z5.b, z25.b + __ sve_lsr(z21, __ B, p4, z17); // lsr z21.b, p4/m, z21.b, z17.b + __ sve_mul(z3, __ H, p2, z19); // mul z3.h, p2/m, z3.h, z19.h + __ sve_neg(z7, __ S, p3, z14); // neg z7.s, p3/m, z14.s + __ sve_not(z17, __ D, p2, z13); // not z17.d, p2/m, z13.d + __ sve_orr(z17, __ H, p7, z17); // orr z17.h, p7/m, z17.h, z17.h + __ sve_rbit(z15, __ S, p3, z26); // rbit z15.s, p3/m, z26.s + __ sve_revb(z27, __ H, p5, z7); // revb z27.h, p5/m, z7.h + __ sve_smax(z5, __ H, p7, z27); // smax z5.h, p7/m, z5.h, z27.h + __ sve_smin(z0, __ S, p3, z24); // smin z0.s, p3/m, z0.s, z24.s + __ sve_sub(z20, __ S, p0, z3); // sub z20.s, p0/m, z20.s, z3.s + __ sve_fabs(z25, __ D, p1, z25); // fabs z25.d, p1/m, z25.d + __ sve_fadd(z17, __ S, p4, z1); // fadd z17.s, p4/m, z17.s, z1.s + __ sve_fdiv(z14, __ S, p7, z13); // fdiv z14.s, p7/m, z14.s, z13.s + __ sve_fmax(z17, __ D, p0, z30); // fmax z17.d, p0/m, z17.d, z30.d + __ sve_fmin(z22, __ S, p5, z29); // fmin z22.s, p5/m, z22.s, z29.s + __ sve_fmul(z8, __ S, p0, z0); // fmul z8.s, p0/m, z8.s, z0.s + __ sve_fneg(z23, __ D, p5, z0); // fneg z23.d, p5/m, z0.d + __ sve_frintm(z25, __ S, p6, z23); // frintm z25.s, p6/m, z23.s + __ sve_frintn(z21, __ S, p5, z1); // frintn z21.s, p5/m, z1.s + __ sve_frintp(z10, __ D, p5, z11); // frintp z10.d, p5/m, z11.d + __ sve_fsqrt(z23, __ D, p6, z8); // fsqrt z23.d, p6/m, z8.d + __ sve_fsub(z17, __ D, p5, z19); // fsub z17.d, p5/m, z17.d, z19.d + __ sve_fmad(z4, __ D, p5, z13, z30); // fmad z4.d, p5/m, z13.d, z30.d + __ sve_fmla(z30, __ D, p7, z25, z17); // fmla z30.d, p7/m, z25.d, z17.d + __ sve_fmls(z14, __ D, p2, z12, z28); // fmls z14.d, p2/m, z12.d, z28.d + __ sve_fmsb(z5, __ S, p0, z13, z13); // fmsb z5.s, p0/m, z13.s, z13.s + __ sve_fnmad(z7, __ S, p2, z11, z19); // fnmad z7.s, p2/m, z11.s, z19.s + __ sve_fnmsb(z25, __ D, p3, z2, z3); // fnmsb z25.d, p3/m, z2.d, z3.d + __ sve_fnmla(z0, __ D, p5, z5, z20); // fnmla z0.d, p5/m, z5.d, z20.d + __ sve_fnmls(z28, __ S, p3, z13, z8); // fnmls z28.s, p3/m, z13.s, z8.s + __ sve_mla(z29, __ B, p0, z14, z27); // mla z29.b, p0/m, z14.b, z27.b + __ sve_mls(z3, __ H, p6, z8, z24); // mls z3.h, p6/m, z8.h, z24.h + __ sve_and(z1, z25, z10); // and z1.d, z25.d, z10.d + __ sve_eor(z1, z20, z25); // eor z1.d, z20.d, z25.d + __ sve_orr(z28, z19, z16); // orr z28.d, z19.d, z16.d + __ sve_bic(z27, z13, z1); // bic z27.d, z13.d, z1.d + __ sve_uzp1(z11, __ B, z9, z1); // uzp1 z11.b, z9.b, z1.b + __ sve_uzp2(z1, __ H, z27, z26); // uzp2 z1.h, z27.h, z26.h + __ sve_fabd(z2, __ D, p1, z29); // fabd z2.d, p1/m, z2.d, z29.d + __ sve_bext(z24, __ D, z2, z2); // bext z24.d, z2.d, z2.d + __ sve_bdep(z3, __ H, z25, z28); // bdep z3.h, z25.h, z28.h + __ sve_eor3(z3, z22, z13); // eor3 z3.d, z3.d, z22.d, z13.d // SVEReductionOp - __ sve_andv(v11, __ B, p2, z0); // andv b11, p2, z0.b - __ sve_orv(v23, __ B, p5, z20); // orv b23, p5, z20.b - __ sve_eorv(v3, __ B, p3, z15); // eorv b3, p3, z15.b - __ sve_smaxv(v30, __ B, p6, z27); // smaxv b30, p6, z27.b - __ sve_sminv(v21, __ D, p6, z10); // sminv d21, p6, z10.d - __ sve_fminv(v3, __ S, p6, z4); // fminv s3, p6, z4.s - __ sve_fmaxv(v6, __ S, p0, z21); // fmaxv s6, p0, z21.s - __ sve_fadda(v25, __ D, p6, z30); // fadda d25, p6, d25, z30.d - __ sve_uaddv(v31, __ H, p4, z1); // uaddv d31, p4, z1.h - -// AddWideNEONOp - __ saddwv(v12, v13, __ T8H, v14, __ T8B); // saddw v12.8H, v13.8H, v14.8B - __ saddwv2(v30, v31, __ T8H, v0, __ T16B); // saddw2 v30.8H, v31.8H, v0.16B - __ saddwv(v13, v14, __ T4S, v15, __ T4H); // saddw v13.4S, v14.4S, v15.4H - __ saddwv2(v8, v9, __ T4S, v10, __ T8H); // saddw2 v8.4S, v9.4S, v10.8H - __ saddwv(v25, v26, __ T2D, v27, __ T2S); // saddw v25.2D, v26.2D, v27.2S - __ saddwv2(v29, v30, __ T2D, v31, __ T4S); // saddw2 v29.2D, v30.2D, v31.4S - __ uaddwv(v1, v2, __ T8H, v3, __ T8B); // uaddw v1.8H, v2.8H, v3.8B - __ uaddwv2(v31, v0, __ T8H, v1, __ T16B); // uaddw2 v31.8H, v0.8H, v1.16B - __ uaddwv(v23, v24, __ T4S, v25, __ T4H); // uaddw v23.4S, v24.4S, v25.4H - __ uaddwv2(v31, v0, __ T4S, v1, __ T8H); // uaddw2 v31.4S, v0.4S, v1.8H - __ uaddwv(v20, v21, __ T2D, v22, __ T2S); // uaddw v20.2D, v21.2D, v22.2S - __ uaddwv2(v0, v1, __ T2D, v2, __ T4S); // uaddw2 v0.2D, v1.2D, v2.4S + __ sve_andv(v27, __ H, p4, z4); // andv h27, p4, z4.h + __ sve_orv(v26, __ S, p4, z2); // orv s26, p4, z2.s + __ sve_eorv(v1, __ S, p7, z7); // eorv s1, p7, z7.s + __ sve_smaxv(v30, __ H, p7, z16); // smaxv h30, p7, z16.h + __ sve_sminv(v21, __ B, p4, z28); // sminv b21, p4, z28.b + __ sve_fminv(v21, __ D, p1, z12); // fminv d21, p1, z12.d + __ sve_fmaxv(v11, __ S, p2, z10); // fmaxv s11, p2, z10.s + __ sve_fadda(v0, __ D, p1, z22); // fadda d0, p1, d0, z22.d + __ sve_uaddv(v20, __ H, p1, z3); // uaddv d20, p1, z3.h __ bind(forth); @@ -1327,30 +1298,30 @@ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x14000441, 0x94000000, - 0x97ffffd4, 0x9400043e, 0x3400000a, 0x34fffa2a, - 0x3400876a, 0x35000008, 0x35fff9c8, 0x35008708, - 0xb400000b, 0xb4fff96b, 0xb40086ab, 0xb500001d, - 0xb5fff91d, 0xb500865d, 0x10000013, 0x10fff8b3, - 0x100085f3, 0x90000013, 0x36300016, 0x3637f836, - 0x36308576, 0x3758000c, 0x375ff7cc, 0x3758850c, + 0x14000000, 0x17ffffd7, 0x14000428, 0x94000000, + 0x97ffffd4, 0x94000425, 0x3400000a, 0x34fffa2a, + 0x3400844a, 0x35000008, 0x35fff9c8, 0x350083e8, + 0xb400000b, 0xb4fff96b, 0xb400838b, 0xb500001d, + 0xb5fff91d, 0xb500833d, 0x10000013, 0x10fff8b3, + 0x100082d3, 0x90000013, 0x36300016, 0x3637f836, + 0x36308256, 0x3758000c, 0x375ff7cc, 0x375881ec, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x540082e0, 0x54000001, 0x54fff541, 0x54008281, - 0x54000002, 0x54fff4e2, 0x54008222, 0x54000002, - 0x54fff482, 0x540081c2, 0x54000003, 0x54fff423, - 0x54008163, 0x54000003, 0x54fff3c3, 0x54008103, - 0x54000004, 0x54fff364, 0x540080a4, 0x54000005, - 0x54fff305, 0x54008045, 0x54000006, 0x54fff2a6, - 0x54007fe6, 0x54000007, 0x54fff247, 0x54007f87, - 0x54000008, 0x54fff1e8, 0x54007f28, 0x54000009, - 0x54fff189, 0x54007ec9, 0x5400000a, 0x54fff12a, - 0x54007e6a, 0x5400000b, 0x54fff0cb, 0x54007e0b, - 0x5400000c, 0x54fff06c, 0x54007dac, 0x5400000d, - 0x54fff00d, 0x54007d4d, 0x5400000e, 0x54ffefae, - 0x54007cee, 0x5400000f, 0x54ffef4f, 0x54007c8f, + 0x54007fc0, 0x54000001, 0x54fff541, 0x54007f61, + 0x54000002, 0x54fff4e2, 0x54007f02, 0x54000002, + 0x54fff482, 0x54007ea2, 0x54000003, 0x54fff423, + 0x54007e43, 0x54000003, 0x54fff3c3, 0x54007de3, + 0x54000004, 0x54fff364, 0x54007d84, 0x54000005, + 0x54fff305, 0x54007d25, 0x54000006, 0x54fff2a6, + 0x54007cc6, 0x54000007, 0x54fff247, 0x54007c67, + 0x54000008, 0x54fff1e8, 0x54007c08, 0x54000009, + 0x54fff189, 0x54007ba9, 0x5400000a, 0x54fff12a, + 0x54007b4a, 0x5400000b, 0x54fff0cb, 0x54007aeb, + 0x5400000c, 0x54fff06c, 0x54007a8c, 0x5400000d, + 0x54fff00d, 0x54007a2d, 0x5400000e, 0x54ffefae, + 0x540079ce, 0x5400000f, 0x54ffef4f, 0x5400796f, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f, 0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf, @@ -1375,23 +1346,23 @@ 0xb81b1022, 0x381ea354, 0x79002fd7, 0xf85cf39a, 0xb8580309, 0x385e218c, 0x784051e1, 0x389e11d8, 0x789fa1f8, 0x79c01865, 0xb881131b, 0xfc5dd3ad, - 0xbc5d1137, 0xfc00900b, 0xbc181015, 0xf818ec7d, + 0xbc5d1136, 0xfc00900b, 0xbc181014, 0xf818ec7d, 0xb81b8c91, 0x381efc40, 0x78007c3d, 0xf857beb0, 0xb8413dd4, 0x385fddd6, 0x78409e2f, 0x389eddea, 0x789e7d94, 0x78de3d55, 0xb8805c13, 0xfc5cadc0, - 0xbc428c23, 0xfc1a2dc4, 0xbc1caf92, 0xf81475f6, + 0xbc428c23, 0xfc1a2dc4, 0xbc1caf91, 0xf81475f6, 0xb81f95d1, 0x381e757e, 0x78014561, 0xf8402436, 0xb85896e2, 0x385f4763, 0x785db4f0, 0x3880374f, 0x789e25e7, 0x78dd0563, 0xb88166f9, 0xfc529540, - 0xbc4374d3, 0xfc1166ae, 0xbc1ba6c0, 0xf820ea7b, + 0xbc4374d1, 0xfc1166ad, 0xbc1ba6c0, 0xf820ea7b, 0xb82d68c8, 0x38367a04, 0x782f4b59, 0xf878c8a4, 0xb8674a24, 0x386b78f1, 0x78776bc0, 0x38a15aca, - 0x78bedbd5, 0x78fcd94b, 0xb8aa4a7c, 0xfc6ecbbe, - 0xbc65d8a8, 0xfc2de919, 0xbc3a7b11, 0xf91f1193, + 0x78bedbd5, 0x78fcd94b, 0xb8aa4a7c, 0xfc6ecbbd, + 0xbc65d8a8, 0xfc2de918, 0xbc3a7b11, 0xf91f1193, 0xb91ed5f7, 0x391ec9bd, 0x79182ceb, 0xf95d4b0a, 0xb9581010, 0x395fc034, 0x795fb221, 0x399d8731, 0x799efb3b, 0x79dd1a2e, 0xb998e4ea, 0xfd583723, - 0xbd5ea12c, 0xfd18dc38, 0xbd1b0e83, 0x58ffdaa2, + 0xbd5ea12c, 0xfd18dc37, 0xbd1b0e83, 0x58ffdaa2, 0x1800001d, 0xf885d1c0, 0xd8ffda40, 0xf8a77820, 0xf9980220, 0x1a030301, 0x3a140311, 0x5a0d000b, 0x7a07015c, 0x9a1001e4, 0xba140182, 0xda0d01bd, @@ -1412,193 +1383,187 @@ 0x9ad521f7, 0x9adb263c, 0x9ac0286a, 0x9ac92f27, 0x9bdd7de6, 0x9b427d4f, 0x1b0b2cf1, 0x1b1ddcf7, 0x9b0b2f6e, 0x9b0cbf04, 0x9b2b728e, 0x9b2cdd6d, - 0x9bae275e, 0x9ba7954d, 0x7ea3d5fe, 0x1e30098c, - 0x1e321bff, 0x1e302ab3, 0x1e35394f, 0x7efcd542, - 0x1e7f0bc7, 0x1e621832, 0x1e632946, 0x1e673979, - 0x1f000d81, 0x1f06dfb3, 0x1f3c6c06, 0x1f2774a2, - 0x1f4d332c, 0x1f48ca78, 0x1f755356, 0x1f7e5853, - 0x1e2042c8, 0x1e20c2b3, 0x1e21424c, 0x1e21c0d5, - 0x1e22c070, 0x1e23c3a3, 0x1ee24383, 0x1e6041cf, - 0x1e60c1aa, 0x1e61424c, 0x1e61c34a, 0x1e6240e7, - 0x1e3803ae, 0x9e3802e0, 0x1e780180, 0x9e7801d7, - 0x1e2200ed, 0x9e2200ef, 0x1e620289, 0x9e620393, - 0x1e24021e, 0x9e640122, 0x1e3002b0, 0x9e70009d, - 0x1e260361, 0x9e660318, 0x1e2702ae, 0x9e6700ad, - 0x1e392180, 0x1e7e2320, 0x1e202388, 0x1e6022a8, + 0x9bae275e, 0x9ba7954d, 0x7ea3d5fd, 0x1e2f098b, + 0x1e311bde, 0x1e2f2a93, 0x1e35392f, 0x7efbd522, + 0x1e7e0ba7, 0x1e621831, 0x1e632946, 0x1e673978, + 0x1f000d61, 0x1f06db91, 0x1f3b6806, 0x1f2770a2, + 0x1f4d2f2b, 0x1f48c677, 0x1f744f35, 0x1f7d5851, + 0x1e2042a8, 0x1e20c293, 0x1e21422b, 0x1e21c0d4, + 0x1e22c06f, 0x1e23c383, 0x1ee24363, 0x1e6041ce, + 0x1e60c18a, 0x1e61422b, 0x1e61c32a, 0x1e6240e7, + 0x1e38038e, 0x9e3802c0, 0x1e780180, 0x9e7801b7, + 0x1e2200ed, 0x9e2200ee, 0x1e620288, 0x9e620391, + 0x1e24021e, 0x9e640122, 0x1e300290, 0x9e70009d, + 0x1e260341, 0x9e6602f8, 0x1e2702ae, 0x9e6700ac, + 0x1e382180, 0x1e7d2300, 0x1e202368, 0x1e6022a8, 0x293a1796, 0x29426e73, 0x697c68fc, 0xa93d0486, 0xa97b5eba, 0x29b47934, 0x29c2534d, 0x69f62dbd, 0xa9bd54bb, 0xa9c503c6, 0x28a63e13, 0x28e25d2c, 0x68c469e0, 0xa8b34748, 0xa8f51c59, 0x28264433, 0x285036c0, 0xa8005f7d, 0xa872290b, 0x0c407160, - 0x4cdfa350, 0x0cd16f56, 0x4cdf27bb, 0x0d40c0d6, - 0x4ddfcbae, 0x0dd0cd96, 0x4c408c01, 0x0cdf86aa, - 0x4d60c327, 0x0dffc929, 0x4deecd89, 0x4cd14887, - 0x0c404a37, 0x4d40e6c4, 0x4ddfe84d, 0x0dcced50, - 0x4cdf0444, 0x0ccb0286, 0x0d60e18c, 0x0dffe630, - 0x0df0eb2e, 0x0e31bab4, 0x4e31b841, 0x0e71bb17, + 0x4cdfa350, 0x0cd16f55, 0x4cdf27ba, 0x0d40c0d5, + 0x4ddfcbad, 0x0dd0cd95, 0x4c408c01, 0x0cdf86a9, + 0x4d60c327, 0x0dffc928, 0x4deecd89, 0x4cd14887, + 0x0c404a37, 0x4d40e6c3, 0x4ddfe84c, 0x0dcced4f, + 0x4cdf0444, 0x0ccb0286, 0x0d60e18b, 0x0dffe62f, + 0x0df0eb2e, 0x0e31bab4, 0x4e31b841, 0x0e71baf6, 0x4e71bbfe, 0x4eb1b9ee, 0x0e30a862, 0x4e30a8e6, - 0x0e70a883, 0x4e70a928, 0x4eb0ab59, 0x6e30f820, - 0x0e31ab9b, 0x2e31abfe, 0x4e31a8c5, 0x6e31a8c5, - 0x0e71abfe, 0x2e71a98b, 0x4e71ab59, 0x6e71a820, - 0x4eb1a81f, 0x6eb1a820, 0x6eb0fa93, 0x7e30fbdd, - 0x7e70fb7a, 0x7eb0f949, 0x7ef0fb7a, 0x0ea0c9ac, - 0x4ea0ca0f, 0x4ee0c98b, 0x2ea0c98b, 0x6ea0ca72, - 0x6ee0cb59, 0x0ea0daf6, 0x4ea0db38, 0x4ee0d820, - 0x0ea0ea51, 0x4ea0e98b, 0x4ee0e8e6, 0x2ea0dbdd, - 0x6ea0d8e6, 0x6ee0d8c5, 0x0e20b8c5, 0x4e20bad5, - 0x0e60ba93, 0x4e60ba30, 0x0ea0ba72, 0x4ea0bbfe, - 0x4ee0bb9b, 0x0ea0fbbc, 0x4ea0f841, 0x4ee0fbbc, - 0x2ea0f841, 0x6ea0fab4, 0x6ee0fbdd, 0x2ea1fa30, - 0x6ea1f9cd, 0x6ee1f96a, 0x2e205bdd, 0x6e205bdd, + 0x0e70a883, 0x4e70a907, 0x4eb0ab38, 0x6e30f820, + 0x0e31ab9b, 0x2e31abdd, 0x4e31a8c5, 0x6e31a8c5, + 0x0e71abdd, 0x2e71a98b, 0x4e71ab59, 0x6e71a820, + 0x4eb1abfe, 0x6eb1a820, 0x6eb0fa51, 0x7e30fbbc, + 0x7e70fb59, 0x7eb0f949, 0x7ef0fb59, 0x0ea0c9ac, + 0x4ea0ca0f, 0x4ee0c98b, 0x2ea0c96a, 0x6ea0ca51, + 0x6ee0cb38, 0x0ea0dad5, 0x4ea0db17, 0x4ee0d820, + 0x0ea0ea30, 0x4ea0e96a, 0x4ee0e8e6, 0x2ea0dbbc, + 0x6ea0d8e6, 0x6ee0d8c5, 0x0e20b8c5, 0x4e20bab4, + 0x0e60ba51, 0x4e60ba0f, 0x0ea0ba51, 0x4ea0bbdd, + 0x4ee0bb7a, 0x0ea0fbbc, 0x4ea0f841, 0x4ee0fb9b, + 0x2ea0f820, 0x6ea0fab4, 0x6ee0fbbc, 0x2ea1fa0f, + 0x6ea1f9ac, 0x6ee1f96a, 0x2e205bbc, 0x6e205bbc, 0x0e351e93, 0x4e381ef6, 0x0eac1d6a, 0x4ea61ca4, - 0x2e211c1f, 0x6e371ed5, 0x0e2a8528, 0x4e21841f, - 0x0e758693, 0x4e6c856a, 0x0ebe87bc, 0x4ea48462, - 0x4efb8759, 0x0e27d4c5, 0x4e25d483, 0x4e6ad528, - 0x2e3886f6, 0x6e358693, 0x2e6f85cd, 0x6e6784c5, - 0x2ebf87dd, 0x6eba8738, 0x6ef786d5, 0x0ebcd77a, - 0x4ebad738, 0x4ee5d483, 0x0e3a9f38, 0x4e3c9f7a, - 0x0e799f17, 0x4e719e0f, 0x0eb79ed5, 0x4ea59c83, - 0x2ebad738, 0x6eaad528, 0x6efbd759, 0x2e36d6b4, - 0x6e32d630, 0x6e73d651, 0x2e24dc62, 0x6e23dc41, + 0x2e201ffe, 0x6e361eb4, 0x0e2a8528, 0x4e2087fe, + 0x0e738651, 0x4e6c856a, 0x0ebd879b, 0x4ea48462, + 0x4efa8738, 0x0e26d4a4, 0x4e25d483, 0x4e6ad528, + 0x2e3886f6, 0x6e338651, 0x2e6f85cd, 0x6e6684a4, + 0x2ebe87bc, 0x6eb98717, 0x6ef786d5, 0x0ebbd759, + 0x4ebad738, 0x4ee5d483, 0x0e399f17, 0x4e3c9f7a, + 0x0e799f17, 0x4e709dee, 0x0eb79ed5, 0x4ea59c83, + 0x2eb9d717, 0x6eaad528, 0x6efad738, 0x2e35d693, + 0x6e31d60f, 0x6e72d630, 0x2e24dc62, 0x6e23dc41, 0x6e62dc20, 0x0e7a9738, 0x4e6694a4, 0x0ea59483, - 0x4eae95ac, 0x0e21cc1f, 0x4e3ecfbc, 0x4e6ccd6a, - 0x2e7c977a, 0x6e649462, 0x2eae95ac, 0x6eb49672, - 0x0ea1cc1f, 0x4ea3cc41, 0x4eefcdcd, 0x2e3fffdd, - 0x6e22fc20, 0x6e75fe93, 0x0e2e65ac, 0x4e336651, - 0x0e7866f6, 0x4e6f65cd, 0x0ebe67bc, 0x4ea067fe, - 0x0e21a41f, 0x4e23a441, 0x0e7ca77a, 0x4e7ea7bc, - 0x0ea6a4a4, 0x4ea0a7fe, 0x0e26f4a4, 0x4e28f4e6, - 0x4e60f7fe, 0x0e3c6f7a, 0x4e346e72, 0x0e6b6d49, - 0x4e6a6d28, 0x0eae6dac, 0x4ea26c20, 0x0e36aeb4, - 0x4e23ac41, 0x0e7aaf38, 0x4e64ac62, 0x0ea2ac20, - 0x4eabad49, 0x0ebaf738, 0x4ebcf77a, 0x4ef2f630, - 0x2ea0effe, 0x6ea5ec83, 0x6eeced6a, 0x0fa710c5, - 0x4f8b8149, 0x4fc710c5, 0x0f8750c5, 0x4faa8128, - 0x4fc750c5, 0x2f8890e6, 0x4fa880e6, 0x6fc59083, - 0x0f6f81cd, 0x4f448862, 0x0f848062, 0x4fab8149, - 0x0e3736d5, 0x4e323630, 0x0e743672, 0x4e6d358b, - 0x0eb736d5, 0x4eb93717, 0x4eee35ac, 0x0e3c3f7a, - 0x4e393f17, 0x0e7e3fbc, 0x4e703dee, 0x0ead3d8b, - 0x4eba3f38, 0x4ee33c41, 0x2e2e8dac, 0x6e218c1f, - 0x2e6c8d6a, 0x6e728e30, 0x2ea98d07, 0x6ea48c62, - 0x6ee58c83, 0x2e2f35cd, 0x6e353693, 0x2e733651, - 0x6e723630, 0x2ea53483, 0x6ea33441, 0x6eed358b, - 0x2e203ffe, 0x6e273cc5, 0x2e6a3d28, 0x6e713e0f, - 0x2ebf3fdd, 0x6ea03ffe, 0x6ee23c20, 0x0e36e6b4, - 0x4e29e507, 0x4e76e6b4, 0x2eb9e717, 0x6ebee7bc, - 0x6ef7e6d5, 0x2e3de79b, 0x6e3be759, 0x6e67e4c5, - 0x65d23ee0, 0x65903d92, 0x65d03fa7, 0x65912fe9, - 0x65d13bf9, 0x65932a0a, 0x25cb90c4, 0x25040bde, - 0x25c11085, 0x25c62c6b, 0x259f2279, 0x259d8993, - 0x24e5102b, 0x24ad5458, 0x24ec7ab5, 0x24387c6d, - 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, - 0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, - 0x88267fff, 0x4e010fe0, 0x5e040420, 0x4e081fe1, - 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4e042c20, - 0x4e062c20, 0x4e052c20, 0x4e083c20, 0x0e0c3c20, - 0x0e0a3c20, 0x0e073c20, 0x9eae0020, 0x0f03f409, - 0x6f03f40e, 0x4cc0ac3f, 0x0ea1b820, 0x4e21c862, - 0x4e61b8a4, 0x05a08020, 0x05104fe0, 0x05505001, - 0x05906fe2, 0x05d03005, 0x05101fea, 0x05901feb, - 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35, - 0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160, - 0x0461943e, 0x04a19020, 0x04038100, 0x040381a0, - 0x040387e1, 0x04438be2, 0x04c38fe3, 0x040181e0, - 0x04018100, 0x04018621, 0x04418b22, 0x04418822, - 0x04818c23, 0x040081e0, 0x04008120, 0x04008761, - 0x04008621, 0x04408822, 0x04808c23, 0x042053ff, - 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001, - 0x25b8efe2, 0x25f8f007, 0x2538dfea, 0x25b8dfeb, - 0xa400a3e0, 0xa420a7e0, 0xa4484be0, 0xa467afe0, - 0xa4a8a7ea, 0xa547a814, 0xa4084ffe, 0xa55c53e0, - 0xa5e1540b, 0xe400fbf6, 0xe408ffff, 0xe420e7e0, - 0xe4484be0, 0xe460efe0, 0xe547e400, 0xe4014be0, - 0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff, - 0xe59f5d08, 0x0420e3e9, 0x0460e3ea, 0x04a0e3eb, - 0x04e0e3ec, 0x25104042, 0x25104871, 0x25904861, - 0x25904c92, 0x05344020, 0x05744041, 0x05b44062, - 0x05f44083, 0x252c8840, 0x253c1420, 0x25681572, - 0x25a21ce3, 0x25ea1e34, 0x253c0421, 0x25680572, - 0x25a20ce3, 0x25ea0e34, 0x0522c020, 0x05e6c0a4, - 0x2401a001, 0x2443a051, 0x24858881, 0x24c78cd1, - 0x24850891, 0x24c70cc1, 0x250f9001, 0x25508051, - 0x25802491, 0x25df28c1, 0x25850c81, 0x251e10d1, - 0x65816001, 0x65c36051, 0x65854891, 0x65c74cc1, - 0x05733820, 0x05b238a4, 0x05f138e6, 0x0570396a, - 0x65d0a001, 0x65d6a443, 0x65d4a826, 0x6594ac26, - 0x6554ac26, 0x6556ac26, 0x6552ac26, 0x65cbac85, - 0x65caac01, 0x6589ac85, 0x6588ac01, 0x65c9ac85, - 0x65c8ac01, 0x65dea833, 0x659ca509, 0x65d8a801, - 0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601, - 0x052281e0, 0x05238601, 0x04a14026, 0x042244a6, - 0x046344a6, 0x04a444a6, 0x04e544a7, 0x0568aca7, - 0x05b23230, 0x853040af, 0xc5b040af, 0xe57080af, - 0xe5b080af, 0x25034440, 0x254054c4, 0x25034640, - 0x25415a05, 0x25834440, 0x25c54489, 0x250b5d3a, - 0x2550dc20, 0x2518e3e1, 0x2518e021, 0x2518e0a1, - 0x2518e121, 0x2518e1a1, 0x2558e3e2, 0x2558e042, - 0x2558e0c2, 0x2558e142, 0x2598e3e3, 0x2598e063, - 0x2598e0e3, 0x2598e163, 0x25d8e3e4, 0x25d8e084, - 0x25d8e104, 0x25d8e184, 0x2518e407, 0x05214800, - 0x05614800, 0x05a14800, 0x05e14800, 0x05214c00, - 0x05614c00, 0x05a14c00, 0x05e14c00, 0x05304001, - 0x05314001, 0x05a18610, 0x05e18610, 0x05271e11, - 0x6545e891, 0x6585e891, 0x65c5e891, 0x6545c891, - 0x6585c891, 0x65c5c891, 0x45b0c210, 0x45f1c231, - 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, - 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, - 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, - 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, - 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, - 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, - 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, - 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, - 0xf8338131, 0xf83c01fb, 0xf82712f5, 0xf83f2059, - 0xf83f31fb, 0xf82a5277, 0xf8234010, 0xf83972fa, - 0xf8226190, 0xf8a483dc, 0xf8bd0370, 0xf8a613a9, - 0xf8b02087, 0xf8a7312f, 0xf8b75048, 0xf8bc43f5, - 0xf8a5701b, 0xf8b1608f, 0xf8fa8388, 0xf8f6037b, - 0xf8f91017, 0xf8e421e6, 0xf8e031e4, 0xf8e150ea, - 0xf8e5438a, 0xf8e772f4, 0xf8f56166, 0xf86883f1, - 0xf8660051, 0xf86c13be, 0xf86322db, 0xf87d31ae, - 0xf87c5311, 0xf86541c2, 0xf86a7170, 0xf87b6197, - 0xb8248236, 0xb8240261, 0xb83011b0, 0xb82e204c, - 0xb83132a3, 0xb83750c5, 0xb82741b3, 0xb83c7211, - 0xb82663a2, 0xb8a380c4, 0xb8b001b4, 0xb8ac1114, - 0xb8b92274, 0xb8a0330b, 0xb8a653f4, 0xb8ae40d0, - 0xb8a071e7, 0xb8b3613a, 0xb8ea82b7, 0xb8f6005c, - 0xb8e3126f, 0xb8f42087, 0xb8fd3007, 0xb8e95290, - 0xb8f74204, 0xb8ea7177, 0xb8f963e6, 0xb87082ed, - 0xb86c01c1, 0xb8691215, 0xb87a208f, 0xb8643110, - 0xb866509e, 0xb87d43b1, 0xb87a71e9, 0xb86263ab, - 0xce216ce3, 0xce0e2255, 0xce798ed2, 0xce959685, - 0xce7e8217, 0xce608694, 0xcec08264, 0xce748898, - 0x25e0da44, 0x2521c8f3, 0x05801548, 0x0540cbdf, - 0x05006521, 0x2560c7a0, 0x25a1c498, 0x058026bb, - 0x05407dd8, 0x0500f3d6, 0x2560ce3d, 0x2521d4b4, - 0x05803cbc, 0x05404d6c, 0x05001b89, 0x25a0c532, - 0x2521cc40, 0x05800c08, 0x054074c4, 0x050034a0, - 0x2520c9e3, 0x25e1ca93, 0x05803e98, 0x05425238, - 0x050024cb, 0x25a0ce7f, 0x25e1d0c3, 0x05802676, - 0x05401e63, 0x05002d49, 0x04e20080, 0x04ab04ce, - 0x659e022e, 0x65970863, 0x659c0703, 0x04d6b4f3, - 0x04400cb5, 0x049a06da, 0x04508071, 0x045b0d14, - 0x0459b22e, 0x04daba4d, 0x04590a13, 0x0493979b, - 0x04d188a8, 0x0450081c, 0x0417b6b9, 0x041eb743, - 0x04981e7a, 0x05e78dc1, 0x0564824e, 0x048816ff, - 0x040a0d1e, 0x04810ee0, 0x04dcb340, 0x65c08ed8, - 0x65cd8162, 0x65c6970c, 0x65c79e29, 0x65c29494, - 0x04ddbecd, 0x65c2ba5f, 0x65c0a9af, 0x6581a434, - 0x658da0ee, 0x65c1908c, 0x65be806f, 0x65ff0694, - 0x65ee2d2d, 0x65a3af81, 0x65a9cb3a, 0x65e1e9da, - 0x65f447ba, 0x65e17da6, 0x0401482b, 0x040279fb, - 0x0439323e, 0x04a33302, 0x046331bd, 0x04fc320e, - 0x05bb6964, 0x05e16e02, 0x65c897e7, 0x4596b150, - 0x4516b4fd, 0x0438396c, 0x041a280b, 0x04183697, - 0x04192de3, 0x04083b7e, 0x04ca3955, 0x65873883, - 0x658622a6, 0x65d83bd9, 0x0441303f, 0x0e2e11ac, - 0x4e2013fe, 0x0e6f11cd, 0x4e6a1128, 0x0ebb1359, - 0x4ebf13dd, 0x2e231041, 0x6e21101f, 0x2e791317, - 0x6e61101f, 0x2eb612b4, 0x6ea21020, + 0x4ead958b, 0x0e20cffe, 0x4e3dcf9b, 0x4e6bcd49, + 0x2e7b9759, 0x6e649462, 0x2eae95ac, 0x6eb39651, + 0x0ea0cffe, 0x4ea3cc41, 0x4eeecdac, 0x2e3effbc, + 0x6e22fc20, 0x6e73fe51, 0x0e2e65ac, 0x4e336651, + 0x0e7766d5, 0x4e6e65ac, 0x0ebd679b, 0x4ebf67dd, + 0x0e20a7fe, 0x4e23a441, 0x0e7ba759, 0x4e7da79b, + 0x0ea6a4a4, 0x4ebfa7dd, 0x0e25f483, 0x4e28f4e6, + 0x4e7ff7dd, 0x0e3b6f59, 0x4e336e51, 0x0e6a6d28, + 0x4e696d07, 0x0eae6dac, 0x4ea26c20, 0x0e35ae93, + 0x4e23ac41, 0x0e79af17, 0x4e64ac62, 0x0ea2ac20, + 0x4eaaad28, 0x0eb9f717, 0x4ebbf759, 0x4ef1f60f, + 0x2ebfefdd, 0x6ea5ec83, 0x6eeced6a, 0x0e3836f6, + 0x4e2c356a, 0x0e6634a4, 0x4e733651, 0x0ea33441, + 0x4ead358b, 0x4ee93507, 0x0e2c3d6a, 0x4e313e0f, + 0x0e723e30, 0x4e643c62, 0x0eab3d49, 0x4ead3d8b, + 0x4eee3dac, 0x2e308dee, 0x6e2f8dcd, 0x2e648c62, + 0x6e688ce6, 0x2eb58e93, 0x6ebb8f59, 0x6ef18e0f, + 0x2e2634a4, 0x6e243462, 0x2e6634a4, 0x6e6d358b, + 0x2eb33651, 0x6eb636b4, 0x6ef23630, 0x2e333e51, + 0x6e2c3d6a, 0x2e763eb4, 0x6e783ef6, 0x2eae3dac, + 0x6ebb3f59, 0x6ef93f17, 0x0e3ee7bc, 0x4e30e5ee, + 0x4e6ce56a, 0x2ebae738, 0x6ea3e441, 0x6eede58b, + 0x2e20e7fe, 0x6e2ce56a, 0x6e71e60f, 0x65922c43, + 0x65d02219, 0x65d02560, 0x65d13dc4, 0x65913690, + 0x65d33b6b, 0x2500948c, 0x254c08bf, 0x25831f87, + 0x254f30af, 0x259c3359, 0x25019d35, 0x24eac76d, + 0x2431993a, 0x242f7ed8, 0x24a2f62b, 0xba5fd3e3, + 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff, + 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff, + 0x4e010fe0, 0x5e040420, 0x4e081fe1, 0x4e0c1fe1, + 0x4e0a1fe1, 0x4e071fe1, 0x4e042c20, 0x4e062c20, + 0x4e052c20, 0x4e083c20, 0x0e0c3c20, 0x0e0a3c20, + 0x0e073c20, 0x9eae0020, 0x0f03f409, 0x6f03f40e, + 0x4cc0ac3f, 0x0ea1b820, 0x4e21c862, 0x4e61b8a4, + 0x05a08020, 0x05104fe0, 0x05505001, 0x05906fe2, + 0x05d03005, 0x05101fea, 0x05901feb, 0x04b0e3e0, + 0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x047f9c20, + 0x04ff9c20, 0x04299420, 0x04319160, 0x0461943e, + 0x04a19020, 0x04038100, 0x040381a0, 0x040387e1, + 0x04438be2, 0x04c38fe3, 0x040181e0, 0x04018100, + 0x04018621, 0x04418b22, 0x04418822, 0x04818c23, + 0x040081e0, 0x04008120, 0x04008761, 0x04008621, + 0x04408822, 0x04808c23, 0x042053ff, 0x047f5401, + 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2, + 0x25f8f007, 0x2538dfea, 0x25b8dfeb, 0xa400a3e0, + 0xa420a7e0, 0xa4484be0, 0xa467afe0, 0xa4a8a7ea, + 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, + 0xe400fbf6, 0xe408ffff, 0xe420e7e0, 0xe4484be0, + 0xe460efe0, 0xe547e400, 0xe4014be0, 0xe4a84fe0, + 0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08, + 0x0420e3e9, 0x0460e3ea, 0x04a0e3eb, 0x04e0e3ec, + 0x25104042, 0x25104871, 0x25904861, 0x25904c92, + 0x05344020, 0x05744041, 0x05b44062, 0x05f44083, + 0x252c8840, 0x253c1420, 0x25681572, 0x25a21ce3, + 0x25ea1e34, 0x253c0421, 0x25680572, 0x25a20ce3, + 0x25ea0e34, 0x0522c020, 0x05e6c0a4, 0x2401a001, + 0x2443a051, 0x24858881, 0x24c78cd1, 0x24850891, + 0x24c70cc1, 0x250f9001, 0x25508051, 0x25802491, + 0x25df28c1, 0x25850c81, 0x251e10d1, 0x65816001, + 0x65c36051, 0x65854891, 0x65c74cc1, 0x05733820, + 0x05b238a4, 0x05f138e6, 0x0570396a, 0x65d0a001, + 0x65d6a443, 0x65d4a826, 0x6594ac26, 0x6554ac26, + 0x6556ac26, 0x6552ac26, 0x65cbac85, 0x65caac01, + 0x6589ac85, 0x6588ac01, 0x65c9ac85, 0x65c8ac01, + 0x65dea833, 0x659ca509, 0x65d8a801, 0x65dcac01, + 0x655cb241, 0x0520a1e0, 0x0521a601, 0x052281e0, + 0x05238601, 0x04a14026, 0x042244a6, 0x046344a6, + 0x04a444a6, 0x04e544a7, 0x0568aca7, 0x05b23230, + 0x853040af, 0xc5b040af, 0xe57080af, 0xe5b080af, + 0x25034440, 0x254054c4, 0x25034640, 0x25415a05, + 0x25834440, 0x25c54489, 0x250b5d3a, 0x2550dc20, + 0x2518e3e1, 0x2518e021, 0x2518e0a1, 0x2518e121, + 0x2518e1a1, 0x2558e3e2, 0x2558e042, 0x2558e0c2, + 0x2558e142, 0x2598e3e3, 0x2598e063, 0x2598e0e3, + 0x2598e163, 0x25d8e3e4, 0x25d8e084, 0x25d8e104, + 0x25d8e184, 0x2518e407, 0x05214800, 0x05614800, + 0x05a14800, 0x05e14800, 0x05214c00, 0x05614c00, + 0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001, + 0x05a18610, 0x05e18610, 0x05271e11, 0x6545e891, + 0x6585e891, 0x65c5e891, 0x6545c891, 0x6585c891, + 0x65c5c891, 0x45b0c210, 0x45f1c231, 0x1e601000, + 0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000, + 0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000, + 0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, + 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000, + 0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000, + 0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000, + 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, + 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf82f8186, + 0xf83001ab, 0xf83713c1, 0xf8332225, 0xf82232d0, + 0xf82d52aa, 0xf83d419b, 0xf83b7023, 0xf83f6278, + 0xf8b18389, 0xf8bb00ef, 0xf8b513f7, 0xf8b923e2, + 0xf8bb3150, 0xf8b75073, 0xf8b04320, 0xf8ba7057, + 0xf8b0608c, 0xf8fc83be, 0xf8f000db, 0xf8e911fd, + 0xf8e720e4, 0xf8ef32e9, 0xf8e85382, 0xf8f540bf, + 0xf8fb7220, 0xf8ef6344, 0xf86882dc, 0xf87b033b, + 0xf8771080, 0xf8662010, 0xf864302f, 0xf86a50a7, + 0xf86a40fc, 0xf87472b7, 0xf866610b, 0xb83180df, + 0xb8310182, 0xb83e107d, 0xb83b23b6, 0xb82e338d, + 0xb83150b8, 0xb822414e, 0xb830736b, 0xb837608c, + 0xb8b68091, 0xb8a10213, 0xb8b011cd, 0xb8ac2222, + 0xb8a332f5, 0xb8a550e6, 0xb8b3438d, 0xb8b170d0, + 0xb8a2607d, 0xb8e481e6, 0xb8f4018d, 0xb8f41328, + 0xb8f42013, 0xb8eb30d8, 0xb8f451df, 0xb8f04006, + 0xb8e7726f, 0xb8fa6149, 0xb87782d5, 0xb87c0062, + 0xb86f1293, 0xb86723a4, 0xb8673120, 0xb87052f4, + 0xb8644150, 0xb877732b, 0xb866621f, 0xce2b06cd, + 0xce14410d, 0xce648df9, 0xce883624, 0xce7c809d, + 0xce7a87b0, 0xcec081c9, 0xce7c8962, 0x2520c143, + 0x25a1d2da, 0x058015ce, 0x05400ed8, 0x0500bb31, + 0x25a0c074, 0x25a1d884, 0x05804944, 0x0540b1d9, + 0x05001548, 0x25a0c49e, 0x2521cabe, 0x058054b3, + 0x0543ab47, 0x050026bb, 0x2560d097, 0x25a1d6fe, + 0x058394b4, 0x0540266d, 0x05003cbc, 0x25a0c1ab, + 0x2561d3f8, 0x05800acd, 0x05403684, 0x05000c07, + 0x2560de64, 0x2521cac5, 0x0583c8b5, 0x05405089, + 0x05003e98, 0x04ad0397, 0x04ac074a, 0x658e023e, + 0x65d50a1d, 0x65820667, 0x0496b13a, 0x04001411, + 0x041a19c2, 0x049095cb, 0x041b0c7d, 0x04d9a876, + 0x049aa27b, 0x04591aa7, 0x04138b25, 0x04119235, + 0x04500a63, 0x0497adc7, 0x04dea9b1, 0x04581e31, + 0x05a78f4f, 0x056494fb, 0x04481f65, 0x048a0f00, + 0x04810074, 0x04dca739, 0x65809031, 0x658d9dae, + 0x65c683d1, 0x658797b6, 0x65828008, 0x04ddb417, + 0x6582baf9, 0x6580b435, 0x65c1b56a, 0x65cdb917, + 0x65c19671, 0x65fe95a4, 0x65f11f3e, 0x65fc298e, + 0x65ada1a5, 0x65b3c967, 0x65e3ec59, 0x65f454a0, + 0x65a86dbc, 0x041b41dd, 0x04587903, 0x042a3321, + 0x04b93281, 0x0470327c, 0x04e131bb, 0x0521692b, + 0x057a6f61, 0x65c887a2, 0x45c2b058, 0x455cb723, + 0x043639a3, 0x045a309b, 0x0498305a, 0x04993ce1, + 0x04483e1e, 0x040a3395, 0x65c72595, 0x6586294b, + 0x65d826c0, 0x04412474, }; // END Generated code -- do not edit