mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8312425: [vectorapi] AArch64: Optimize vector math operations with SLEEF
Co-authored-by: Xiaohong Gong <xgong@openjdk.org> Reviewed-by: ihse, fgao, aph
This commit is contained in:
parent
e7f0bf11ff
commit
df08a9ec0d
@ -912,6 +912,36 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP],
|
||||
IF_FALSE: [$2FDLIBM_CFLAGS=""])
|
||||
fi
|
||||
AC_SUBST($2FDLIBM_CFLAGS)
|
||||
|
||||
# Check whether the compiler supports the Arm C Language Extensions (ACLE)
|
||||
# for SVE. Set SVE_CFLAGS to -march=armv8-a+sve if it does.
|
||||
# ACLE and this flag are required to build the aarch64 SVE related functions in
|
||||
# libvectormath.
|
||||
if test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
|
||||
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
|
||||
AC_LANG_PUSH(C)
|
||||
OLD_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -march=armv8-a+sve"
|
||||
AC_MSG_CHECKING([if Arm SVE ACLE is supported])
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <arm_sve.h>],
|
||||
[
|
||||
svint32_t r = svdup_n_s32(1);
|
||||
return 0;
|
||||
])],
|
||||
[
|
||||
AC_MSG_RESULT([yes])
|
||||
$2SVE_CFLAGS="-march=armv8-a+sve"
|
||||
],
|
||||
[
|
||||
AC_MSG_RESULT([no])
|
||||
$2SVE_CFLAGS=""
|
||||
]
|
||||
)
|
||||
CFLAGS="$OLD_CFLAGS"
|
||||
AC_LANG_POP(C)
|
||||
fi
|
||||
fi
|
||||
AC_SUBST($2SVE_CFLAGS)
|
||||
])
|
||||
|
||||
# FLAGS_SETUP_GCC6_COMPILER_FLAGS([PREFIX])
|
||||
|
||||
@ -827,6 +827,9 @@ OS_VERSION_MAJOR := @OS_VERSION_MAJOR@
|
||||
OS_VERSION_MINOR := @OS_VERSION_MINOR@
|
||||
OS_VERSION_MICRO := @OS_VERSION_MICRO@
|
||||
|
||||
# Arm SVE
|
||||
SVE_CFLAGS := @SVE_CFLAGS@
|
||||
|
||||
# Images directory definitions
|
||||
JDK_IMAGE_SUBDIR := jdk
|
||||
JRE_IMAGE_SUBDIR := jre
|
||||
|
||||
@ -55,3 +55,17 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2
|
||||
|
||||
TARGETS += $(BUILD_LIBSLEEF)
|
||||
endif
|
||||
|
||||
ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, aarch64)+$(INCLUDE_COMPILER2), true+true+true)
|
||||
$(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
|
||||
NAME := sleef, \
|
||||
OPTIMIZATION := HIGH, \
|
||||
SRC := libsleef/lib, \
|
||||
EXTRA_SRC := libsleef/generated, \
|
||||
DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
|
||||
DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
|
||||
CFLAGS := $(SVE_CFLAGS), \
|
||||
))
|
||||
|
||||
TARGETS += $(BUILD_LIBSLEEF)
|
||||
endif
|
||||
|
||||
@ -2307,14 +2307,18 @@ const RegMask* Matcher::predicate_reg_mask(void) {
|
||||
return &_PR_REG_mask;
|
||||
}
|
||||
|
||||
// Vector calling convention not yet implemented.
|
||||
bool Matcher::supports_vector_calling_convention(void) {
|
||||
return false;
|
||||
return EnableVectorSupport && UseVectorStubs;
|
||||
}
|
||||
|
||||
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
|
||||
Unimplemented();
|
||||
return OptoRegPair(0, 0);
|
||||
assert(EnableVectorSupport && UseVectorStubs, "sanity");
|
||||
int lo = V0_num;
|
||||
int hi = V0_H_num;
|
||||
if (ideal_reg == Op_VecX || ideal_reg == Op_VecA) {
|
||||
hi = V0_K_num;
|
||||
}
|
||||
return OptoRegPair(hi, lo);
|
||||
}
|
||||
|
||||
// Is this branch offset short enough that a short branch can be used?
|
||||
@ -16109,6 +16113,22 @@ instruct CallLeafDirect(method meth)
|
||||
ins_pipe(pipe_class_call);
|
||||
%}
|
||||
|
||||
// Call Runtime Instruction without safepoint and with vector arguments
|
||||
instruct CallLeafDirectVector(method meth)
|
||||
%{
|
||||
match(CallLeafVector);
|
||||
|
||||
effect(USE meth);
|
||||
|
||||
ins_cost(CALL_COST);
|
||||
|
||||
format %{ "CALL, runtime leaf vector $meth" %}
|
||||
|
||||
ins_encode(aarch64_enc_java_to_runtime(meth));
|
||||
|
||||
ins_pipe(pipe_class_call);
|
||||
%}
|
||||
|
||||
// Call Runtime Instruction
|
||||
|
||||
instruct CallLeafNoFPDirect(method meth)
|
||||
|
||||
@ -871,7 +871,20 @@ static int c_calling_convention_priv(const BasicType *sig_bt,
|
||||
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
|
||||
uint num_bits,
|
||||
uint total_args_passed) {
|
||||
Unimplemented();
|
||||
// More than 8 argument inputs are not supported now.
|
||||
assert(total_args_passed <= Argument::n_float_register_parameters_c, "unsupported");
|
||||
assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
|
||||
|
||||
static const FloatRegister VEC_ArgReg[Argument::n_float_register_parameters_c] = {
|
||||
v0, v1, v2, v3, v4, v5, v6, v7
|
||||
};
|
||||
|
||||
// On SVE, we use the same vector registers with 128-bit vector registers on NEON.
|
||||
int next_reg_val = num_bits == 64 ? 1 : 3;
|
||||
for (uint i = 0; i < total_args_passed; i++) {
|
||||
VMReg vmreg = VEC_ArgReg[i]->as_VMReg();
|
||||
regs[i].set_pair(vmreg->next(next_reg_val), vmreg);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
#include "oops/oop.inline.hpp"
|
||||
#include "prims/methodHandles.hpp"
|
||||
#include "prims/upcallLinker.hpp"
|
||||
#include "runtime/arguments.hpp"
|
||||
#include "runtime/atomic.hpp"
|
||||
#include "runtime/continuation.hpp"
|
||||
#include "runtime/continuationEntry.inline.hpp"
|
||||
@ -8176,6 +8177,78 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// }
|
||||
};
|
||||
|
||||
void generate_vector_math_stubs() {
|
||||
// Get native vector math stub routine addresses
|
||||
void* libsleef = nullptr;
|
||||
char ebuf[1024];
|
||||
char dll_name[JVM_MAXPATHLEN];
|
||||
if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
|
||||
libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
|
||||
}
|
||||
if (libsleef == nullptr) {
|
||||
log_info(library)("Failed to load native vector math library, %s!", ebuf);
|
||||
return;
|
||||
}
|
||||
// Method naming convention
|
||||
// All the methods are named as <OP><T><N>_<U><suffix>
|
||||
// Where:
|
||||
// <OP> is the operation name, e.g. sin
|
||||
// <T> is optional to indicate float/double
|
||||
// "f/d" for vector float/double operation
|
||||
// <N> is the number of elements in the vector
|
||||
// "2/4" for neon, and "x" for sve
|
||||
// <U> is the precision level
|
||||
// "u10/u05" represents 1.0/0.5 ULP error bounds
|
||||
// We use "u10" for all operations by default
|
||||
// But for those functions do not have u10 support, we use "u05" instead
|
||||
// <suffix> indicates neon/sve
|
||||
// "sve/advsimd" for sve/neon implementations
|
||||
// e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions
|
||||
// cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions
|
||||
//
|
||||
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
|
||||
|
||||
// Math vector stubs implemented with SVE for scalable vector size.
|
||||
if (UseSVE > 0) {
|
||||
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
|
||||
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
|
||||
// Skip "tanh" because there is performance regression
|
||||
if (vop == VectorSupport::VECTOR_OP_TANH) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// The native library does not support u10 level of "hypot".
|
||||
const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
|
||||
|
||||
snprintf(ebuf, sizeof(ebuf), "%sfx_%ssve", VectorSupport::mathname[op], ulf);
|
||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||
|
||||
snprintf(ebuf, sizeof(ebuf), "%sdx_%ssve", VectorSupport::mathname[op], ulf);
|
||||
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||
}
|
||||
}
|
||||
|
||||
// Math vector stubs implemented with NEON for 64/128 bits vector size.
|
||||
for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
|
||||
int vop = VectorSupport::VECTOR_OP_MATH_START + op;
|
||||
// Skip "tanh" because there is performance regression
|
||||
if (vop == VectorSupport::VECTOR_OP_TANH) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// The native library does not support u10 level of "hypot".
|
||||
const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
|
||||
|
||||
snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf);
|
||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||
|
||||
snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf);
|
||||
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||
|
||||
snprintf(ebuf, sizeof(ebuf), "%sd2_%sadvsimd", VectorSupport::mathname[op], ulf);
|
||||
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialization
|
||||
void generate_initial_stubs() {
|
||||
@ -8329,6 +8402,9 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// because it's faster for the sizes of modulus we care about.
|
||||
StubRoutines::_montgomerySquare = g.generate_multiply();
|
||||
}
|
||||
|
||||
generate_vector_math_stubs();
|
||||
|
||||
#endif // COMPILER2
|
||||
|
||||
if (UseChaCha20Intrinsics) {
|
||||
@ -8384,6 +8460,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (UseAdler32Intrinsics) {
|
||||
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
|
||||
}
|
||||
|
||||
#endif // COMPILER2_OR_JVMCI
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "../generated/misc.h"
|
||||
#include "../generated/sleefinline_advsimd.h"
|
||||
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#define DEFINE_VECTOR_MATH_UNARY(op, type) \
|
||||
JNIEXPORT \
|
||||
type op##advsimd(type input) { \
|
||||
return Sleef_##op##advsimd(input); \
|
||||
}
|
||||
|
||||
#define DEFINE_VECTOR_MATH_BINARY(op, type) \
|
||||
JNIEXPORT \
|
||||
type op##advsimd(type input1, type input2) { \
|
||||
return Sleef_##op##advsimd(input1, input2); \
|
||||
}
|
||||
|
||||
DEFINE_VECTOR_MATH_UNARY(tanf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(tanhf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(sinf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(sinhf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(cosf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(coshf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(asinf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(acosf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(atanf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(cbrtf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(logf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(log10f4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(log1pf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(expf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(expm1f4_u10, float32x4_t)
|
||||
|
||||
DEFINE_VECTOR_MATH_UNARY(tand2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(tanhd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(sind2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(sinhd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(cosd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(coshd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(asind2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(acosd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(atand2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(cbrtd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(logd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(log10d2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(log1pd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(expd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_UNARY(expm1d2_u10, float64x2_t)
|
||||
|
||||
DEFINE_VECTOR_MATH_BINARY(atan2f4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_BINARY(powf4_u10, float32x4_t)
|
||||
DEFINE_VECTOR_MATH_BINARY(hypotf4_u05, float32x4_t)
|
||||
|
||||
DEFINE_VECTOR_MATH_BINARY(atan2d2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_BINARY(powd2_u10, float64x2_t)
|
||||
DEFINE_VECTOR_MATH_BINARY(hypotd2_u05, float64x2_t)
|
||||
|
||||
#undef DEFINE_VECTOR_MATH_UNARY
|
||||
|
||||
#undef DEFINE_VECTOR_MATH_BINARY
|
||||
|
||||
#endif // defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <arm_sve.h>
|
||||
|
||||
#include "../generated/misc.h"
|
||||
#include "../generated/sleefinline_sve.h"
|
||||
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#define DEFINE_VECTOR_MATH_UNARY_SVE(op, type) \
|
||||
JNIEXPORT \
|
||||
type op##sve(type input) { \
|
||||
return Sleef_##op##sve(input); \
|
||||
}
|
||||
|
||||
#define DEFINE_VECTOR_MATH_BINARY_SVE(op, type) \
|
||||
JNIEXPORT \
|
||||
type op##sve(type input1, type input2) { \
|
||||
return Sleef_##op##sve(input1, input2); \
|
||||
}
|
||||
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(tanfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(sinfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(sinhfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(cosfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(coshfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(asinfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(acosfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(atanfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(cbrtfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(logfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(log10fx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(log1pfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(expfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(expm1fx_u10, svfloat32_t)
|
||||
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(tandx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(sindx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(sinhdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(cosdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(coshdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(asindx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(acosdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(atandx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(cbrtdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(logdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(log10dx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(log1pdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(expdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_UNARY_SVE(expm1dx_u10, svfloat64_t)
|
||||
|
||||
DEFINE_VECTOR_MATH_BINARY_SVE(atan2fx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_BINARY_SVE(powfx_u10, svfloat32_t)
|
||||
DEFINE_VECTOR_MATH_BINARY_SVE(hypotfx_u05, svfloat32_t)
|
||||
|
||||
DEFINE_VECTOR_MATH_BINARY_SVE(atan2dx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_BINARY_SVE(powdx_u10, svfloat64_t)
|
||||
DEFINE_VECTOR_MATH_BINARY_SVE(hypotdx_u05, svfloat64_t)
|
||||
|
||||
#undef DEFINE_VECTOR_MATH_UNARY_SVE
|
||||
|
||||
#undef DEFINE_VECTOR_MATH_BINARY_SVE
|
||||
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
Loading…
x
Reference in New Issue
Block a user