diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 index bd8f2ec9a7f..57654514eb6 100644 --- a/make/autoconf/flags-cflags.m4 +++ b/make/autoconf/flags-cflags.m4 @@ -912,6 +912,36 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP], IF_FALSE: [$2FDLIBM_CFLAGS=""]) fi AC_SUBST($2FDLIBM_CFLAGS) + + # Check whether the compiler supports the Arm C Language Extensions (ACLE) + # for SVE. Set SVE_CFLAGS to -march=armv8-a+sve if it does. + # ACLE and this flag are required to build the aarch64 SVE related functions in + # libvectormath. + if test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then + AC_LANG_PUSH(C) + OLD_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -march=armv8-a+sve" + AC_MSG_CHECKING([if Arm SVE ACLE is supported]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include ], + [ + svint32_t r = svdup_n_s32(1); + return 0; + ])], + [ + AC_MSG_RESULT([yes]) + $2SVE_CFLAGS="-march=armv8-a+sve" + ], + [ + AC_MSG_RESULT([no]) + $2SVE_CFLAGS="" + ] + ) + CFLAGS="$OLD_CFLAGS" + AC_LANG_POP(C) + fi + fi + AC_SUBST($2SVE_CFLAGS) ]) # FLAGS_SETUP_GCC6_COMPILER_FLAGS([PREFIX]) diff --git a/make/autoconf/spec.gmk.template b/make/autoconf/spec.gmk.template index a5f4143c888..eb2b1a688e1 100644 --- a/make/autoconf/spec.gmk.template +++ b/make/autoconf/spec.gmk.template @@ -827,6 +827,9 @@ OS_VERSION_MAJOR := @OS_VERSION_MAJOR@ OS_VERSION_MINOR := @OS_VERSION_MINOR@ OS_VERSION_MICRO := @OS_VERSION_MICRO@ +# Arm SVE +SVE_CFLAGS := @SVE_CFLAGS@ + # Images directory definitions JDK_IMAGE_SUBDIR := jdk JRE_IMAGE_SUBDIR := jre diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk index bf6ace6f97f..69da7ed059a 100644 --- a/make/modules/jdk.incubator.vector/Lib.gmk +++ b/make/modules/jdk.incubator.vector/Lib.gmk @@ -55,3 +55,17 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2 TARGETS += $(BUILD_LIBSLEEF) endif + +ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, aarch64)+$(INCLUDE_COMPILER2), true+true+true) + $(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \ + NAME := sleef, \ + OPTIMIZATION := HIGH, \ + SRC := libsleef/lib, \ + EXTRA_SRC := libsleef/generated, \ + DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \ + DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \ + CFLAGS := $(SVE_CFLAGS), \ + )) + + TARGETS += $(BUILD_LIBSLEEF) +endif diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index d9c77a2f529..eb473f97979 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2307,14 +2307,18 @@ const RegMask* Matcher::predicate_reg_mask(void) { return &_PR_REG_mask; } -// Vector calling convention not yet implemented. bool Matcher::supports_vector_calling_convention(void) { - return false; + return EnableVectorSupport && UseVectorStubs; } OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - Unimplemented(); - return OptoRegPair(0, 0); + assert(EnableVectorSupport && UseVectorStubs, "sanity"); + int lo = V0_num; + int hi = V0_H_num; + if (ideal_reg == Op_VecX || ideal_reg == Op_VecA) { + hi = V0_K_num; + } + return OptoRegPair(hi, lo); } // Is this branch offset short enough that a short branch can be used? @@ -16109,6 +16113,22 @@ instruct CallLeafDirect(method meth) ins_pipe(pipe_class_call); %} +// Call Runtime Instruction without safepoint and with vector arguments +instruct CallLeafDirectVector(method meth) +%{ + match(CallLeafVector); + + effect(USE meth); + + ins_cost(CALL_COST); + + format %{ "CALL, runtime leaf vector $meth" %} + + ins_encode(aarch64_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} + // Call Runtime Instruction instruct CallLeafNoFPDirect(method meth) diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 52996f4c4a5..fb8ae80cb49 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -871,7 +871,20 @@ static int c_calling_convention_priv(const BasicType *sig_bt, int SharedRuntime::vector_calling_convention(VMRegPair *regs, uint num_bits, uint total_args_passed) { - Unimplemented(); + // More than 8 argument inputs are not supported now. + assert(total_args_passed <= Argument::n_float_register_parameters_c, "unsupported"); + assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported"); + + static const FloatRegister VEC_ArgReg[Argument::n_float_register_parameters_c] = { + v0, v1, v2, v3, v4, v5, v6, v7 + }; + + // On SVE, we use the same vector registers with 128-bit vector registers on NEON. + int next_reg_val = num_bits == 64 ? 1 : 3; + for (uint i = 0; i < total_args_passed; i++) { + VMReg vmreg = VEC_ArgReg[i]->as_VMReg(); + regs[i].set_pair(vmreg->next(next_reg_val), vmreg); + } return 0; } diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index de7fc5b281b..c0f2531b1ac 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -42,6 +42,7 @@ #include "oops/oop.inline.hpp" #include "prims/methodHandles.hpp" #include "prims/upcallLinker.hpp" +#include "runtime/arguments.hpp" #include "runtime/atomic.hpp" #include "runtime/continuation.hpp" #include "runtime/continuationEntry.inline.hpp" @@ -8176,6 +8177,78 @@ class StubGenerator: public StubCodeGenerator { // } }; + void generate_vector_math_stubs() { + // Get native vector math stub routine addresses + void* libsleef = nullptr; + char ebuf[1024]; + char dll_name[JVM_MAXPATHLEN]; + if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) { + libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf); + } + if (libsleef == nullptr) { + log_info(library)("Failed to load native vector math library, %s!", ebuf); + return; + } + // Method naming convention + // All the methods are named as _ + // Where: + // is the operation name, e.g. sin + // is optional to indicate float/double + // "f/d" for vector float/double operation + // is the number of elements in the vector + // "2/4" for neon, and "x" for sve + // is the precision level + // "u10/u05" represents 1.0/0.5 ULP error bounds + // We use "u10" for all operations by default + // But for those functions do not have u10 support, we use "u05" instead + // indicates neon/sve + // "sve/advsimd" for sve/neon implementations + // e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions + // cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions + // + log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef)); + + // Math vector stubs implemented with SVE for scalable vector size. + if (UseSVE > 0) { + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { + int vop = VectorSupport::VECTOR_OP_MATH_START + op; + // Skip "tanh" because there is performance regression + if (vop == VectorSupport::VECTOR_OP_TANH) { + continue; + } + + // The native library does not support u10 level of "hypot". + const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; + + snprintf(ebuf, sizeof(ebuf), "%sfx_%ssve", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); + + snprintf(ebuf, sizeof(ebuf), "%sdx_%ssve", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf); + } + } + + // Math vector stubs implemented with NEON for 64/128 bits vector size. + for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) { + int vop = VectorSupport::VECTOR_OP_MATH_START + op; + // Skip "tanh" because there is performance regression + if (vop == VectorSupport::VECTOR_OP_TANH) { + continue; + } + + // The native library does not support u10 level of "hypot". + const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10"; + + snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libsleef, ebuf); + + snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf); + + snprintf(ebuf, sizeof(ebuf), "%sd2_%sadvsimd", VectorSupport::mathname[op], ulf); + StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libsleef, ebuf); + } + } // Initialization void generate_initial_stubs() { @@ -8329,6 +8402,9 @@ class StubGenerator: public StubCodeGenerator { // because it's faster for the sizes of modulus we care about. StubRoutines::_montgomerySquare = g.generate_multiply(); } + + generate_vector_math_stubs(); + #endif // COMPILER2 if (UseChaCha20Intrinsics) { @@ -8384,6 +8460,7 @@ class StubGenerator: public StubCodeGenerator { if (UseAdler32Intrinsics) { StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); } + #endif // COMPILER2_OR_JVMCI } diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_neon.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_neon.c new file mode 100644 index 00000000000..de289d4ffc5 --- /dev/null +++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_neon.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2024, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +#include +#include + +#include "../generated/misc.h" +#include "../generated/sleefinline_advsimd.h" + + +#include + +#define DEFINE_VECTOR_MATH_UNARY(op, type) \ +JNIEXPORT \ +type op##advsimd(type input) { \ + return Sleef_##op##advsimd(input); \ +} + +#define DEFINE_VECTOR_MATH_BINARY(op, type) \ +JNIEXPORT \ +type op##advsimd(type input1, type input2) { \ + return Sleef_##op##advsimd(input1, input2); \ +} + +DEFINE_VECTOR_MATH_UNARY(tanf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(tanhf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(sinf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(sinhf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(cosf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(coshf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(asinf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(acosf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(atanf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(cbrtf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(logf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(log10f4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(log1pf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(expf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_UNARY(expm1f4_u10, float32x4_t) + +DEFINE_VECTOR_MATH_UNARY(tand2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(tanhd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(sind2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(sinhd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(cosd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(coshd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(asind2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(acosd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(atand2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(cbrtd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(logd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(log10d2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(log1pd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(expd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_UNARY(expm1d2_u10, float64x2_t) + +DEFINE_VECTOR_MATH_BINARY(atan2f4_u10, float32x4_t) +DEFINE_VECTOR_MATH_BINARY(powf4_u10, float32x4_t) +DEFINE_VECTOR_MATH_BINARY(hypotf4_u05, float32x4_t) + +DEFINE_VECTOR_MATH_BINARY(atan2d2_u10, float64x2_t) +DEFINE_VECTOR_MATH_BINARY(powd2_u10, float64x2_t) +DEFINE_VECTOR_MATH_BINARY(hypotd2_u05, float64x2_t) + +#undef DEFINE_VECTOR_MATH_UNARY + +#undef DEFINE_VECTOR_MATH_BINARY + +#endif // defined(__ARM_NEON__) || defined(__ARM_NEON) diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_sve.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_sve.c new file mode 100644 index 00000000000..1a442761d51 --- /dev/null +++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_sve.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2024, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#if defined(__ARM_FEATURE_SVE) + +#include +#include + +#include "../generated/misc.h" +#include "../generated/sleefinline_sve.h" + + +#include + +#define DEFINE_VECTOR_MATH_UNARY_SVE(op, type) \ +JNIEXPORT \ +type op##sve(type input) { \ + return Sleef_##op##sve(input); \ +} + +#define DEFINE_VECTOR_MATH_BINARY_SVE(op, type) \ +JNIEXPORT \ +type op##sve(type input1, type input2) { \ + return Sleef_##op##sve(input1, input2); \ +} + +DEFINE_VECTOR_MATH_UNARY_SVE(tanfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(sinfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(sinhfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(cosfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(coshfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(asinfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(acosfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(atanfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(cbrtfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(logfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(log10fx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(log1pfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(expfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_UNARY_SVE(expm1fx_u10, svfloat32_t) + +DEFINE_VECTOR_MATH_UNARY_SVE(tandx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(sindx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(sinhdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(cosdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(coshdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(asindx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(acosdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(atandx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(cbrtdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(logdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(log10dx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(log1pdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(expdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_UNARY_SVE(expm1dx_u10, svfloat64_t) + +DEFINE_VECTOR_MATH_BINARY_SVE(atan2fx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_BINARY_SVE(powfx_u10, svfloat32_t) +DEFINE_VECTOR_MATH_BINARY_SVE(hypotfx_u05, svfloat32_t) + +DEFINE_VECTOR_MATH_BINARY_SVE(atan2dx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_BINARY_SVE(powdx_u10, svfloat64_t) +DEFINE_VECTOR_MATH_BINARY_SVE(hypotdx_u05, svfloat64_t) + +#undef DEFINE_VECTOR_MATH_UNARY_SVE + +#undef DEFINE_VECTOR_MATH_BINARY_SVE + +#endif // __ARM_FEATURE_SVE