8376602: [Vector API] Upgrade SLEEF from 3.6.1 to 3.9.0

Reviewed-by: psandoz, fyang, erikj
This commit is contained in:
Xueming Shen 2026-05-27 04:56:50 +00:00
parent 7757684450
commit 185d933bb9
167 changed files with 17283 additions and 7588 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -48,7 +48,7 @@ ifneq ($(OPENJDK_BUILD_OS), linux)
endif
SLEEF_SUPPORT_DIR := $(MAKESUPPORT_OUTPUTDIR)/sleef
SLEEF_SOURCE_BASE_DIR := $(TOPDIR)/src/jdk.incubator.vector/linux/native/libsleef
SLEEF_SOURCE_BASE_DIR := $(TOPDIR)/src/jdk.incubator.vector/unix/native/libsleef
SLEEF_SOURCE_DIR := $(SLEEF_SOURCE_BASE_DIR)/upstream
SLEEF_TARGET_DIR := $(SLEEF_SOURCE_BASE_DIR)/generated
SLEEF_NATIVE_BUILD_DIR := $(SLEEF_SUPPORT_DIR)/native
@ -82,7 +82,12 @@ $(eval $(call SetupExecute, sleef_native_config, \
INFO := Configuring native sleef build, \
OUTPUT_DIR := $(SLEEF_NATIVE_BUILD_DIR), \
WORKING_DIR := $(SLEEF_SOURCE_DIR), \
COMMAND := $(CMAKE) -S . -B $(SLEEF_NATIVE_BUILD_DIR), \
COMMAND := $(CMAKE) -S . -B $(SLEEF_NATIVE_BUILD_DIR) \
-DCMAKE_INSTALL_PREFIX=$(SLEEF_NATIVE_BUILD_DIR) \
-DSLEEF_BUILD_TESTS=OFF \
-DSLEEF_DISABLE_SSL=ON \
-DSLEEF_ENABLE_TLFLOAT=OFF \
-DSLEEF_ENABLE_TESTER4=OFF, \
))
TARGETS := $(sleef_native_config)
@ -106,6 +111,11 @@ $(eval $(call SetupExecute, sleef_cross_config, \
-DCMAKE_C_COMPILER=$(CC) \
-DCMAKE_TOOLCHAIN_FILE=$(SLEEF_CMAKE_FILE) \
-DNATIVE_BUILD_DIR=$(SLEEF_NATIVE_BUILD_DIR) \
-DCMAKE_INSTALL_PREFIX=$(SLEEF_CROSS_BUILD_DIR) \
-DSLEEF_BUILD_TESTS=OFF \
-DSLEEF_DISABLE_SSL=ON \
-DSLEEF_ENABLE_TLFLOAT=OFF \
-DSLEEF_ENABLE_TESTER4=OFF \
-DSLEEF_BUILD_INLINE_HEADERS=TRUE \
$(EXTRA_CROSS_OPTIONS), \
))
@ -139,7 +149,7 @@ $(eval $(call SetupCopyFiles, copy_generated_sleef_source, \
DEST := $(SLEEF_TARGET_DIR), \
))
TARGETS := $(copy_generated_sleef_source)
TARGETS := $(copy_static_sleef_source) $(copy_generated_sleef_source)
################################################################################

View File

@ -1,8 +1,8 @@
## SLEEF v3.6.1
## SLEEF v3.9.0
### Notice
```
Copyright © 2010-2024 SLEEF Project, Naoki Shibata and contributors
Copyright © 2010-2025 SLEEF Project, Naoki Shibata and contributors
-------
src/arch/helpersve.h has the following copyright:

View File

@ -4,15 +4,15 @@ This directory contains the source code for the SLEEF library, the
**SIMD Library for Evaluating Elementary Functions**. For more information on
SLEEF, see https://sleef.org/.
The currently imported libsleef sources is version 3.6.1, which has
git tag `3.6.1` and git commit hash `6ee14bcae5fe92c2ff8b000d5a01102dab08d774`.
The currently imported libsleef sources are version 3.9.0, which has
git tag `3.9.0` and git commit hash `906ca7512ee483296780a81a21b9ca715d40dfe1`.
# About the libsleef integration in the JDK
The upstream original source code is available in
`src/jdk.incubator.vector/unix/native/libsleef/upstream`. However, this code is
not directly usable in the JDK build system, but is instead used as the base for
the generation of additional souce code files. This generation is done by
the generation of additional source code files. This generation is done by
the libsleef CMake files. If this were done at build time, it would
mean adding CMake as a required dependency to build the JDK.
@ -25,7 +25,7 @@ the JDK source tree. The generated files reside in
To update the version of libsleef that is used in the JDK, clone
`https://github.com/shibatch/sleef.git`, and copy all files, except the `docs`,
`.github` and `.git` directories, into
`.github` and `.git` directories, and the `.nojekyll` file, into
`src/jdk.incubator.vector/unix/native/libsleef/upstream`.
The libsleef source code does not follow the JDK whitespace rules as enforced by

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2024.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -13,10 +13,15 @@
#include <string.h>
#endif
#ifndef M_PI
#define M_PI 3.141592653589793238462643383279502884
#endif
#ifndef M_PIf
# define M_PIf ((float)M_PI)
#endif
#ifndef M_PIl
#define M_PIl 3.141592653589793238462643383279502884L
#endif
@ -137,9 +142,17 @@
#define L2Lf 1.428606765330187045e-06f
#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f
#ifndef M_PIf
# define M_PIf ((float)M_PI)
#endif
// Overflow bounds
// - exp(x) overflows for x over (also used in pow)
#define LOG_DBL_MAX 0x1.62e42fefa39efp+9 /* 709.782712893384 */
// Other bounds
// - log1p(f)(x) approximation holds up to x equals
#define LOG1PF_BOUND 0x1.2ced32p+126 /* 1.0e+38 */
#define LOG1P_BOUND 0x1.c7b1f3cac7433p+1019 /* 1.0e+307 */
//
@ -183,17 +196,13 @@ typedef struct {
} Sleef_longdouble2;
#endif
#if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER)
#if (defined (__GNUC__) || defined (__clang__)) && !defined(_MSC_VER)
#define LIKELY(condition) __builtin_expect(!!(condition), 1)
#define UNLIKELY(condition) __builtin_expect(!!(condition), 0)
#define RESTRICT __restrict__
#ifndef __arm__
#define ALIGNED(x) __attribute__((aligned(x)))
#else
#define ALIGNED(x)
#endif
#if defined(SLEEF_GENHEADER)
@ -229,7 +238,7 @@ typedef struct {
#define SLEEF_INFINITYf __builtin_inff()
#define SLEEF_INFINITYl __builtin_infl()
#if defined(__INTEL_COMPILER) || defined (__clang__)
#if defined (__clang__)
#define SLEEF_INFINITYq __builtin_inf()
#define SLEEF_NANq __builtin_nan("")
#else
@ -237,7 +246,7 @@ typedef struct {
#define SLEEF_NANq (SLEEF_INFINITYq - SLEEF_INFINITYq)
#endif
#elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER)
#elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__)) && !defined(_MSC_VER)
#if defined(SLEEF_GENHEADER)
@ -249,6 +258,9 @@ typedef struct {
#else // #if defined(SLEEF_GENHEADER)
#define INLINE __forceinline
#ifdef CONST
#undef CONST
#endif
#define CONST
#ifndef SLEEF_STATIC_LIBS
#define EXPORT __declspec(dllexport)
@ -265,7 +277,7 @@ typedef struct {
#define LIKELY(condition) (condition)
#define UNLIKELY(condition) (condition)
#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) && !defined(SLEEF_GENHEADER)
#if (defined(__GNUC__) || defined(__CLANG__)) && defined(__x86_64__) && !defined(SLEEF_GENHEADER)
#include <x86intrin.h>
#endif
@ -294,7 +306,7 @@ typedef struct {
#endif
#endif
#endif // #elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER)
#endif // #elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__)) && !defined(_MSC_VER)
#if !defined(__linux__)
#define isinff(x) ((x) == SLEEF_INFINITYf || (x) == -SLEEF_INFINITYf)
@ -305,15 +317,9 @@ typedef struct {
#endif // #ifndef __MISC_H__
#ifdef ENABLE_AAVPCS
#define VECTOR_CC __attribute__((aarch64_vector_pcs))
#else
#define VECTOR_CC
#endif
//
#if defined (__GNUC__) && !defined(__INTEL_COMPILER)
#if defined (__GNUC__)
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#if !defined (__clang__)

View File

@ -1,8 +1,11 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See http://www.boost.org/LICENSE_1_0.txt)
// This file is generated by SLEEF 3.6.1
// This file is generated by SLEEF 3.9.0
/* #undef SLEEF_FLOAT128_IS_IEEEQP */
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#ifndef SLEEF_ALWAYS_INLINE
#if defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)
@ -1010,6 +1013,7 @@ static const double Sleef_rempitabdp[] = {
2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322,
2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322,
2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323,
0, 0, 0, 0,
};
static const float Sleef_rempitabsp[] = {
@ -1116,17 +1120,10 @@ static const float Sleef_rempitabsp[] = {
1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
2.743283031e-13, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
0, 0, 0, 0,
};
#endif // #ifndef __SLEEF_REMPITAB__
#if !defined(__NVCC__) && ((defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8))
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && !defined(__NVCC__) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;
@ -3294,7 +3291,8 @@ SLEEF_INLINE SLEEF_CONST vdouble_advsimd_sleef Sleef_expd2_u10advsimd(vdouble_ad
u = vldexp2_vd_vd_vi_advsimd_sleef(u, q);
u = vsel_vd_vo_vd_vd_advsimd_sleef(vgt_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(709.78271114955742909217217426)), vcast_vd_d_advsimd_sleef(__builtin_inf()), u);
vopmask_advsimd_sleef o = vgt_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(0x1.62e42fefa39efp+9));
u = vsel_vd_vo_vd_vd_advsimd_sleef(o, vcast_vd_d_advsimd_sleef(__builtin_inf()), u);
u = vreinterpret_vd_vm_advsimd_sleef(vandnot_vm_vo64_vm_advsimd_sleef(vlt_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(-1000)), vreinterpret_vm_vd_advsimd_sleef(u)));
return u;
@ -3411,13 +3409,13 @@ static SLEEF_ALWAYS_INLINE SLEEF_CONST vdouble_advsimd_sleef expk_advsimd_sleef(
}
SLEEF_INLINE SLEEF_CONST vdouble_advsimd_sleef Sleef_powd2_u10advsimd(vdouble_advsimd_sleef x, vdouble_advsimd_sleef y) {
vopmask_advsimd_sleef yisint = visint_vo_vd_advsimd_sleef(y);
vopmask_advsimd_sleef yisodd = vand_vo_vo_vo_advsimd_sleef(visodd_vo_vd_advsimd_sleef(y), yisint);
vdouble2_advsimd_sleef d = ddmul_vd2_vd2_vd_advsimd_sleef(logk_advsimd_sleef(vabs_vd_vd_advsimd_sleef(x)), y);
vdouble_advsimd_sleef result = expk_advsimd_sleef(d);
result = vsel_vd_vo_vd_vd_advsimd_sleef(vgt_vo_vd_vd_advsimd_sleef(vd2getx_vd_vd2_advsimd_sleef(d), vcast_vd_d_advsimd_sleef(709.78271114955742909217217426)), vcast_vd_d_advsimd_sleef(__builtin_inf()), result);
vopmask_advsimd_sleef o = vgt_vo_vd_vd_advsimd_sleef(vd2getx_vd_vd2_advsimd_sleef(d), vcast_vd_d_advsimd_sleef(0x1.62e42fefa39efp+9));
result = vsel_vd_vo_vd_vd_advsimd_sleef(o, vcast_vd_d_advsimd_sleef(__builtin_inf()), result);
result = vmul_vd_vd_vd_advsimd_sleef(result,
vsel_vd_vo_vd_vd_advsimd_sleef(vgt_vo_vd_vd_advsimd_sleef(x, vcast_vd_d_advsimd_sleef(0)),
@ -3443,7 +3441,6 @@ SLEEF_INLINE SLEEF_CONST vdouble_advsimd_sleef Sleef_powd2_u10advsimd(vdouble_ad
result = vsel_vd_vo_vd_vd_advsimd_sleef(vor_vo_vo_vo_advsimd_sleef(veq_vo_vd_vd_advsimd_sleef(y, vcast_vd_d_advsimd_sleef(0)), veq_vo_vd_vd_advsimd_sleef(x, vcast_vd_d_advsimd_sleef(1))), vcast_vd_d_advsimd_sleef(1), result);
return result;
}
static SLEEF_ALWAYS_INLINE SLEEF_CONST vdouble2_advsimd_sleef expk2_advsimd_sleef(vdouble2_advsimd_sleef d) {
@ -3931,7 +3928,8 @@ SLEEF_INLINE SLEEF_CONST vdouble_advsimd_sleef Sleef_log1pd2_u10advsimd(vdouble_
vdouble_advsimd_sleef r = vadd_vd_vd_vd_advsimd_sleef(vd2getx_vd_vd2_advsimd_sleef(s), vd2gety_vd_vd2_advsimd_sleef(s));
r = vsel_vd_vo_vd_vd_advsimd_sleef(vgt_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(1e+307)), vcast_vd_d_advsimd_sleef(__builtin_inf()), r);
vopmask_advsimd_sleef ocore = vle_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(0x1.c7b1f3cac7433p+1019));
if(!__builtin_expect(!!(vtestallones_i_vo64_advsimd_sleef (ocore)), 1)) r = vsel_vd_vo_vd_vd_advsimd_sleef(ocore, r, Sleef_logd2_u10advsimd(d));
r = vsel_vd_vo_vd_vd_advsimd_sleef(vor_vo_vo_vo_advsimd_sleef(vlt_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(-1)), visnan_vo_vd_advsimd_sleef(d)), vcast_vd_d_advsimd_sleef(__builtin_nan("")), r);
r = vsel_vd_vo_vd_vd_advsimd_sleef(veq_vo_vd_vd_advsimd_sleef(d, vcast_vd_d_advsimd_sleef(-1)), vcast_vd_d_advsimd_sleef(-__builtin_inf()), r);
r = vsel_vd_vo_vd_vd_advsimd_sleef(visnegzero_vo_vd_advsimd_sleef(d), vcast_vd_d_advsimd_sleef(-0.0), r);
@ -4011,7 +4009,7 @@ SLEEF_INLINE SLEEF_CONST vint_advsimd_sleef Sleef_expfrexpd2_advsimd(vdouble_adv
vint_advsimd_sleef ret = vcastu_vi_vm_advsimd_sleef(vreinterpret_vm_vd_advsimd_sleef(x));
ret = vsub_vi_vi_vi_advsimd_sleef(vand_vi_vi_vi_advsimd_sleef(vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(ret), 20)), vcast_vi_i_advsimd_sleef(0x7ff)), vcast_vi_i_advsimd_sleef(0x3fe));
ret = vsel_vi_vo_vi_vi_advsimd_sleef(vor_vo_vo_vo_advsimd_sleef(vor_vo_vo_vo_advsimd_sleef(veq_vo_vd_vd_advsimd_sleef(x, vcast_vd_d_advsimd_sleef(0)), visnan_vo_vd_advsimd_sleef(x)), visinf_vo_vd_advsimd_sleef(x)), vcast_vi_i_advsimd_sleef(0), ret);
ret = vsel_vi_vo_vi_vi_advsimd_sleef(vcast_vo32_vo64_advsimd_sleef(vor_vo_vo_vo_advsimd_sleef(vor_vo_vo_vo_advsimd_sleef(veq_vo_vd_vd_advsimd_sleef(x, vcast_vd_d_advsimd_sleef(0)), visnan_vo_vd_advsimd_sleef(x)), visinf_vo_vd_advsimd_sleef(x))), vcast_vi_i_advsimd_sleef(0), ret);
return ret;
}
@ -4410,14 +4408,6 @@ SLEEF_INLINE SLEEF_CONST vdouble_advsimd_sleef Sleef_erfcd2_u15advsimd(vdouble_a
return r;
}
#if !defined(__NVCC__) && ((defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8))
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && !defined(__NVCC__) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;
@ -4934,6 +4924,7 @@ SLEEF_INLINE SLEEF_CONST vfloat_advsimd_sleef Sleef_tanf4_u35advsimd(vfloat_advs
if (__builtin_expect(!!(vtestallones_i_vo32_advsimd_sleef(vlt_vo_vf_vf_advsimd_sleef(vabs_vf_vf_advsimd_sleef(d), vcast_vf_f_advsimd_sleef(125.0f*0.5f)))), 1)) {
q = vrint_vi2_vf_advsimd_sleef(vmul_vf_vf_vf_advsimd_sleef(d, vcast_vf_f_advsimd_sleef((float)(2 * 0.318309886183790671537767526745028724))));
u = vcast_vf_vi2_advsimd_sleef(q);
x = vmla_vf_vf_vf_vf_advsimd_sleef(u, vcast_vf_f_advsimd_sleef(-3.1414794921875f*0.5f), x);
x = vmla_vf_vf_vf_vf_advsimd_sleef(u, vcast_vf_f_advsimd_sleef(-0.00011315941810607910156f*0.5f), x);
x = vmla_vf_vf_vf_vf_advsimd_sleef(u, vcast_vf_f_advsimd_sleef(-1.9841872589410058936e-09f*0.5f), x);
@ -6335,7 +6326,8 @@ SLEEF_INLINE SLEEF_CONST vfloat_advsimd_sleef Sleef_log1pf4_u10advsimd(vfloat_ad
vfloat_advsimd_sleef r = vadd_vf_vf_vf_advsimd_sleef(vf2getx_vf_vf2_advsimd_sleef(s), vf2gety_vf_vf2_advsimd_sleef(s));
r = vsel_vf_vo_vf_vf_advsimd_sleef(vgt_vo_vf_vf_advsimd_sleef(d, vcast_vf_f_advsimd_sleef(1e+38)), vcast_vf_f_advsimd_sleef(__builtin_inff()), r);
vopmask_advsimd_sleef ocore = vle_vo_vf_vf_advsimd_sleef(d, vcast_vf_f_advsimd_sleef(0x1.2ced32p+126));
if(!__builtin_expect(!!(vtestallones_i_vo32_advsimd_sleef (ocore)), 1)) r = vsel_vf_vo_vf_vf_advsimd_sleef(ocore, r, Sleef_logf4_u10advsimd(d));
r = vreinterpret_vf_vm_advsimd_sleef(vor_vm_vo32_vm_advsimd_sleef(vgt_vo_vf_vf_advsimd_sleef(vcast_vf_f_advsimd_sleef(-1), d), vreinterpret_vm_vf_advsimd_sleef(r)));
r = vsel_vf_vo_vf_vf_advsimd_sleef(veq_vo_vf_vf_advsimd_sleef(d, vcast_vf_f_advsimd_sleef(-1)), vcast_vf_f_advsimd_sleef(-__builtin_inff()), r);
r = vsel_vf_vo_vf_vf_advsimd_sleef(visnegzero_vo_vf_advsimd_sleef(d), vcast_vf_f_advsimd_sleef(-0.0f), r);

View File

@ -1,8 +1,11 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See http://www.boost.org/LICENSE_1_0.txt)
// This file is generated by SLEEF 3.6.1
// This file is generated by SLEEF 3.9.0
/* #undef SLEEF_FLOAT128_IS_IEEEQP */
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#ifndef SLEEF_ALWAYS_INLINE
#if defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)
@ -1010,6 +1013,7 @@ static const double Sleef_rempitabdp[] = {
2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322,
2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322,
2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323,
0, 0, 0, 0,
};
static const float Sleef_rempitabsp[] = {
@ -1116,17 +1120,10 @@ static const float Sleef_rempitabsp[] = {
1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
2.743283031e-13, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
0, 0, 0, 0,
};
#endif // #ifndef __SLEEF_REMPITAB__
#if !defined(__NVCC__) && ((defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8))
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && !defined(__NVCC__) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;
@ -1182,7 +1179,7 @@ typedef vquad_rvvm1_sleef vargquad_rvvm1_sleef;
static SLEEF_ALWAYS_INLINE int vavailability_i_rvvm1_sleef(int name) {
return (__riscv_vsetvlmax_e64m1() >= __riscv_vsetvlmax_e64m1()) ? 3 : 0;
return (((int)__riscv_vsetvlmax_e64m1()) >= ((int)__riscv_vsetvlmax_e64m1())) ? 3 : 0;
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef figetd_vf_di_rvvm1_sleef(fi_t_rvvm1_sleef d) {
@ -1239,144 +1236,144 @@ static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vreinterpret_vf_vi2_rvvm1_sleef(vi
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vcast_vf_f_rvvm1_sleef(float f) {
return __riscv_vfmv_v_f_f32m1(f, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmv_v_f_f32m1(f, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vrint_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef vd_rvvm1_sleef) {
return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1_rm(vd_rvvm1_sleef, __RISCV_FRM_RNE, (__riscv_vsetvlmax_e32m1())), (__riscv_vsetvlmax_e32m1()));
return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1_rm(vd_rvvm1_sleef, __RISCV_FRM_RNE, ((int)__riscv_vsetvlmax_e32m1())), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vcast_vf_vi2_rvvm1_sleef(vint2_rvvm1_sleef vi) {
return __riscv_vfcvt_f(vi, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfcvt_f(vi, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vcast_vi2_i_rvvm1_sleef(int i) {
return __riscv_vmv_v_x_i32m1(i, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmv_v_x_i32m1(i, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vrint_vi2_vf_rvvm1_sleef(vfloat_rvvm1_sleef vf) {
return __riscv_vfcvt_x_f_v_i32m1_rm(vf, __RISCV_FRM_RNE, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfcvt_x_f_v_i32m1_rm(vf, __RISCV_FRM_RNE, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vtruncate_vi2_vf_rvvm1_sleef(vfloat_rvvm1_sleef vf) {
return __riscv_vfcvt_rtz_x(vf, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfcvt_rtz_x(vf, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vtruncate_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef vf) {
return vcast_vf_vi2_rvvm1_sleef(vtruncate_vi2_vf_rvvm1_sleef(vf));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vload_vf_p_rvvm1_sleef(const float *ptr) {
return __riscv_vle32_v_f32m1(ptr, (__riscv_vsetvlmax_e32m1()));
return __riscv_vle32_v_f32m1(ptr, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vloadu_vf_p_rvvm1_sleef(const float *ptr) {
return __riscv_vle32_v_f32m1(ptr, (__riscv_vsetvlmax_e32m1()));
return __riscv_vle32_v_f32m1(ptr, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE void vstore_v_p_vf_rvvm1_sleef(float *ptr, vfloat_rvvm1_sleef v) {
__riscv_vse32(ptr, v, (__riscv_vsetvlmax_e32m1()));
__riscv_vse32(ptr, v, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE void vstoreu_v_p_vf_rvvm1_sleef(float *ptr, vfloat_rvvm1_sleef v) {
__riscv_vse32(ptr, v, (__riscv_vsetvlmax_e32m1()));
__riscv_vse32(ptr, v, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE void vstoreu_v_p_vi2_rvvm1_sleef(int32_t *ptr, vint2_rvvm1_sleef v) {
__riscv_vse32(ptr, v, (__riscv_vsetvlmax_e32m1()));
__riscv_vse32(ptr, v, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vgather_vf_p_vi2_rvvm1_sleef(const float *ptr, vint2_rvvm1_sleef vi2) {
return __riscv_vluxei32(ptr, __riscv_vmul(__riscv_vreinterpret_u32m1(vi2), sizeof(float), (__riscv_vsetvlmax_e32m1())), (__riscv_vsetvlmax_e32m1()));
return __riscv_vluxei32(ptr, __riscv_vmul(__riscv_vreinterpret_u32m1(vi2), sizeof(float), ((int)__riscv_vsetvlmax_e32m1())), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vadd_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfadd(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfadd(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsub_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfsub(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfsub(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmul_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfmul(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmul(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vdiv_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfdiv(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfdiv(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmax_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfmax(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmax(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmin_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfmin(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmin(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vrec_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
return __riscv_vfdiv(vcast_vf_f_rvvm1_sleef(1.0f), d, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfdiv(vcast_vf_f_rvvm1_sleef(1.0f), d, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsqrt_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
return __riscv_vfsqrt(d, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfsqrt(d, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmla_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
return __riscv_vfmadd(x, y, z, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmadd(x, y, z, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmlanp_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
return __riscv_vfnmsub(x, y, z, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfnmsub(x, y, z, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmlapn_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
return __riscv_vfmsub(x, y, z, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmsub(x, y, z, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vfma_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
return __riscv_vfmadd(x, y, z, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmadd(x, y, z, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vfmanp_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
return __riscv_vfnmsub(x, y, z, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfnmsub(x, y, z, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vfmapn_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
return __riscv_vfmsub(x, y, z, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmsub(x, y, z, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmulsign_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfsgnjx(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfsgnjx(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vcopysign_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vfsgnj(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfsgnj(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsign_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef f) {
return __riscv_vfsgnj(__riscv_vfmv_v_f_f32m1(1.0f, (__riscv_vsetvlmax_e32m1())), f, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfsgnj(__riscv_vfmv_v_f_f32m1(1.0f, ((int)__riscv_vsetvlmax_e32m1())), f, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vorsign_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
vint2_rvvm1_sleef xi = __riscv_vreinterpret_i32m1(x);
vint2_rvvm1_sleef yi = __riscv_vreinterpret_i32m1(y);
vint2_rvvm1_sleef xioryi = __riscv_vor(xi, yi, (__riscv_vsetvlmax_e32m1()));
vint2_rvvm1_sleef xioryi = __riscv_vor(xi, yi, ((int)__riscv_vsetvlmax_e32m1()));
vfloat_rvvm1_sleef xory = __riscv_vreinterpret_f32m1(xioryi);
return __riscv_vfsgnj(x, xory, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfsgnj(x, xory, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vabs_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef f) {
return __riscv_vfabs(f, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfabs(f, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vneg_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef f) {
return __riscv_vfneg(f, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfneg(f, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vadd_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vadd(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vadd(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vsub_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vsub(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vsub(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vneg_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x) {
return __riscv_vneg(x, (__riscv_vsetvlmax_e32m1()));
return __riscv_vneg(x, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vand_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vand(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vand(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vandnot_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vand(__riscv_vnot(x, (__riscv_vsetvlmax_e32m1())), y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vand(__riscv_vnot(x, ((int)__riscv_vsetvlmax_e32m1())), y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vor_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vor(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vor(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vxor_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vxor(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vxor(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vsll_vi2_vi2_i_rvvm1_sleef(vint2_rvvm1_sleef x, int c) {
return __riscv_vsll(x, c, (__riscv_vsetvlmax_e32m1()));
return __riscv_vsll(x, c, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vsra_vi2_vi2_i_rvvm1_sleef(vint2_rvvm1_sleef x, int c) {
return __riscv_vsra(x, c, (__riscv_vsetvlmax_e32m1()));
return __riscv_vsra(x, c, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vsrl_vi2_vi2_i_rvvm1_sleef(vint2_rvvm1_sleef x, int c) {
return __riscv_vreinterpret_i32m1(__riscv_vsrl(__riscv_vreinterpret_u32m1(x), c, (__riscv_vsetvlmax_e32m1())));
return __riscv_vreinterpret_i32m1(__riscv_vsrl(__riscv_vreinterpret_u32m1(x), c, ((int)__riscv_vsetvlmax_e32m1())));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vreinterpret_vf_vm_rvvm1_sleef(vmask_rvvm1_sleef vm) {
@ -1387,91 +1384,91 @@ static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vreinterpret_vm_vf_rvvm1_sleef(vflo
}
static SLEEF_ALWAYS_INLINE int vtestallones_i_vo32_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef g) {
return __riscv_vcpop(g, (__riscv_vsetvlmax_e32m1())) == (__riscv_vsetvlmax_e32m1());
return (int)__riscv_vcpop(g, ((int)__riscv_vsetvlmax_e32m1())) == (int)((int)__riscv_vsetvlmax_e32m1());
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vor_vm_vo32_vm_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
rvv_vmask32 y32 = __riscv_vreinterpret_u32m1(y);
return __riscv_vreinterpret_u64m1(__riscv_vmerge(y32, -1, x, (__riscv_vsetvlmax_e32m1())));
return __riscv_vreinterpret_u64m1(__riscv_vmerge(y32, -1, x, ((int)__riscv_vsetvlmax_e32m1())));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vand_vm_vo32_vm_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
rvv_vmask32 y32 = __riscv_vreinterpret_u32m1(y);
return __riscv_vreinterpret_u64m1(__riscv_vmerge(y32, 0, __riscv_vmnot(x, (__riscv_vsetvlmax_e32m1())), (__riscv_vsetvlmax_e32m1())));
return __riscv_vreinterpret_u64m1(__riscv_vmerge(y32, 0, __riscv_vmnot(x, ((int)__riscv_vsetvlmax_e32m1())), ((int)__riscv_vsetvlmax_e32m1())));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vandnot_vm_vo32_vm_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
rvv_vmask32 y32 = __riscv_vreinterpret_u32m1(y);
return __riscv_vreinterpret_u64m1(__riscv_vmerge(y32, 0, x, (__riscv_vsetvlmax_e32m1())));
return __riscv_vreinterpret_u64m1(__riscv_vmerge(y32, 0, x, ((int)__riscv_vsetvlmax_e32m1())));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef rvv_sp_vand_vo_vo_vo(rvv_sp_vopmask_rvvm1_sleef x, rvv_sp_vopmask_rvvm1_sleef y) {
return __riscv_vmand(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmand(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef rvv_sp_vandnot_vo_vo_vo(rvv_sp_vopmask_rvvm1_sleef x, rvv_sp_vopmask_rvvm1_sleef y) {
return __riscv_vmandn(y, x, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmandn(y, x, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef rvv_sp_vor_vo_vo_vo(rvv_sp_vopmask_rvvm1_sleef x, rvv_sp_vopmask_rvvm1_sleef y) {
return __riscv_vmor(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmor(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef rvv_sp_vxor_vo_vo_vo(rvv_sp_vopmask_rvvm1_sleef x, rvv_sp_vopmask_rvvm1_sleef y) {
return __riscv_vmxor(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmxor(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef veq_vo_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmfeq(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfeq(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vneq_vo_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmfne(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfne(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vgt_vo_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmfgt(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfgt(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vge_vo_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmfge(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfge(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vlt_vo_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmflt(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmflt(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vle_vo_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmfle(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfle(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef visnan_vo_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
return __riscv_vmfne(d, d, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfne(d, d, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef visinf_vo_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
return __riscv_vmfeq(__riscv_vfabs(d, (__riscv_vsetvlmax_e32m1())), __builtin_inff(), (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfeq(__riscv_vfabs(d, ((int)__riscv_vsetvlmax_e32m1())), __builtin_inff(), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vispinf_vo_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
return __riscv_vmfeq(d, __builtin_inff(), (__riscv_vsetvlmax_e32m1()));
return __riscv_vmfeq(d, __builtin_inff(), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsel_vf_vo_vf_vf_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef mask, vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y) {
return __riscv_vmerge(y, x, mask, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmerge(y, x, mask, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsel_vf_vo_f_f_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef mask, float v1, float v0) {
return __riscv_vfmerge(vcast_vf_f_rvvm1_sleef(v0), v1, mask, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmerge(vcast_vf_f_rvvm1_sleef(v0), v1, mask, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsel_vf_vo_vo_f_f_f_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef o0, rvv_sp_vopmask_rvvm1_sleef o1, float d0, float d1, float d2) {
return __riscv_vfmerge(__riscv_vfmerge(vcast_vf_f_rvvm1_sleef(d2), d1, o1, (__riscv_vsetvlmax_e32m1())), d0, o0, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmerge(__riscv_vfmerge(vcast_vf_f_rvvm1_sleef(d2), d1, o1, ((int)__riscv_vsetvlmax_e32m1())), d0, o0, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vsel_vf_vo_vo_vo_f_f_f_f_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef o0, rvv_sp_vopmask_rvvm1_sleef o1, rvv_sp_vopmask_rvvm1_sleef o2, float d0, float d1, float d2, float d3) {
return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vf_f_rvvm1_sleef(d3), d2, o2, (__riscv_vsetvlmax_e32m1())), d1, o1, (__riscv_vsetvlmax_e32m1())), d0, o0, (__riscv_vsetvlmax_e32m1()));
return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vf_f_rvvm1_sleef(d3), d2, o2, ((int)__riscv_vsetvlmax_e32m1())), d1, o1, ((int)__riscv_vsetvlmax_e32m1())), d0, o0, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef veq_vo_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vmseq(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmseq(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE rvv_sp_vopmask_rvvm1_sleef vgt_vo_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vmsgt(x, y, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmsgt(x, y, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vgt_vi2_vi2_vi2_rvvm1_sleef(vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
vint2_rvvm1_sleef zero = vcast_vi2_i_rvvm1_sleef(0);
return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, (__riscv_vsetvlmax_e32m1())), (__riscv_vsetvlmax_e32m1()));
return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, ((int)__riscv_vsetvlmax_e32m1())), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vsel_vi2_vo_vi2_vi2_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef m, vint2_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vmerge(y, x, m, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmerge(y, x, m, ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE vint2_rvvm1_sleef vand_vi2_vo_vi2_rvvm1_sleef(rvv_sp_vopmask_rvvm1_sleef x, vint2_rvvm1_sleef y) {
return __riscv_vmerge(y, 0, __riscv_vmnot(x, (__riscv_vsetvlmax_e32m1())), (__riscv_vsetvlmax_e32m1()));
return __riscv_vmerge(y, 0, __riscv_vmnot(x, ((int)__riscv_vsetvlmax_e32m1())), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE const vdouble_rvvm1_sleef vd2getx_vd_vd2_rvvm1_sleef(vdouble2_rvvm1_sleef v) {
@ -1537,203 +1534,203 @@ static SLEEF_ALWAYS_INLINE ddi_t_rvvm1_sleef ddisetdd_ddi_ddi_vd2_rvvm1_sleef(dd
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vcast_vd_d_rvvm1_sleef(double d) {
return __riscv_vfmv_v_f_f64m1(d, __riscv_vsetvlmax_e64m1());
return __riscv_vfmv_v_f_f64m1(d, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vcast_vd_vi_rvvm1_sleef(vint_rvvm1_sleef i) {
return __riscv_vfwcvt_f(i, __riscv_vsetvlmax_e64m1());
return __riscv_vfwcvt_f(i, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vcast_vi_i_rvvm1_sleef(int32_t i) {
return __riscv_vmv_v_x_i32mf2(i, __riscv_vsetvlmax_e64m1());
return __riscv_vmv_v_x_i32mf2(i, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vrint_vi_vd_rvvm1_sleef(vdouble_rvvm1_sleef vd_rvvm1_sleef) {
return __riscv_vfncvt_x_f_w_i32mf2_rm(vd_rvvm1_sleef, __RISCV_FRM_RNE, __riscv_vsetvlmax_e64m1());
return __riscv_vfncvt_x_f_w_i32mf2_rm(vd_rvvm1_sleef, __RISCV_FRM_RNE, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vrint_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef vd_rvvm1_sleef) {
return __riscv_vfcvt_f_x_v_f64m1(__riscv_vfcvt_x_f_v_i64m1_rm(vd_rvvm1_sleef, __RISCV_FRM_RNE, __riscv_vsetvlmax_e64m1()), __riscv_vsetvlmax_e64m1());
return __riscv_vfcvt_f_x_v_f64m1(__riscv_vfcvt_x_f_v_i64m1_rm(vd_rvvm1_sleef, __RISCV_FRM_RNE, ((int)__riscv_vsetvlmax_e64m1())), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vtruncate_vi_vd_rvvm1_sleef(vdouble_rvvm1_sleef vd_rvvm1_sleef) {
return __riscv_vfncvt_rtz_x(vd_rvvm1_sleef, __riscv_vsetvlmax_e64m1());
return __riscv_vfncvt_rtz_x(vd_rvvm1_sleef, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vtruncate_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef vd_rvvm1_sleef) {
return vcast_vd_vi_rvvm1_sleef(vtruncate_vi_vd_rvvm1_sleef(vd_rvvm1_sleef));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vload_vd_p_rvvm1_sleef(const double *ptr) {
return __riscv_vle64_v_f64m1(ptr, __riscv_vsetvlmax_e64m1());
return __riscv_vle64_v_f64m1(ptr, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vloadu_vd_p_rvvm1_sleef(const double *ptr) {
return __riscv_vle64_v_f64m1(ptr, __riscv_vsetvlmax_e64m1());
return __riscv_vle64_v_f64m1(ptr, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vloadu_vi_p_rvvm1_sleef(int32_t *p) {
return __riscv_vle32_v_i32mf2(p, __riscv_vsetvlmax_e64m1());
return __riscv_vle32_v_i32mf2(p, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE void vstore_v_p_vd_rvvm1_sleef(double *ptr, vdouble_rvvm1_sleef v) {
__riscv_vse64(ptr, v, __riscv_vsetvlmax_e64m1());
__riscv_vse64(ptr, v, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE void vstoreu_v_p_vd_rvvm1_sleef(double *ptr, vdouble_rvvm1_sleef v) {
__riscv_vse64(ptr, v, __riscv_vsetvlmax_e64m1());
__riscv_vse64(ptr, v, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE void vstoreu_v_p_vi_rvvm1_sleef(int32_t *ptr, vint_rvvm1_sleef v) {
__riscv_vse32(ptr, v, __riscv_vsetvlmax_e64m1());
__riscv_vse32(ptr, v, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vgather_vd_p_vi_rvvm1_sleef(const double *ptr, vint_rvvm1_sleef vi) {
return __riscv_vluxei64(ptr, __riscv_vwmulu(__riscv_vreinterpret_u32mf2(vi), sizeof(double), __riscv_vsetvlmax_e64m1()), __riscv_vsetvlmax_e64m1());
return __riscv_vluxei64(ptr, __riscv_vwmulu(__riscv_vreinterpret_u32mf2(vi), sizeof(double), ((int)__riscv_vsetvlmax_e64m1())), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vadd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfadd(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfadd(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsub_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfsub(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfsub(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vrec_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vfdiv(vcast_vd_d_rvvm1_sleef(1.0), d, __riscv_vsetvlmax_e64m1());
return __riscv_vfdiv(vcast_vd_d_rvvm1_sleef(1.0), d, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vabs_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vfabs(d, __riscv_vsetvlmax_e64m1());
return __riscv_vfabs(d, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsqrt_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vfsqrt(d, __riscv_vsetvlmax_e64m1());
return __riscv_vfsqrt(d, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmul_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfmul(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfmul(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vdiv_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfdiv(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfdiv(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmax_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfmax(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfmax(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmin_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfmin(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfmin(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmla_vd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y, vdouble_rvvm1_sleef z) {
return __riscv_vfmadd(x, y, z, __riscv_vsetvlmax_e64m1());
return __riscv_vfmadd(x, y, z, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmlapn_vd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y, vdouble_rvvm1_sleef z) {
return __riscv_vfmsub(x, y, z, __riscv_vsetvlmax_e64m1());
return __riscv_vfmsub(x, y, z, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmlanp_vd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y, vdouble_rvvm1_sleef z) {
return __riscv_vfnmsac(z, x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfnmsac(z, x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vfma_vd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y, vdouble_rvvm1_sleef z) {
return __riscv_vfmadd(x, y, z, __riscv_vsetvlmax_e64m1());
return __riscv_vfmadd(x, y, z, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vfmanp_vd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y, vdouble_rvvm1_sleef z) {
return __riscv_vfnmsub(x, y, z, __riscv_vsetvlmax_e64m1());
return __riscv_vfnmsub(x, y, z, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vfmapn_vd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y, vdouble_rvvm1_sleef z) {
return __riscv_vfmsub(x, y, z, __riscv_vsetvlmax_e64m1());
return __riscv_vfmsub(x, y, z, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmulsign_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfsgnjx(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfsgnjx(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vcopysign_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfsgnj(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vfsgnj(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vorsign_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vfsgnj(x, __riscv_vreinterpret_f64m1(__riscv_vreinterpret_i64m1(__riscv_vor(__riscv_vreinterpret_u64m1(x), __riscv_vreinterpret_u64m1(y), __riscv_vsetvlmax_e64m1()))), __riscv_vsetvlmax_e64m1());
return __riscv_vfsgnj(x, __riscv_vreinterpret_f64m1(__riscv_vreinterpret_i64m1(__riscv_vor(__riscv_vreinterpret_u64m1(x), __riscv_vreinterpret_u64m1(y), ((int)__riscv_vsetvlmax_e64m1())))), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vneg_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vfneg(d, __riscv_vsetvlmax_e64m1());
return __riscv_vfneg(d, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vadd_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vadd(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vadd(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vsub_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vsub(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vsub(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vneg_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x) {
return __riscv_vneg(x, __riscv_vsetvlmax_e64m1());
return __riscv_vneg(x, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vand_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vand(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vand(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vandnot_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vand(__riscv_vnot(x, __riscv_vsetvlmax_e64m1()), y, __riscv_vsetvlmax_e64m1());
return __riscv_vand(__riscv_vnot(x, ((int)__riscv_vsetvlmax_e64m1())), y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vor_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vor(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vor(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vxor_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vxor(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vxor(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vsll_vi_vi_i_rvvm1_sleef(vint_rvvm1_sleef x, int c) {
return __riscv_vsll(x, c, __riscv_vsetvlmax_e64m1());
return __riscv_vsll(x, c, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vsra_vi_vi_i_rvvm1_sleef(vint_rvvm1_sleef x, int c) {
return __riscv_vsra(x, c, __riscv_vsetvlmax_e64m1());
return __riscv_vsra(x, c, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vsrl_vi_vi_i_rvvm1_sleef(vint_rvvm1_sleef x, int c) {
return __riscv_vreinterpret_i32mf2(__riscv_vsrl(__riscv_vreinterpret_u32mf2(x), c, __riscv_vsetvlmax_e64m1()));
return __riscv_vreinterpret_i32mf2(__riscv_vsrl(__riscv_vreinterpret_u32mf2(x), c, ((int)__riscv_vsetvlmax_e64m1())));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vcast_vm_i64_rvvm1_sleef(int64_t c) {
return __riscv_vmv_v_x_u64m1(c, __riscv_vsetvlmax_e64m1());
return __riscv_vmv_v_x_u64m1(c, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vcast_vm_u64_rvvm1_sleef(uint64_t c) {
return __riscv_vmv_v_x_u64m1(c, __riscv_vsetvlmax_e64m1());
return __riscv_vmv_v_x_u64m1(c, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vcast_vm_i_i_rvvm1_sleef(int64_t h, int64_t l) {
return __riscv_vmv_v_x_u64m1((((uint64_t)h) << 32) | (uint32_t) l, __riscv_vsetvlmax_e64m1());
return __riscv_vmv_v_x_u64m1((((uint64_t)h) << 32) | (uint32_t) l, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vcast_vm_vi_rvvm1_sleef(vint_rvvm1_sleef vi) {
return __riscv_vreinterpret_u64m1(__riscv_vwcvt_x(vi, __riscv_vsetvlmax_e64m1()));
return __riscv_vreinterpret_u64m1(__riscv_vwcvt_x(vi, ((int)__riscv_vsetvlmax_e64m1())));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vcastu_vm_vi_rvvm1_sleef(vint_rvvm1_sleef vi) {
return __riscv_vsll(__riscv_vreinterpret_u64m1(__riscv_vwcvt_x(vi, __riscv_vsetvlmax_e64m1())), 32, __riscv_vsetvlmax_e64m1());
return __riscv_vsll(__riscv_vreinterpret_u64m1(__riscv_vwcvt_x(vi, ((int)__riscv_vsetvlmax_e64m1()))), 32, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vcastu_vi_vm_rvvm1_sleef(vmask_rvvm1_sleef vm) {
return __riscv_vreinterpret_i32mf2(__riscv_vnsrl(vm, 32, __riscv_vsetvlmax_e64m1()));
return __riscv_vreinterpret_i32mf2(__riscv_vnsrl(vm, 32, ((int)__riscv_vsetvlmax_e64m1())));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vcast_vi_vm_rvvm1_sleef(vmask_rvvm1_sleef vm) {
return __riscv_vreinterpret_i32mf2(__riscv_vncvt_x(vm, __riscv_vsetvlmax_e64m1()));
return __riscv_vreinterpret_i32mf2(__riscv_vncvt_x(vm, ((int)__riscv_vsetvlmax_e64m1())));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vand_vm_vo64_vm_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vmerge(y, 0, __riscv_vmnot(x, __riscv_vsetvlmax_e64m1()), __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, 0, __riscv_vmnot(x, ((int)__riscv_vsetvlmax_e64m1())), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vand_vm_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vand(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vand(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vor_vm_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vor(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vor(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vxor_vm_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vxor(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vxor(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vandnot_vm_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vand(__riscv_vnot(x, __riscv_vsetvlmax_e64m1()), y, __riscv_vsetvlmax_e64m1());
return __riscv_vand(__riscv_vnot(x, ((int)__riscv_vsetvlmax_e64m1())), y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vandnot_vm_vo64_vm_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vmerge(y, 0, x, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, 0, x, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vsll64_vm_vm_i(vmask_rvvm1_sleef mask, int64_t c) {
return __riscv_vsll(mask, c, __riscv_vsetvlmax_e64m1());
return __riscv_vsll(mask, c, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vsub64_vm_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vsub(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vsub(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vsrl64_vm_vm_i(vmask_rvvm1_sleef mask, int64_t c) {
return __riscv_vsrl(mask, c, __riscv_vsetvlmax_e64m1());
return __riscv_vsrl(mask, c, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vadd64_vm_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vadd(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vadd(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vor_vm_vo64_vm_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vmerge(y, -1, x, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, -1, x, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vsel_vm_vo64_vm_vm_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef mask, vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vmerge(y, x, mask, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, x, mask, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vneg64_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef mask) {
return __riscv_vreinterpret_u64m1(__riscv_vneg(__riscv_vreinterpret_i64m1(mask), __riscv_vsetvlmax_e64m1()));
return __riscv_vreinterpret_u64m1(__riscv_vneg(__riscv_vreinterpret_i64m1(mask), ((int)__riscv_vsetvlmax_e64m1())));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vreinterpret_vd_vm_rvvm1_sleef(vmask_rvvm1_sleef vm) {
return __riscv_vreinterpret_f64m1(__riscv_vreinterpret_i64m1(vm));
@ -1757,111 +1754,111 @@ static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vcast_vo32_vo64_rvvm1_slee
return vo;
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef rvv_dp_vand_vo_vo_vo(rvv_dp_vopmask_rvvm1_sleef x, rvv_dp_vopmask_rvvm1_sleef y) {
return __riscv_vmand(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmand(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef rvv_dp_vandnot_vo_vo_vo(rvv_dp_vopmask_rvvm1_sleef x, rvv_dp_vopmask_rvvm1_sleef y) {
return __riscv_vmandn(y, x, __riscv_vsetvlmax_e64m1());
return __riscv_vmandn(y, x, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef rvv_dp_vor_vo_vo_vo(rvv_dp_vopmask_rvvm1_sleef x, rvv_dp_vopmask_rvvm1_sleef y) {
return __riscv_vmor(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmor(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef rvv_dp_vxor_vo_vo_vo(rvv_dp_vopmask_rvvm1_sleef x, rvv_dp_vopmask_rvvm1_sleef y) {
return __riscv_vmxor(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmxor(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef veq64_vo_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vmseq(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmseq(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vgt64_vo_vm_vm_rvvm1_sleef(vmask_rvvm1_sleef x, vmask_rvvm1_sleef y) {
return __riscv_vmsgt(__riscv_vreinterpret_i64m1(x), __riscv_vreinterpret_i64m1(y), __riscv_vsetvlmax_e64m1());
return __riscv_vmsgt(__riscv_vreinterpret_i64m1(x), __riscv_vreinterpret_i64m1(y), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef visinf_vo_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vmfeq(__riscv_vfabs(d, __riscv_vsetvlmax_e64m1()), __builtin_inf(), __riscv_vsetvlmax_e64m1());
return __riscv_vmfeq(__riscv_vfabs(d, ((int)__riscv_vsetvlmax_e64m1())), __builtin_inf(), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vispinf_vo_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vmfeq(d, __builtin_inf(), __riscv_vsetvlmax_e64m1());
return __riscv_vmfeq(d, __builtin_inf(), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef veq_vo_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmfeq(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmfeq(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vneq_vo_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmfne(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmfne(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vlt_vo_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmflt(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmflt(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vle_vo_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmfle(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmfle(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vgt_vo_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmfgt(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmfgt(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vge_vo_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmfge(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmfge(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef visnan_vo_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
return __riscv_vmfne(d, d, __riscv_vsetvlmax_e64m1());
return __riscv_vmfne(d, d, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsel_vd_vo_vd_vd_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef mask, vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
return __riscv_vmerge(y, x, mask, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, x, mask, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsel_vd_vo_d_d_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef mask, double v0, double v1) {
return __riscv_vfmerge(vcast_vd_d_rvvm1_sleef(v1), v0, mask, __riscv_vsetvlmax_e64m1());
return __riscv_vfmerge(vcast_vd_d_rvvm1_sleef(v1), v0, mask, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsel_vd_vo_vo_d_d_d_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef o0, rvv_dp_vopmask_rvvm1_sleef o1, double d0, double d1, double d2) {
return __riscv_vfmerge(__riscv_vfmerge(vcast_vd_d_rvvm1_sleef(d2), d1, o1, __riscv_vsetvlmax_e64m1()), d0, o0, __riscv_vsetvlmax_e64m1());
return __riscv_vfmerge(__riscv_vfmerge(vcast_vd_d_rvvm1_sleef(d2), d1, o1, ((int)__riscv_vsetvlmax_e64m1())), d0, o0, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsel_vd_vo_vo_vo_d_d_d_d_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef o0, rvv_dp_vopmask_rvvm1_sleef o1, rvv_dp_vopmask_rvvm1_sleef o2, double d0, double d1, double d2, double d3) {
return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d_rvvm1_sleef(d3), d2, o2, __riscv_vsetvlmax_e64m1()), d1, o1, __riscv_vsetvlmax_e64m1()), d0, o0, __riscv_vsetvlmax_e64m1());
return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d_rvvm1_sleef(d3), d2, o2, ((int)__riscv_vsetvlmax_e64m1())), d1, o1, ((int)__riscv_vsetvlmax_e64m1())), d0, o0, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE int vtestallones_i_vo64_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef g) {
return __riscv_vcpop(g, __riscv_vsetvlmax_e64m1()) == __riscv_vsetvlmax_e64m1();
return (int)__riscv_vcpop(g, ((int)__riscv_vsetvlmax_e64m1())) == (int)((int)__riscv_vsetvlmax_e64m1());
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef veq_vo_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vmseq(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmseq(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vgt_vo_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vmsgt(x, y, __riscv_vsetvlmax_e64m1());
return __riscv_vmsgt(x, y, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vgt_vi_vi_vi_rvvm1_sleef(vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
vint_rvvm1_sleef zero = vcast_vi_i_rvvm1_sleef(0);
return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, __riscv_vsetvlmax_e64m1()), __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, ((int)__riscv_vsetvlmax_e64m1())), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vsel_vi_vo_vi_vi_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef m, vint_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vmerge(y, x, m, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, x, m, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vandnot_vi_vo_vi_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef mask, vint_rvvm1_sleef vi) {
return __riscv_vmerge(vi, 0, mask, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(vi, 0, mask, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vint_rvvm1_sleef vand_vi_vo_vi_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef x, vint_rvvm1_sleef y) {
return __riscv_vmerge(y, 0, __riscv_vmnot(x, __riscv_vsetvlmax_e64m1()), __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(y, 0, __riscv_vmnot(x, ((int)__riscv_vsetvlmax_e64m1())), ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vposneg_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
rvv_dp_vopmask_rvvm1_sleef mask = __riscv_vreinterpret_b64(__riscv_vmv_v_x_u8m1(0x55, __riscv_vsetvlmax_e8m1()));
vdouble_rvvm1_sleef nd = __riscv_vfneg(d, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(nd, d, mask, __riscv_vsetvlmax_e64m1());
vdouble_rvvm1_sleef nd = __riscv_vfneg(d, ((int)__riscv_vsetvlmax_e64m1()));
return __riscv_vmerge(nd, d, mask, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vnegpos_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef d) {
rvv_dp_vopmask_rvvm1_sleef mask = __riscv_vreinterpret_b64(__riscv_vmv_v_x_u8m1(0xaa, __riscv_vsetvlmax_e8m1()));
vdouble_rvvm1_sleef nd = __riscv_vfneg(d, __riscv_vsetvlmax_e64m1());
return __riscv_vmerge(nd, d, mask, __riscv_vsetvlmax_e64m1());
vdouble_rvvm1_sleef nd = __riscv_vfneg(d, ((int)__riscv_vsetvlmax_e64m1()));
return __riscv_vmerge(nd, d, mask, ((int)__riscv_vsetvlmax_e64m1()));
}
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vposneg_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
rvv_sp_vopmask_rvvm1_sleef mask = __riscv_vreinterpret_b32(__riscv_vmv_v_x_u8m1(0x55, __riscv_vsetvlmax_e8m1()));
vfloat_rvvm1_sleef nd = __riscv_vfneg(d, (__riscv_vsetvlmax_e32m1()));
return __riscv_vmerge(nd, d, mask, (__riscv_vsetvlmax_e32m1()));
vfloat_rvvm1_sleef nd = __riscv_vfneg(d, ((int)__riscv_vsetvlmax_e32m1()));
return __riscv_vmerge(nd, d, mask, ((int)__riscv_vsetvlmax_e32m1()));
}
// Alternating sign flip (-, +, -, +, ...) on a float vector: lanes selected by
// the 0xaa bit pattern keep d, the remaining lanes receive -d.
//   d - input float vector
// Returns the vector with every other lane negated.
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vnegpos_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef d) {
  // Build the lane mask by broadcasting the byte 0xaa (binary 10101010) and
  // reinterpreting the register as a predicate.
  rvv_sp_vopmask_rvvm1_sleef mask = __riscv_vreinterpret_b32(__riscv_vmv_v_x_u8m1(0xaa, __riscv_vsetvlmax_e8m1()));
  vfloat_rvvm1_sleef nd = __riscv_vfneg(d, ((int)__riscv_vsetvlmax_e32m1()));
  // vmerge takes d where the mask bit is set and nd elsewhere.
  return __riscv_vmerge(nd, d, mask, ((int)__riscv_vsetvlmax_e32m1()));
}
// Adds to x a copy of y whose lanes have been alternately negated by
// vnegpos_vd_vd_rvvm1_sleef, giving an interleaved subtract/add of the inputs.
//   x, y - double vectors
// Returns x + vnegpos(y).
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vsubadd_vd_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
  vdouble_rvvm1_sleef alternated = vnegpos_vd_vd_rvvm1_sleef(y);
  return vadd_vd_vd_vd_rvvm1_sleef(x, alternated);
}
@ -1870,33 +1867,33 @@ static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vmlsubadd_vd_vd_vd_vd_rvvm1_sleef
// Fused multiply combined with an alternating-sign addend: passes x, y and the
// alternately negated z (via vnegpos_vf_vf_rvvm1_sleef) to the float FMA helper.
// NOTE(review): presumably vfma computes x * y + z per lane - confirm against
// the definition of vfma_vf_vf_vf_vf_rvvm1_sleef.
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vmlsubadd_vf_vf_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef x, vfloat_rvvm1_sleef y, vfloat_rvvm1_sleef z) {
  vfloat_rvvm1_sleef alternated = vnegpos_vf_vf_rvvm1_sleef(z);
  return vfma_vf_vf_vf_vf_rvvm1_sleef(x, y, alternated);
}
// Swaps the two lanes of every adjacent pair of a double vector
// (lane 0 <-> lane 1, lane 2 <-> lane 3, ...).
//   vd_rvvm1_sleef - input double vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vrev21_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef vd_rvvm1_sleef) {
  // Start from the lane indices 0, 1, 2, ...
  rvv_dp_vuint2 id = __riscv_vid_v_u64m1(((int)__riscv_vsetvlmax_e64m1()));
  // Flipping bit 0 of each index turns it into the index of its pair partner.
  id = __riscv_vxor(id, 1, ((int)__riscv_vsetvlmax_e64m1()));
  return __riscv_vrgather(vd_rvvm1_sleef, id, ((int)__riscv_vsetvlmax_e64m1()));
}
// Swaps the two lanes of every adjacent pair of a float vector
// (lane 0 <-> lane 1, lane 2 <-> lane 3, ...).
//   vf - input float vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vrev21_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef vf) {
  // Start from the lane indices 0, 1, 2, ... held as signed 32-bit integers.
  vint2_rvvm1_sleef id = __riscv_vreinterpret_i32m1(__riscv_vid_v_u32m1(((int)__riscv_vsetvlmax_e32m1())));
  // Flipping bit 0 of each index turns it into the index of its pair partner.
  id = __riscv_vxor(id, 1, ((int)__riscv_vsetvlmax_e32m1()));
  // vrgather needs unsigned indices, hence the reinterpret.
  return __riscv_vrgather(vf, __riscv_vreinterpret_u32m1(id), ((int)__riscv_vsetvlmax_e32m1()));
}
// Permutes a double vector by XOR-ing each lane index with (lane count - 2).
// NOTE(review): for a power-of-two lane count this reverses the order of the
// adjacent lane pairs while keeping the order inside each pair - confirm the
// lane count is always a power of two on supported hardware.
//   vd_rvvm1_sleef - input double vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vdouble_rvvm1_sleef vreva2_vd_vd_rvvm1_sleef(vdouble_rvvm1_sleef vd_rvvm1_sleef) {
  // Start from the lane indices 0, 1, 2, ...
  rvv_dp_vuint2 id = __riscv_vid_v_u64m1(((int)__riscv_vsetvlmax_e64m1()));
  // Bit 0 of (count - 2) is clear, so the position inside a pair is preserved.
  id = __riscv_vxor(id, ((int)__riscv_vsetvlmax_e64m1()) - 2, ((int)__riscv_vsetvlmax_e64m1()));
  return __riscv_vrgather(vd_rvvm1_sleef, id, ((int)__riscv_vsetvlmax_e64m1()));
}
// Permutes a float vector by XOR-ing each lane index with (lane count - 2).
// NOTE(review): for a power-of-two lane count this reverses the order of the
// adjacent lane pairs while keeping the order inside each pair - confirm the
// lane count is always a power of two on supported hardware.
//   vf - input float vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vfloat_rvvm1_sleef vreva2_vf_vf_rvvm1_sleef(vfloat_rvvm1_sleef vf) {
  // Start from the lane indices 0, 1, 2, ... held as signed 32-bit integers.
  vint2_rvvm1_sleef id = __riscv_vreinterpret_i32m1(__riscv_vid_v_u32m1(((int)__riscv_vsetvlmax_e32m1())));
  // Bit 0 of (count - 2) is clear, so the position inside a pair is preserved.
  id = __riscv_vxor(id, ((int)__riscv_vsetvlmax_e32m1()) - 2, ((int)__riscv_vsetvlmax_e32m1()));
  // vrgather needs unsigned indices, hence the reinterpret.
  return __riscv_vrgather(vf, __riscv_vreinterpret_u32m1(id), ((int)__riscv_vsetvlmax_e32m1()));
}
static SLEEF_ALWAYS_INLINE void vscatter2_v_p_i_i_vd_rvvm1_sleef(double *ptr, int offset, int step, vdouble_rvvm1_sleef v) {
ptr += offset * 2;
for (int i = 0; i < __riscv_vsetvlmax_e64m1(); i += 2) {
for (int i = 0; i < (int)((int)__riscv_vsetvlmax_e64m1()); i += 2) {
vdouble_rvvm1_sleef vv = __riscv_vslidedown(v, i, 2);
__riscv_vse64(ptr, vv, 2);
@ -1907,7 +1904,7 @@ static SLEEF_ALWAYS_INLINE void vscatter2_v_p_i_i_vd_rvvm1_sleef(double *ptr, in
static SLEEF_ALWAYS_INLINE void vscatter2_v_p_i_i_vf_rvvm1_sleef(float *ptr, int offset, int step, vfloat_rvvm1_sleef v) {
ptr += offset * 2;
for (int i = 0; i < (__riscv_vsetvlmax_e32m1()); i += 2) {
for (int i = 0; i < (int)((int)__riscv_vsetvlmax_e32m1()); i += 2) {
vfloat_rvvm1_sleef vv = __riscv_vslidedown(v, i, 2);
__riscv_vse32(ptr, vv, 2);
ptr += step * 2;
@ -2007,7 +2004,7 @@ static SLEEF_ALWAYS_INLINE tdi_t_rvvm1_sleef tdisettdi_tdi_vd3_vi_rvvm1_sleef(vd
}
// Builds a double-precision lane mask from an integer: i is broadcast into
// every 32-bit element of a vector register, and that register is then
// reinterpreted as a predicate.
//   i - bit pattern to replicate
// Returns the resulting mask.
static SLEEF_ALWAYS_INLINE rvv_dp_vopmask_rvvm1_sleef vcast_vo_i_rvvm1_sleef(int i) {
  return __riscv_vreinterpret_b64(__riscv_vmv_v_x_u32m1(i, ((int)__riscv_vsetvlmax_e32m1())));
}
static SLEEF_ALWAYS_INLINE vmask_rvvm1_sleef vreinterpret_vm_vi64_rvvm1_sleef(vint64_rvvm1_sleef v) {
return __riscv_vreinterpret_u64m1(v);
@ -2022,7 +2019,7 @@ static SLEEF_ALWAYS_INLINE vuint64_rvvm1_sleef vreinterpret_vu64_vm_rvvm1_sleef(
return m;
}
// Tests whether a double-precision lane mask has no bits set.
//   g - mask to inspect
// Returns 1 when the population count of g is zero, 0 otherwise.
static SLEEF_ALWAYS_INLINE int vtestallzeros_i_vo64_rvvm1_sleef(rvv_dp_vopmask_rvvm1_sleef g) {
  return __riscv_vcpop(g, ((int)__riscv_vsetvlmax_e64m1())) == 0;
}
// "Streaming" store of a double vector. This variant simply forwards to the
// regular store helper vstore_v_p_vd_rvvm1_sleef.
//   ptr - destination address
//   v   - vector to write
static SLEEF_ALWAYS_INLINE void vstream_v_p_vd_rvvm1_sleef(double *ptr, vdouble_rvvm1_sleef v) {
  vstore_v_p_vd_rvvm1_sleef(ptr, v);
}
@ -2048,7 +2045,7 @@ static int vcast_i_vi2(vint2_rvvm1_sleef v) {
// Loads a quad-precision vector from memory. The data is read as 32-bit
// integers into an LMUL=2 register group (twice the e32m1 lane count) and then
// reinterpreted as 64-bit unsigned lanes.
//   ptr - source address of the 32-bit words
// Returns the loaded register group viewed as vquad_rvvm1_sleef.
static vquad_rvvm1_sleef loadu_vq_p_rvvm1_sleef(const int32_t *ptr) {
  return __riscv_vreinterpret_u64m2(__riscv_vreinterpret_u32m2(__riscv_vle32_v_i32m2(ptr, ((int)__riscv_vsetvlmax_e32m1()) * 2)));
}
// Converts an argument-form quad vector into the internal quad vector type.
// The value is returned unchanged.
//   aq - quad vector in argument form
static SLEEF_ALWAYS_INLINE vquad_rvvm1_sleef cast_vq_aq_rvvm1_sleef(vargquad_rvvm1_sleef aq) {
  vquad_rvvm1_sleef q = aq;
  return q;
}
@ -3511,7 +3508,8 @@ SLEEF_INLINE SLEEF_CONST vdouble_rvvm1_sleef Sleef_expdx_u10rvvm1(vdouble_rvvm1_
u = vldexp2_vd_vd_vi_rvvm1_sleef(u, q);
u = vsel_vd_vo_vd_vd_rvvm1_sleef(vgt_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(709.78271114955742909217217426)), vcast_vd_d_rvvm1_sleef(__builtin_inf()), u);
rvv_dp_vopmask_rvvm1_sleef o = vgt_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(0x1.62e42fefa39efp+9));
u = vsel_vd_vo_vd_vd_rvvm1_sleef(o, vcast_vd_d_rvvm1_sleef(__builtin_inf()), u);
u = vreinterpret_vd_vm_rvvm1_sleef(vandnot_vm_vo64_vm_rvvm1_sleef(vlt_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(-1000)), vreinterpret_vm_vd_rvvm1_sleef(u)));
return u;
@ -3628,13 +3626,13 @@ static SLEEF_ALWAYS_INLINE SLEEF_CONST vdouble_rvvm1_sleef expk_rvvm1_sleef(vdou
}
SLEEF_INLINE SLEEF_CONST vdouble_rvvm1_sleef Sleef_powdx_u10rvvm1(vdouble_rvvm1_sleef x, vdouble_rvvm1_sleef y) {
rvv_dp_vopmask_rvvm1_sleef yisint = visint_vo_vd_rvvm1_sleef(y);
rvv_dp_vopmask_rvvm1_sleef yisodd = rvv_dp_vand_vo_vo_vo(visodd_vo_vd_rvvm1_sleef(y), yisint);
vdouble2_rvvm1_sleef d = ddmul_vd2_vd2_vd_rvvm1_sleef(logk_rvvm1_sleef(vabs_vd_vd_rvvm1_sleef(x)), y);
vdouble_rvvm1_sleef result = expk_rvvm1_sleef(d);
result = vsel_vd_vo_vd_vd_rvvm1_sleef(vgt_vo_vd_vd_rvvm1_sleef(vd2getx_vd_vd2_rvvm1_sleef(d), vcast_vd_d_rvvm1_sleef(709.78271114955742909217217426)), vcast_vd_d_rvvm1_sleef(__builtin_inf()), result);
rvv_dp_vopmask_rvvm1_sleef o = vgt_vo_vd_vd_rvvm1_sleef(vd2getx_vd_vd2_rvvm1_sleef(d), vcast_vd_d_rvvm1_sleef(0x1.62e42fefa39efp+9));
result = vsel_vd_vo_vd_vd_rvvm1_sleef(o, vcast_vd_d_rvvm1_sleef(__builtin_inf()), result);
result = vmul_vd_vd_vd_rvvm1_sleef(result,
vsel_vd_vo_vd_vd_rvvm1_sleef(vgt_vo_vd_vd_rvvm1_sleef(x, vcast_vd_d_rvvm1_sleef(0)),
@ -3660,7 +3658,6 @@ SLEEF_INLINE SLEEF_CONST vdouble_rvvm1_sleef Sleef_powdx_u10rvvm1(vdouble_rvvm1_
result = vsel_vd_vo_vd_vd_rvvm1_sleef(rvv_dp_vor_vo_vo_vo(veq_vo_vd_vd_rvvm1_sleef(y, vcast_vd_d_rvvm1_sleef(0)), veq_vo_vd_vd_rvvm1_sleef(x, vcast_vd_d_rvvm1_sleef(1))), vcast_vd_d_rvvm1_sleef(1), result);
return result;
}
static SLEEF_ALWAYS_INLINE SLEEF_CONST vdouble2_rvvm1_sleef expk2_rvvm1_sleef(vdouble2_rvvm1_sleef d) {
@ -4148,7 +4145,8 @@ SLEEF_INLINE SLEEF_CONST vdouble_rvvm1_sleef Sleef_log1pdx_u10rvvm1(vdouble_rvvm
vdouble_rvvm1_sleef r = vadd_vd_vd_vd_rvvm1_sleef(vd2getx_vd_vd2_rvvm1_sleef(s), vd2gety_vd_vd2_rvvm1_sleef(s));
r = vsel_vd_vo_vd_vd_rvvm1_sleef(vgt_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(1e+307)), vcast_vd_d_rvvm1_sleef(__builtin_inf()), r);
rvv_dp_vopmask_rvvm1_sleef ocore = vle_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(0x1.c7b1f3cac7433p+1019));
if(!__builtin_expect(!!(vtestallones_i_vo64_rvvm1_sleef (ocore)), 1)) r = vsel_vd_vo_vd_vd_rvvm1_sleef(ocore, r, Sleef_logdx_u10rvvm1(d));
r = vsel_vd_vo_vd_vd_rvvm1_sleef(rvv_dp_vor_vo_vo_vo(vlt_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(-1)), visnan_vo_vd_rvvm1_sleef(d)), vcast_vd_d_rvvm1_sleef(__builtin_nan("")), r);
r = vsel_vd_vo_vd_vd_rvvm1_sleef(veq_vo_vd_vd_rvvm1_sleef(d, vcast_vd_d_rvvm1_sleef(-1)), vcast_vd_d_rvvm1_sleef(-__builtin_inf()), r);
r = vsel_vd_vo_vd_vd_rvvm1_sleef(visnegzero_vo_vd_rvvm1_sleef(d), vcast_vd_d_rvvm1_sleef(-0.0), r);
@ -4228,7 +4226,7 @@ SLEEF_INLINE SLEEF_CONST vint_rvvm1_sleef Sleef_expfrexpdx_rvvm1(vdouble_rvvm1_s
vint_rvvm1_sleef ret = vcastu_vi_vm_rvvm1_sleef(vreinterpret_vm_vd_rvvm1_sleef(x));
ret = vsub_vi_vi_vi_rvvm1_sleef(vand_vi_vi_vi_rvvm1_sleef(vsrl_vi_vi_i_rvvm1_sleef(ret, 20), vcast_vi_i_rvvm1_sleef(0x7ff)), vcast_vi_i_rvvm1_sleef(0x3fe));
ret = vsel_vi_vo_vi_vi_rvvm1_sleef(rvv_dp_vor_vo_vo_vo(rvv_dp_vor_vo_vo_vo(veq_vo_vd_vd_rvvm1_sleef(x, vcast_vd_d_rvvm1_sleef(0)), visnan_vo_vd_rvvm1_sleef(x)), visinf_vo_vd_rvvm1_sleef(x)), vcast_vi_i_rvvm1_sleef(0), ret);
ret = vsel_vi_vo_vi_vi_rvvm1_sleef(vcast_vo32_vo64_rvvm1_sleef(rvv_dp_vor_vo_vo_vo(rvv_dp_vor_vo_vo_vo(veq_vo_vd_vd_rvvm1_sleef(x, vcast_vd_d_rvvm1_sleef(0)), visnan_vo_vd_rvvm1_sleef(x)), visinf_vo_vd_rvvm1_sleef(x))), vcast_vi_i_rvvm1_sleef(0), ret);
return ret;
}
@ -4631,14 +4629,6 @@ SLEEF_INLINE SLEEF_CONST vdouble_rvvm1_sleef Sleef_erfcdx_u15rvvm1(vdouble_rvvm1
return r;
}
#if !defined(__NVCC__) && ((defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8))
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && !defined(__NVCC__) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;
@ -5105,6 +5095,7 @@ SLEEF_INLINE SLEEF_CONST vfloat_rvvm1_sleef Sleef_tanfx_u35rvvm1(vfloat_rvvm1_sl
if (__builtin_expect(!!(vtestallones_i_vo32_rvvm1_sleef(vlt_vo_vf_vf_rvvm1_sleef(vabs_vf_vf_rvvm1_sleef(d), vcast_vf_f_rvvm1_sleef(125.0f*0.5f)))), 1)) {
q = vrint_vi2_vf_rvvm1_sleef(vmul_vf_vf_vf_rvvm1_sleef(d, vcast_vf_f_rvvm1_sleef((float)(2 * 0.318309886183790671537767526745028724))));
u = vcast_vf_vi2_rvvm1_sleef(q);
x = vmla_vf_vf_vf_vf_rvvm1_sleef(u, vcast_vf_f_rvvm1_sleef(-3.1414794921875f*0.5f), x);
x = vmla_vf_vf_vf_vf_rvvm1_sleef(u, vcast_vf_f_rvvm1_sleef(-0.00011315941810607910156f*0.5f), x);
x = vmla_vf_vf_vf_vf_rvvm1_sleef(u, vcast_vf_f_rvvm1_sleef(-1.9841872589410058936e-09f*0.5f), x);
@ -6506,7 +6497,8 @@ SLEEF_INLINE SLEEF_CONST vfloat_rvvm1_sleef Sleef_log1pfx_u10rvvm1(vfloat_rvvm1_
vfloat_rvvm1_sleef r = vadd_vf_vf_vf_rvvm1_sleef(vf2getx_vf_vf2_rvvm1_sleef(s), vf2gety_vf_vf2_rvvm1_sleef(s));
r = vsel_vf_vo_vf_vf_rvvm1_sleef(vgt_vo_vf_vf_rvvm1_sleef(d, vcast_vf_f_rvvm1_sleef(1e+38)), vcast_vf_f_rvvm1_sleef(__builtin_inff()), r);
rvv_sp_vopmask_rvvm1_sleef ocore = vle_vo_vf_vf_rvvm1_sleef(d, vcast_vf_f_rvvm1_sleef(0x1.2ced32p+126));
if(!__builtin_expect(!!(vtestallones_i_vo32_rvvm1_sleef (ocore)), 1)) r = vsel_vf_vo_vf_vf_rvvm1_sleef(ocore, r, Sleef_logfx_u10rvvm1(d));
r = vreinterpret_vf_vm_rvvm1_sleef(vor_vm_vo32_vm_rvvm1_sleef(vgt_vo_vf_vf_rvvm1_sleef(vcast_vf_f_rvvm1_sleef(-1), d), vreinterpret_vm_vf_rvvm1_sleef(r)));
r = vsel_vf_vo_vf_vf_rvvm1_sleef(veq_vo_vf_vf_rvvm1_sleef(d, vcast_vf_f_rvvm1_sleef(-1)), vcast_vf_f_rvvm1_sleef(-__builtin_inff()), r);
r = vsel_vf_vo_vf_vf_rvvm1_sleef(visnegzero_vo_vf_rvvm1_sleef(d), vcast_vf_f_rvvm1_sleef(-0.0f), r);

View File

@ -1,8 +1,11 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See http://www.boost.org/LICENSE_1_0.txt)
// This file is generated by SLEEF 3.6.1
// This file is generated by SLEEF 3.9.0
/* #undef SLEEF_FLOAT128_IS_IEEEQP */
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#ifndef SLEEF_ALWAYS_INLINE
#if defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)
@ -1010,6 +1013,7 @@ static const double Sleef_rempitabdp[] = {
2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322,
2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322,
2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323,
0, 0, 0, 0,
};
static const float Sleef_rempitabsp[] = {
@ -1116,17 +1120,10 @@ static const float Sleef_rempitabsp[] = {
1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
2.743283031e-13, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34,
0, 0, 0, 0,
};
#endif // #ifndef __SLEEF_REMPITAB__
#if !defined(__NVCC__) && ((defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8))
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && !defined(__NVCC__) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;
@ -1833,13 +1830,13 @@ static SLEEF_ALWAYS_INLINE vfloat_sve_sleef vmlsubadd_vf_vf_vf_vf_sve_sleef(vflo
// Swaps the two lanes of every adjacent pair of a double vector
// (lane 0 <-> lane 1, lane 2 <-> lane 3, ...).
//   x - input double vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vdouble_sve_sleef vrev21_vd_vd_sve_sleef(vdouble_sve_sleef x) {
  // Separate the odd-numbered and even-numbered lanes...
  vdouble_sve_sleef odd_lanes = svuzp2_f64(x, x);
  vdouble_sve_sleef even_lanes = svuzp1_f64(x, x);
  // ...then interleave them again with the odd lane placed first in each pair.
  return svzip1_f64(odd_lanes, even_lanes);
}
// Reverses the order of the adjacent lane pairs of a double vector while
// keeping the order of the two lanes inside each pair.
//   vd_sve_sleef - input double vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vdouble_sve_sleef vreva2_vd_vd_sve_sleef(vdouble_sve_sleef vd_sve_sleef) {
  // Descending lane indices: count-1, count-2, ..., 0 (a full reversal).
  svint64_t x = svindex_s64((((int)svcntd())-1), -1);
  // Swap the indices of each adjacent pair to restore the in-pair order.
  x = svzip1_s64(svuzp2_s64(x, x), svuzp1_s64(x, x));
  return svtbl_f64(vd_sve_sleef, svreinterpret_u64_s64(x));
}
// Reverses the order of the adjacent lane pairs of a float vector while
// keeping the order of the two lanes inside each pair.
//   vf - input float vector
// Returns the permuted vector.
static SLEEF_ALWAYS_INLINE vfloat_sve_sleef vreva2_vf_vf_sve_sleef(vfloat_sve_sleef vf) {
  // Descending lane indices: count-1, count-2, ..., 0 (a full reversal).
  svint32_t x = svindex_s32((((int)svcntw())-1), -1);
  // Swap the indices of each adjacent pair to restore the in-pair order.
  x = svzip1_s32(svuzp2_s32(x, x), svuzp1_s32(x, x));
  return svtbl_f32(vf, svreinterpret_u32_s32(x));
}
@ -3381,7 +3378,8 @@ SLEEF_INLINE SLEEF_CONST vdouble_sve_sleef Sleef_expdx_u10sve(vdouble_sve_sleef
u = vldexp2_vd_vd_vi_sve_sleef(u, q);
u = vsel_vd_vo_vd_vd_sve_sleef(vgt_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(709.78271114955742909217217426)), vcast_vd_d_sve_sleef(__builtin_inf()), u);
vopmask_sve_sleef o = vgt_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(0x1.62e42fefa39efp+9));
u = vsel_vd_vo_vd_vd_sve_sleef(o, vcast_vd_d_sve_sleef(__builtin_inf()), u);
u = vreinterpret_vd_vm_sve_sleef(vandnot_vm_vo64_vm_sve_sleef(vlt_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(-1000)), vreinterpret_vm_vd_sve_sleef(u)));
return u;
@ -3498,13 +3496,13 @@ static SLEEF_ALWAYS_INLINE SLEEF_CONST vdouble_sve_sleef expk_sve_sleef(vdouble2
}
SLEEF_INLINE SLEEF_CONST vdouble_sve_sleef Sleef_powdx_u10sve(vdouble_sve_sleef x, vdouble_sve_sleef y) {
vopmask_sve_sleef yisint = visint_vo_vd_sve_sleef(y);
vopmask_sve_sleef yisodd = vand_vo_vo_vo_sve_sleef(visodd_vo_vd_sve_sleef(y), yisint);
vdouble2_sve_sleef d = ddmul_vd2_vd2_vd_sve_sleef(logk_sve_sleef(vabs_vd_vd_sve_sleef(x)), y);
vdouble_sve_sleef result = expk_sve_sleef(d);
result = vsel_vd_vo_vd_vd_sve_sleef(vgt_vo_vd_vd_sve_sleef(vd2getx_vd_vd2_sve_sleef(d), vcast_vd_d_sve_sleef(709.78271114955742909217217426)), vcast_vd_d_sve_sleef(__builtin_inf()), result);
vopmask_sve_sleef o = vgt_vo_vd_vd_sve_sleef(vd2getx_vd_vd2_sve_sleef(d), vcast_vd_d_sve_sleef(0x1.62e42fefa39efp+9));
result = vsel_vd_vo_vd_vd_sve_sleef(o, vcast_vd_d_sve_sleef(__builtin_inf()), result);
result = vmul_vd_vd_vd_sve_sleef(result,
vsel_vd_vo_vd_vd_sve_sleef(vgt_vo_vd_vd_sve_sleef(x, vcast_vd_d_sve_sleef(0)),
@ -3530,7 +3528,6 @@ SLEEF_INLINE SLEEF_CONST vdouble_sve_sleef Sleef_powdx_u10sve(vdouble_sve_sleef
result = vsel_vd_vo_vd_vd_sve_sleef(vor_vo_vo_vo_sve_sleef(veq_vo_vd_vd_sve_sleef(y, vcast_vd_d_sve_sleef(0)), veq_vo_vd_vd_sve_sleef(x, vcast_vd_d_sve_sleef(1))), vcast_vd_d_sve_sleef(1), result);
return result;
}
static SLEEF_ALWAYS_INLINE SLEEF_CONST vdouble2_sve_sleef expk2_sve_sleef(vdouble2_sve_sleef d) {
@ -4018,7 +4015,8 @@ SLEEF_INLINE SLEEF_CONST vdouble_sve_sleef Sleef_log1pdx_u10sve(vdouble_sve_slee
vdouble_sve_sleef r = vadd_vd_vd_vd_sve_sleef(vd2getx_vd_vd2_sve_sleef(s), vd2gety_vd_vd2_sve_sleef(s));
r = vsel_vd_vo_vd_vd_sve_sleef(vgt_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(1e+307)), vcast_vd_d_sve_sleef(__builtin_inf()), r);
vopmask_sve_sleef ocore = vle_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(0x1.c7b1f3cac7433p+1019));
if(!__builtin_expect(!!(vtestallones_i_vo64_sve_sleef (ocore)), 1)) r = vsel_vd_vo_vd_vd_sve_sleef(ocore, r, Sleef_logdx_u10sve(d));
r = vsel_vd_vo_vd_vd_sve_sleef(vor_vo_vo_vo_sve_sleef(vlt_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(-1)), visnan_vo_vd_sve_sleef(d)), vcast_vd_d_sve_sleef(__builtin_nan("")), r);
r = vsel_vd_vo_vd_vd_sve_sleef(veq_vo_vd_vd_sve_sleef(d, vcast_vd_d_sve_sleef(-1)), vcast_vd_d_sve_sleef(-__builtin_inf()), r);
r = vsel_vd_vo_vd_vd_sve_sleef(visnegzero_vo_vd_sve_sleef(d), vcast_vd_d_sve_sleef(-0.0), r);
@ -4098,7 +4096,7 @@ SLEEF_INLINE SLEEF_CONST vint_sve_sleef Sleef_expfrexpdx_sve(vdouble_sve_sleef x
vint_sve_sleef ret = vcastu_vi_vm_sve_sleef(vreinterpret_vm_vd_sve_sleef(x));
ret = vsub_vi_vi_vi_sve_sleef(vand_vi_vi_vi_sve_sleef(vsrl_vi_vi_i_sve_sleef(ret, 20), vcast_vi_i_sve_sleef(0x7ff)), vcast_vi_i_sve_sleef(0x3fe));
ret = vsel_vi_vo_vi_vi_sve_sleef(vor_vo_vo_vo_sve_sleef(vor_vo_vo_vo_sve_sleef(veq_vo_vd_vd_sve_sleef(x, vcast_vd_d_sve_sleef(0)), visnan_vo_vd_sve_sleef(x)), visinf_vo_vd_sve_sleef(x)), vcast_vi_i_sve_sleef(0), ret);
ret = vsel_vi_vo_vi_vi_sve_sleef(vcast_vo32_vo64_sve_sleef(vor_vo_vo_vo_sve_sleef(vor_vo_vo_vo_sve_sleef(veq_vo_vd_vd_sve_sleef(x, vcast_vd_d_sve_sleef(0)), visnan_vo_vd_sve_sleef(x)), visinf_vo_vd_sve_sleef(x))), vcast_vi_i_sve_sleef(0), ret);
return ret;
}
@ -4497,14 +4495,6 @@ SLEEF_INLINE SLEEF_CONST vdouble_sve_sleef Sleef_erfcdx_u15sve(vdouble_sve_sleef
return r;
}
#if !defined(__NVCC__) && ((defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8))
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && !defined(__NVCC__) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;
@ -4983,6 +4973,7 @@ SLEEF_INLINE SLEEF_CONST vfloat_sve_sleef Sleef_tanfx_u35sve(vfloat_sve_sleef d)
if (__builtin_expect(!!(vtestallones_i_vo32_sve_sleef(vlt_vo_vf_vf_sve_sleef(vabs_vf_vf_sve_sleef(d), vcast_vf_f_sve_sleef(125.0f*0.5f)))), 1)) {
q = vrint_vi2_vf_sve_sleef(vmul_vf_vf_vf_sve_sleef(d, vcast_vf_f_sve_sleef((float)(2 * 0.318309886183790671537767526745028724))));
u = vcast_vf_vi2_sve_sleef(q);
x = vmla_vf_vf_vf_vf_sve_sleef(u, vcast_vf_f_sve_sleef(-3.1414794921875f*0.5f), x);
x = vmla_vf_vf_vf_vf_sve_sleef(u, vcast_vf_f_sve_sleef(-0.00011315941810607910156f*0.5f), x);
x = vmla_vf_vf_vf_vf_sve_sleef(u, vcast_vf_f_sve_sleef(-1.9841872589410058936e-09f*0.5f), x);
@ -6384,7 +6375,8 @@ SLEEF_INLINE SLEEF_CONST vfloat_sve_sleef Sleef_log1pfx_u10sve(vfloat_sve_sleef
vfloat_sve_sleef r = vadd_vf_vf_vf_sve_sleef(vf2getx_vf_vf2_sve_sleef(s), vf2gety_vf_vf2_sve_sleef(s));
r = vsel_vf_vo_vf_vf_sve_sleef(vgt_vo_vf_vf_sve_sleef(d, vcast_vf_f_sve_sleef(1e+38)), vcast_vf_f_sve_sleef(__builtin_inff()), r);
vopmask_sve_sleef ocore = vle_vo_vf_vf_sve_sleef(d, vcast_vf_f_sve_sleef(0x1.2ced32p+126));
if(!__builtin_expect(!!(vtestallones_i_vo32_sve_sleef (ocore)), 1)) r = vsel_vf_vo_vf_vf_sve_sleef(ocore, r, Sleef_logfx_u10sve(d));
r = vreinterpret_vf_vm_sve_sleef(vor_vm_vo32_vm_sve_sleef(vgt_vo_vf_vf_sve_sleef(vcast_vf_f_sve_sleef(-1), d), vreinterpret_vm_vf_sve_sleef(r)));
r = vsel_vf_vo_vf_vf_sve_sleef(veq_vo_vf_vf_sve_sleef(d, vcast_vf_f_sve_sleef(-1)), vcast_vf_f_sve_sleef(-__builtin_inff()), r);
r = vsel_vf_vo_vf_vf_sve_sleef(visnegzero_vo_vf_sve_sleef(d), vcast_vf_f_sve_sleef(-0.0f), r);

View File

@ -1,3 +1,52 @@
## 3.8 - 2025-01-27
The focus of this release has been to facilitate benchmarking in SLEEF.
It does so by providing a benchmarking tool and a plotting tool to postprocess
the results.
AArch64 self-hosted runners have been added to CI. Following this, the Linux and
compiler versions have been updated.
Fix inaccuracy issues in a few functions, failures with cpp checks and a few
bugs.
Finally, the project has been extended with a blog section and its first blog
[post](https://sleef.org/2024/10/02/new-pulse.html).
### Added
- Add benchmark and plotting tool by @joanaxcruz in #589, #597, #608 and #609
- Use Arm-hosted runners by @blapie in #581
- Add blog section and first post. by @blapie in #582
### Changed
- Update GH runners to Ubuntu 24.04 and GCC14 by @blapie in #598, #599 and #601
### Fixed
- Fix cbrt on AArch32, and atanf(+-0) with gcc-13 by @shibatch in #592
- Fix oflow bound in log1p(f), exp and pow by @blapie in #604 and #606
- Work around removal of some PowerPC intrinsics in GCC 15 by @musicinmybrain in #612
- Fix errors reported by cppcheck by @blapie in #595
## 3.7 - 2024-09-17
The focus of this release has been to meet open-source community standards. It
does so by providing Contributing Guidelines, Issues and Pull-Requests
templates. Additionally, the documentation has been reworked to improve
navigation (via search bar, side menu/panel, eased navigation on GitHub, ...)
and maintainability (reduced line count, mostly markdown sources, ...). The
website rendering is now delegated to a template customisable theme. See the
new website at [sleef.org](https://sleef.org/), and [docs/](./docs) for the
GitHub-rendered documentation. The release also provides various bug fixes on
several targets, for CPU detection and in the benchmark infrastructure.
### Added
- Add issue and PR templates. by @blapie in https://github.com/shibatch/sleef/pull/565
### Changed
- Adjust scheduling of GHA workflows by @blapie in https://github.com/shibatch/sleef/pull/553
- Port documentation from html to markdown by @blapie in https://github.com/shibatch/sleef/pull/564
- Update acosh documentation by @joanaxcruz in https://github.com/shibatch/sleef/pull/572
### Fixed
- S/390: Use getauxval for detecting VXE2 to fix #560 by @Andreas-Krebbel in https://github.com/shibatch/sleef/pull/561
- Revive micro-benchmarks for vector functions by @joanaxcruz in https://github.com/shibatch/sleef/pull/571
## 3.6.1 - 2024-06-10
This patch release provides important bug fixes, including a fix

View File

@ -1,8 +1,14 @@
cmake_minimum_required(VERSION 3.18)
project(SLEEF VERSION 3.6.1 LANGUAGES C)
set(SLEEF_VERSION 3.9.0)
message(STATUS "Configuring SLEEF ${SLEEF_VERSION}")
project(SLEEF VERSION ${SLEEF_VERSION} LANGUAGES C CXX)
set(SLEEF_SOVERSION ${SLEEF_VERSION_MAJOR})
set(CMAKE_CXX_STANDARD 20)
# Options
option(SLEEF_BUILD_STATIC_TEST_BINS "Build statically linked test executables" OFF)
@ -13,28 +19,96 @@ option(SLEEF_BUILD_QUAD "libsleefquad will be built." OFF)
option(SLEEF_BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON)
option(SLEEF_BUILD_SCALAR_LIB "libsleefscalar will be built." OFF)
option(SLEEF_BUILD_TESTS "Tests will be built." ON)
option(SLEEF_BUILD_BENCH "Bench will be built." OFF)
option(SLEEF_BUILD_BENCH_REF "Benchmark script for reference (e.g. system libm) will be built." OFF)
option(SLEEF_BUILD_INLINE_HEADERS "Build header for inlining whole SLEEF functions" OFF)
option(SLEEF_ENFORCE_DFT "Build fails if DFT is not built" OFF)
option(SLEEFDFT_ENABLE_STREAM "Streaming instructions are utilized in DFT." OFF)
option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF)
option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." ON)
option(SLEEF_SHOW_ERROR_LOG "Show cmake error log." OFF)
option(SLEEF_ASAN "Enable address sanitizing on all targets." OFF)
option(SLEEF_ENABLE_TESTER "Enable testing libm with tester" OFF)
option(SLEEF_ENFORCE_TESTER "Build fails if tester is not available" OFF)
option(SLEEF_ENFORCE_TESTER3 "Build fails if tester3 is not built" OFF)
option(SLEEF_ENABLE_TESTER4 "Enable testing with tester4" ON)
option(SLEEF_ENFORCE_TESTER4 "Build fails if tester4 is not available" OFF)
option(SLEEF_ENABLE_ALTDIV "Enable alternative division method (aarch64 only)" OFF)
option(SLEEF_ENABLE_ALTSQRT "Enable alternative sqrt method (aarch64 only)" OFF)
option(SLEEF_DISABLE_FFTW "Disable testing the DFT library with FFTW" OFF)
option(SLEEF_DISABLE_MPFR "Disable testing with the MPFR library" OFF)
option(SLEEF_ENABLE_TLFLOAT "Enable use of TLFloat library" ON)
option(SLEEF_DISABLE_SSL "Disable testing with the SSL library" OFF)
set(OPENSSL_EXTRA_LIBRARIES "" CACHE STRING "Extra libraries for openssl")
option(SLEEF_ENABLE_CUDA "Enable CUDA" OFF)
option(SLEEF_ENABLE_CXX "Enable C++" OFF)
option(SLEEF_BUILD_WITH_LIBM "build libsleef with libm, can turn off on Windows to solve mutiple math functions issue." ON)
option(SLEEF_DISABLE_LONG_DOUBLE "Disable long double" OFF)
option(SLEEF_ENFORCE_LONG_DOUBLE "Build fails if long double is not supported by the compiler" OFF)
option(SLEEF_DISABLE_FLOAT128 "Disable float128" OFF)
option(SLEEF_ENFORCE_FLOAT128 "Build fails if float128 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_SSE2 "Disable SSE2" OFF)
option(SLEEF_ENFORCE_SSE2 "Build fails if SSE2 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_SSE4 "Disable SSE4" OFF)
option(SLEEF_ENFORCE_SSE4 "Build fails if SSE4 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_AVX "Disable AVX" OFF)
option(SLEEF_ENFORCE_AVX "Build fails if AVX is not supported by the compiler" OFF)
option(SLEEF_DISABLE_FMA4 "Disable FMA4" OFF)
option(SLEEF_ENFORCE_FMA4 "Build fails if FMA4 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_AVX2 "Disable AVX2" OFF)
option(SLEEF_ENFORCE_AVX2 "Build fails if AVX2 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_AVX512F "Disable AVX512F" OFF)
option(SLEEF_ENFORCE_AVX512F "Build fails if AVX512F is not supported by the compiler" OFF)
option(SLEEF_DISABLE_SVE "Disable SVE" OFF)
option(SLEEF_ENFORCE_SVE "Build fails if SVE is not supported by the compiler" OFF)
option(SLEEF_DISABLE_VSX "Disable VSX" OFF)
option(SLEEF_ENFORCE_VSX "Build fails if VSX is not supported by the compiler" OFF)
option(SLEEF_DISABLE_VSX3 "Disable VSX3" OFF)
option(SLEEF_ENFORCE_VSX3 "Build fails if VSX3 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_VXE "Disable VXE" OFF)
option(SLEEF_ENFORCE_VXE "Build fails if VXE is not supported by the compiler" OFF)
option(SLEEF_DISABLE_VXE2 "Disable VXE2" OFF)
option(SLEEF_ENFORCE_VXE2 "Build fails if VXE2 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_RVVM1 "Disable RVVM1" OFF)
option(SLEEF_ENFORCE_RVVM1 "Build fails if RVVM1 is not supported by the compiler" OFF)
option(SLEEF_DISABLE_RVVM2 "Disable RVVM2" OFF)
option(SLEEF_ENFORCE_RVVM2 "Build fails if RVVM2 is not supported by the compiler" OFF)
option(SLEEF_ENFORCE_CUDA "Build fails if CUDA is not supported" OFF)
option(SLEEF_DISABLE_OPENMP "Disable OPENMP" OFF)
option(SLEEF_ENFORCE_OPENMP "Build fails if OPENMP is not supported by the compiler" OFF)
#
# Abort configuration when the C and C++ compilers differ in either compiler ID
# or compiler version.
if ((NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "${CMAKE_CXX_COMPILER_ID}") OR
(NOT CMAKE_C_COMPILER_VERSION VERSION_EQUAL CMAKE_CXX_COMPILER_VERSION))
message(FATAL_ERROR "Different versions of C compiler and C++ compiler")
endif()
#
# Validate the SLEEF_BUILD_BENCH_REF option: it requires SLEEF_BUILD_BENCH to be
# enabled as well, and is rejected unless CMAKE_SYSTEM_NAME matches "Linux".
if (SLEEF_BUILD_BENCH_REF)
if (NOT SLEEF_BUILD_BENCH)
message(FATAL_ERROR "SLEEF_BUILD_BENCH must be on when SLEEF_BUILD_BENCH_REF is enabled.")
endif ()
if(NOT CMAKE_SYSTEM_NAME MATCHES Linux)
message(FATAL_ERROR "Libm benchmarking not supported in this OS.")
endif()
endif ()
if (DEFINED SLEEF_BUILD_SHARED_LIBS)
set(BUILD_SHARED_LIBS ${SLEEF_BUILD_SHARED_LIBS})
endif ()
@ -133,13 +207,11 @@ set(COSTOVERRIDE_RVVM2NOFMA 20)
#
enable_testing()
if (SLEEF_ENABLE_CXX)
enable_language(CXX)
endif()
enable_language(CXX)
if (SLEEF_ENABLE_CUDA)
enable_language(CUDA)
set(CMAKE_CUDA_ARCHITECTURES all-major)
endif()
# For specifying installation directories
@ -197,6 +269,7 @@ include(Configure.cmake)
configure_file(
${PROJECT_SOURCE_DIR}/sleef-config.h.in
${PROJECT_BINARY_DIR}/include/sleef-config.h @ONLY)
include_directories(AFTER "${PROJECT_BINARY_DIR}/include")
# We like to have a documented index of all targets in the project. The
# variables listed below carry the names of the targets defined throughout
@ -228,7 +301,9 @@ set(TARGET_MKALIAS "mkalias")
# Generates static library common
# Defined in src/common/CMakeLists.txt via command add_library
set(TARGET_LIBCOMMON_OBJ "common")
set(TARGET_LIBARRAYMAP_OBJ "arraymap")
set(TARGET_PSHA_OBJ "psha_obj")
set(TARGET_TESTERUTIL_OBJ "testerutil_obj")
set(TARGET_QTESTERUTIL_OBJ "qtesterutil_obj")
# Function used to add an executable that is executed on host
function(add_host_executable TARGETNAME)
@ -239,15 +314,23 @@ function(add_host_executable TARGETNAME)
target_compile_options(${TARGETNAME} PRIVATE -arch "${CMAKE_HOST_SYSTEM_PROCESSOR}")
target_link_options(${TARGETNAME} PRIVATE -arch "${CMAKE_HOST_SYSTEM_PROCESSOR}")
endif()
elseif (DEFINED ENV{SLEEF_TARGET_EXEC_USE_QEMU})
if($ENV{SLEEF_TARGET_EXEC_USE_QEMU})
add_executable(${TARGETNAME} ${ARGN})
endif()
else()
add_executable(${TARGETNAME} IMPORTED GLOBAL)
set_property(TARGET ${TARGETNAME} PROPERTY IMPORTED_LOCATION ${NATIVE_BUILD_DIR}/bin/${TARGETNAME})
if(CMAKE_HOST_WIN32)
set_property(TARGET ${TARGETNAME} PROPERTY IMPORTED_LOCATION ${NATIVE_BUILD_DIR}/bin/${TARGETNAME}.exe)
else()
set_property(TARGET ${TARGETNAME} PROPERTY IMPORTED_LOCATION ${NATIVE_BUILD_DIR}/bin/${TARGETNAME})
endif()
endif()
endfunction()
# Adds the private compile definition ENABLE_AAVPCS=1 to TARGETNAME, but only
# when the build is not cross-compiling.
#   TARGETNAME - name of the target that receives the definition
# NOTE(review): the active call below is followed by a commented-out copy of the
# same call; confirm whether the definition is meant to be enabled or disabled.
function(host_target_AAVPCS_definitions TARGETNAME)
if (NOT CMAKE_CROSSCOMPILING)
target_compile_definitions(${TARGETNAME} PRIVATE ENABLE_AAVPCS=1)
# target_compile_definitions(${TARGETNAME} PRIVATE ENABLE_AAVPCS=1)
endif()
endfunction()
@ -303,6 +386,7 @@ if(SLEEF_SHOW_CONFIG)
message(" Detected C compiler: ${CMAKE_C_COMPILER_ID} @ ${CMAKE_C_COMPILER}")
message(" CMake: ${CMAKE_VERSION}")
message(" Make program: ${CMAKE_MAKE_PROGRAM}")
message(" CMake build type: ${CMAKE_BUILD_TYPE}")
if(CMAKE_CROSSCOMPILING)
message(" Crosscompiling SLEEF.")
message(" Native build dir: ${NATIVE_BUILD_DIR}")
@ -317,6 +401,7 @@ if(SLEEF_SHOW_CONFIG)
message(STATUS "GMP : " ${LIBGMP})
message(STATUS "RT : " ${LIBRT})
message(STATUS "FFTW3 : " ${LIBFFTW3})
message(STATUS "FFTW3F : " ${LIBFFTW3F})
message(STATUS "OPENSSL : " ${OPENSSL_VERSION})
message(STATUS "SDE : " ${SDE_COMMAND})
if (SLEEF_BUILD_INLINE_HEADERS)
@ -337,3 +422,4 @@ if(SLEEF_SHOW_CONFIG)
message(STATUS "Building SLEEF with AArch64 Vector PCS support")
endif()
endif(SLEEF_SHOW_CONFIG)

View File

@ -1,27 +0,0 @@
# List of contributors
These lists are not exhaustive and only provide most relevant contact information.
For an exhaustive list of contributors please refer to the
[GitHub contributors section for SLEEF](https://github.com/shibatch/sleef/graphs/contributors).
## Maintainers
| Name | Affiliation | Github profile |
| -------------------- | ----------------------- | ---------------------------------- |
| Pierre Blanchard | Arm Ltd. | https://github.com/blapie |
| Joana Cruz | Arm Ltd. | https://github.com/joanaxcruz |
| Joe Ramsay | Arm Ltd. | https://github.com/joeramsay |
| Naoki Shibata | Nara Institute of Science and Technology | https://github.com/shibatch |
## Contributors
| Name | Affiliation | Github profile |
| -------------------- | ----------------------- | ---------------------------------- |
| Anonymous | | https://github.com/friendlyanon |
| Diana Bite | Former Arm Ltd. | https://github.com/diaena |
| Ludovic Henry | Rivos Inc. | https://github.com/luhenry |
| Martin Krastev | Chaos Group | https://github.com/blu |
| Jilayne Lovejoy | Former Arm Inc. | https://github.com/jlovejoy |
| Kerry McLaughlin | Arm Ltd. | https://github.com/kmclaughlin-arm |
| Alexandre Mutel | Unity Technologies | https://github.com/xoofx |
| Francesco Petrogalli | Former Arm Ltd. | https://github.com/fpetrogalli-arm |

View File

@ -1,5 +1,6 @@
include(CheckCCompilerFlag)
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)
include(CheckTypeSize)
include(CheckLanguage)
@ -11,35 +12,39 @@ if (SLEEF_BUILD_STATIC_TEST_BINS)
set(CMAKE_EXE_LINKER_FLAGS "-static")
endif()
set(OPENSSL_EXTRA_LIBRARIES "" CACHE STRING "Extra libraries for openssl")
if (NOT CMAKE_CROSSCOMPILING AND NOT SLEEF_FORCE_FIND_PACKAGE_SSL)
if (SLEEF_BUILD_STATIC_TEST_BINS)
set(OPENSSL_USE_STATIC_LIBS TRUE)
endif()
find_package(OpenSSL)
if (OPENSSL_FOUND)
set(SLEEF_OPENSSL_FOUND TRUE)
set(SLEEF_OPENSSL_LIBRARIES ${OPENSSL_LIBRARIES})
# Work around for tester3 sig segv, when linking versions of openssl (1.1.1) statically.
# This is a known issue https://github.com/openssl/openssl/issues/13872.
if (NOT SLEEF_DISABLE_SSL)
if (NOT CMAKE_CROSSCOMPILING AND NOT SLEEF_FORCE_FIND_PACKAGE_SSL)
if (SLEEF_BUILD_STATIC_TEST_BINS)
string(REGEX REPLACE
"-lpthread" "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive"
SLEEF_OPENSSL_LIBRARIES "${OPENSSL_LIBRARIES}")
set(OPENSSL_USE_STATIC_LIBS TRUE)
endif()
find_package(OpenSSL)
if (OPENSSL_FOUND)
set(SLEEF_OPENSSL_FOUND TRUE)
set(SLEEF_OPENSSL_LIBRARIES ${OPENSSL_LIBRARIES})
# Work around for tester3 sig segv, when linking versions of openssl (1.1.1) statically.
# This is a known issue https://github.com/openssl/openssl/issues/13872.
if (SLEEF_BUILD_STATIC_TEST_BINS)
string(REGEX REPLACE
"-lpthread" "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive"
SLEEF_OPENSSL_LIBRARIES "${OPENSSL_LIBRARIES}")
endif()
set(SLEEF_OPENSSL_VERSION ${OPENSSL_VERSION})
set(SLEEF_OPENSSL_LIBRARIES ${SLEEF_OPENSSL_LIBRARIES} ${OPENSSL_EXTRA_LIBRARIES})
set(SLEEF_OPENSSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
endif()
else()
# find_package cannot find OpenSSL when cross-compiling
find_library(LIBSSL ssl)
find_library(LIBCRYPTO crypto)
if (LIBSSL AND LIBCRYPTO)
set(SLEEF_OPENSSL_FOUND TRUE)
set(SLEEF_OPENSSL_LIBRARIES ${LIBSSL} ${LIBCRYPTO} ${OPENSSL_EXTRA_LIBRARIES})
set(SLEEF_OPENSSL_VERSION ${LIBSSL})
endif()
set(SLEEF_OPENSSL_VERSION ${OPENSSL_VERSION})
set(SLEEF_OPENSSL_LIBRARIES ${SLEEF_OPENSSL_LIBRARIES} ${OPENSSL_EXTRA_LIBRARIES})
set(SLEEF_OPENSSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
endif()
else()
# find_package cannot find OpenSSL when cross-compiling
find_library(LIBSSL ssl)
find_library(LIBCRYPTO crypto)
if (LIBSSL AND LIBCRYPTO)
set(SLEEF_OPENSSL_FOUND TRUE)
set(SLEEF_OPENSSL_LIBRARIES ${LIBSSL} ${LIBCRYPTO} ${OPENSSL_EXTRA_LIBRARIES})
set(SLEEF_OPENSSL_VERSION ${LIBSSL})
endif()
set(SLEEF_OPENSSL_FOUND FALSE)
message(STATUS "Detection of OpenSSL is skipped since SLEEF_DISABLE_SSL is specified")
endif()
if (SLEEF_ENFORCE_TESTER3 AND NOT SLEEF_OPENSSL_FOUND)
@ -48,10 +53,20 @@ endif()
# Some toolchains require explicit linking of the libraries following.
find_library(LIB_MPFR mpfr)
find_library(LIBM m)
if(SLEEF_BUILD_WITH_LIBM)
find_library(LIBM m)
endif()
find_library(LIBGMP gmp)
find_library(LIBRT rt)
find_library(LIBFFTW3 fftw3)
find_library(LIBFFTW3F fftw3f)
find_library(LIBFFTW3_OMP fftw3_omp)
find_library(LIBFFTW3F_OMP fftw3f_omp)
if (LIBFFTW3 AND LIBFFTW3F AND LIBFFTW3_OMP AND LIBFFTW3F_OMP)
set(SLEEF_LIBFFTW3_LIBRARIES ${LIBFFTW3} ${LIBFFTW3F} ${LIBFFTW3_OMP} ${LIBFFTW3F_OMP})
endif()
if (LIB_MPFR)
find_path(MPFR_INCLUDE_DIR
@ -63,7 +78,7 @@ if (LIBFFTW3)
find_path(FFTW3_INCLUDE_DIR
NAMES fftw3.h
ONLY_CMAKE_FIND_ROOT_PATH)
endif(LIBFFTW3)
endif()
if (NOT LIBM)
set(LIBM "")
@ -77,10 +92,77 @@ if (SLEEF_DISABLE_MPFR)
set(LIB_MPFR "")
endif()
if (SLEEF_DISABLE_SSL)
set(SLEEF_OPENSSL_FOUND FALSE)
# Include submodules
set(SLEEF_SUBMODULE_INSTALL_DIR "${CMAKE_BINARY_DIR}/submodules")
include(ExternalProject)
include(FindPkgConfig)
if (NOT EXISTS "${PROJECT_SOURCE_DIR}/submodules")
file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/submodules")
endif()
# Include TLFloat as a submodule
#
# When SLEEF_ENABLE_TLFLOAT is set, TLFloat is taken from the first of these
# sources that applies:
#   1. an existing checkout at ${PROJECT_SOURCE_DIR}/submodules/tlfloat
#      (detected by the presence of its CMakeLists.txt),
#   2. an installed copy found through pkg-config whose version is at least
#      TLFLOAT_MINIMUM_VERSION,
#   3. a git clone of the pinned commit TLFLOAT_GIT_TAG from GitHub.
# Every branch defines a target named ext_tlfloat.
if (SLEEF_ENABLE_TLFLOAT)
set(TLFLOAT_MINIMUM_VERSION 1.15.0)
set(TLFLOAT_GIT_TAG "fb0390157d5c8811fc2a5a6d7d8eac27261f06fb")
set(TLFLOAT_SOURCE_DIR "${PROJECT_SOURCE_DIR}/submodules/tlfloat")
set(TLFLOAT_INSTALL_DIR "${SLEEF_SUBMODULE_INSTALL_DIR}/tlfloat")
# Arguments forwarded to the TLFloat sub-build: install prefix, the build type
# of this project, and switches that build the libraries but not utils/tests.
set(TLFLOAT_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TLFLOAT_INSTALL_DIR} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_LIBS=True -DBUILD_UTILS=False -DBUILD_TESTS=False)
# Forward the compilers and the toolchain file only when they are set here.
if (CMAKE_C_COMPILER)
list(APPEND TLFLOAT_CMAKE_ARGS -DCMAKE_C_COMPILER:PATH=${CMAKE_C_COMPILER})
endif()
if (CMAKE_CXX_COMPILER)
list(APPEND TLFLOAT_CMAKE_ARGS -DCMAKE_CXX_COMPILER:PATH=${CMAKE_CXX_COMPILER})
endif()
if (CMAKE_TOOLCHAIN_FILE)
list(APPEND TLFLOAT_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
endif()
if (EXISTS "${TLFLOAT_SOURCE_DIR}/CMakeLists.txt")
# If the source code of tlfloat is already downloaded, use it
# (no GIT_REPOSITORY is given, so nothing is fetched in this branch)
ExternalProject_Add(ext_tlfloat
SOURCE_DIR "${TLFLOAT_SOURCE_DIR}"
CMAKE_ARGS ${TLFLOAT_CMAKE_ARGS}
UPDATE_DISCONNECTED TRUE
)
# Headers and libraries come from the sub-build's install prefix.
include_directories(BEFORE "${TLFLOAT_INSTALL_DIR}/include")
link_directories(BEFORE "${TLFLOAT_INSTALL_DIR}/lib")
set(TLFLOAT_LIBRARIES "tlfloat")
else()
pkg_search_module(TLFLOAT tlfloat)
if (TLFLOAT_FOUND AND TLFLOAT_VERSION VERSION_GREATER_EQUAL TLFLOAT_MINIMUM_VERSION)
# If tlfloat is installed on the system
# ext_tlfloat is created as a target with no commands in this branch.
# TLFLOAT_LIBRARIES is not assigned here; NOTE(review): presumably the value
# set by pkg_search_module is used - confirm against FindPkgConfig.
add_custom_target(ext_tlfloat ALL)
include_directories(BEFORE "${TLFLOAT_INCLUDE_DIRS}")
link_directories(BEFORE "${TLFLOAT_LIBDIR}")
message(STATUS "Found installed TLFloat " ${TLFLOAT_VERSION})
else()
# Otherwise, download the source code
# (Git is mandatory here; the clone lands in TLFLOAT_SOURCE_DIR)
find_package(Git REQUIRED)
ExternalProject_Add(ext_tlfloat
GIT_REPOSITORY https://github.com/shibatch/tlfloat
GIT_TAG "${TLFLOAT_GIT_TAG}"
SOURCE_DIR "${TLFLOAT_SOURCE_DIR}"
CMAKE_ARGS ${TLFLOAT_CMAKE_ARGS}
UPDATE_DISCONNECTED TRUE
)
include_directories(BEFORE "${TLFLOAT_INSTALL_DIR}/include")
link_directories(BEFORE "${TLFLOAT_INSTALL_DIR}/lib")
set(TLFLOAT_LIBRARIES "tlfloat")
endif()
endif()
endif(SLEEF_ENABLE_TLFLOAT)
# Force set default build type if none was specified
# Note: some sleef code requires the optimisation flags turned on
if(NOT CMAKE_BUILD_TYPE)
@ -124,7 +206,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
set(COMPILER_SUPPORTS_NEON32VFPV4 1)
set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mfpu=vfpv4")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
set(SLEEF_ARCH_PPC64 ON CACHE INTERNAL "True for PPC64 architecture.")
set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mvsx")
@ -149,7 +231,7 @@ if(NOT CLANG_EXE_PATH)
set(CLANG_EXE_PATH ${CMAKE_C_COMPILER})
else()
# Else we may find clang on the path?
find_program(CLANG_EXE_PATH NAMES clang "clang-11" "clang-10" "clang-9" "clang-8" "clang-7" "clang-6.0" "clang-5.0" "clang-4.0" "clang-3.9")
find_program(CLANG_EXE_PATH NAMES clang "clang-25" "clang-24" "clang-23" "clang-22" "clang-21" "clang-20" "clang-19" "clang-18" "clang-17")
endif()
endif()
@ -188,7 +270,7 @@ set(CLANG_FLAGS_ENABLE_RVVM2NOFMA "-march=rv64gcv_zba_zbb_zbs")
set(FLAGS_OTHERS "")
# All variables storing compiler flags should be prefixed with FLAGS_
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang|QCC)")
# Always compile sleef with -ffp-contract.
set(FLAGS_STRICTMATH "-ffp-contract=off")
set(FLAGS_FASTMATH "-ffast-math")
@ -209,13 +291,13 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
# Warning flags.
set(FLAGS_WALL "-Wall -Wno-unused-function -Wno-attributes -Wno-unused-result")
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|QCC)")
# The following compiler option is needed to suppress the warning
# "AVX vector return without AVX enabled changes the ABI" at
# src/arch/helpervecext.h:88
string(CONCAT FLAGS_WALL ${FLAGS_WALL} " -Wno-psabi")
set(FLAGS_ENABLE_NEON32 "-mfpu=neon")
endif(CMAKE_C_COMPILER_ID MATCHES "GNU")
endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|QCC)")
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND SLEEF_ENABLE_LTO)
if (NOT SLEEF_LLVM_AR_COMMAND)
@ -296,7 +378,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
endif()
set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_OTHERS}")
if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.99)
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|QCC)" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.99)
set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_NOSTRICTALIASING} ${FLAGS_OTHERS}")
else()
set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_NOSTRICTALIASING} ${FLAGS_FASTMATH} ${FLAGS_OTHERS}")
@ -306,9 +388,17 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU")
set(FLAGS_ENABLE_SVE "${FLAGS_ENABLE_SVE};-fno-tree-vrp")
endif()
# QNX on aarch64: both -march=armv8-a additions below are commented out, so
# this block currently does not modify SLEEF_C_FLAGS or DFT_C_FLAGS.
if(QNX AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
#set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -march=armv8-a ")
#set(DFT_C_FLAGS "${DFT_C_FLAGS} -march=armv8-a ")
endif()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "GNU")
set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse")
set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse -m128bit-long-double")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "QCC")
set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse")
set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse -m128bit-long-double")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "Clang")
set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse")
set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse")
@ -328,9 +418,6 @@ endif()
# Long double
option(SLEEF_DISABLE_LONG_DOUBLE "Disable long double" OFF)
option(SLEEF_ENFORCE_LONG_DOUBLE "Build fails if long double is not supported by the compiler" OFF)
if(NOT SLEEF_DISABLE_LONG_DOUBLE)
CHECK_TYPE_SIZE("long double" LD_SIZE)
if(LD_SIZE GREATER "9")
@ -351,9 +438,6 @@ endif()
# float128
option(SLEEF_DISABLE_FLOAT128 "Disable float128" OFF)
option(SLEEF_ENFORCE_FLOAT128 "Build fails if float128 is not supported by the compiler" OFF)
if(NOT SLEEF_DISABLE_FLOAT128)
CHECK_C_SOURCE_COMPILES("
int main() { __float128 r = 1;
@ -373,10 +457,37 @@ if(COMPILER_SUPPORTS_FLOAT128)
}" COMPILER_SUPPORTS_QUADMATH)
endif()
# SSE2
# Compile-only probe of the bit layout of __float128, run only when
# COMPILER_SUPPORTS_FLOAT128 is set. The C++ snippet multiplies two constants
# as __float128, std::bit_cast's the product into two long longs and
# static_asserts that their XOR equals a fixed constant; the outcome is stored
# in SLEEF_FLOAT128_IS_IEEEQP.
# NOTE(review): the constant is presumably what an IEEE binary128
# representation yields (going by the result variable's name) - confirm.
if(COMPILER_SUPPORTS_FLOAT128)
# Pass the C++ target triple to the probe when one is configured.
if (CMAKE_CXX_COMPILER_TARGET)
set(CMAKE_REQUIRED_FLAGS "--target=${CMAKE_CXX_COMPILER_TARGET}")
endif()
CHECK_CXX_SOURCE_COMPILES("
#include <bit>
struct s { long long x, y; };
int main(int argc, char **argv) {
constexpr s a = std::bit_cast<s>(__float128(0.1234)*__float128(56.789));
static_assert((a.x ^ a.y) == 0xc7d695c93a4e2b71LL);
__float128 i = argc;
return (int)i;
}
" SLEEF_FLOAT128_IS_IEEEQP)
# Clear the flags so they do not leak into later checks.
set(CMAKE_REQUIRED_FLAGS)
endif()
option(SLEEF_DISABLE_SSE2 "Disable SSE2" OFF)
option(SLEEF_ENFORCE_SSE2 "Build fails if SSE2 is not supported by the compiler" OFF)
# Compile-only probe of the bit layout of long double. The C++ snippet
# multiplies two constants as long double, std::bit_cast's the product into two
# long longs and static_asserts that their XOR equals the same fixed constant
# used by the __float128 probe; the outcome is stored in
# SLEEF_LONGDOUBLE_IS_IEEEQP.
# NOTE(review): std::bit_cast<s> only compiles when long double is 16 bytes
# wide, so the probe presumably also fails on narrower long double - confirm.
# Pass the C++ target triple to the probe when one is configured.
if (CMAKE_CXX_COMPILER_TARGET)
set(CMAKE_REQUIRED_FLAGS "--target=${CMAKE_CXX_COMPILER_TARGET}")
endif()
CHECK_CXX_SOURCE_COMPILES("
#include <bit>
struct s { long long x, y; };
int main(void) {
constexpr s a = std::bit_cast<s>((long double)0.1234*(long double)56.789);
static_assert((a.x ^ a.y) == 0xc7d695c93a4e2b71LL);
}
" SLEEF_LONGDOUBLE_IS_IEEEQP)
# Clear the flags so they do not leak into later checks.
set(CMAKE_REQUIRED_FLAGS)
# SSE2
if(SLEEF_ARCH_X86 AND NOT SLEEF_DISABLE_SSE2)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SSE2}")
@ -397,9 +508,6 @@ endif()
# SSE 4.1
option(SLEEF_DISABLE_SSE4 "Disable SSE4" OFF)
option(SLEEF_ENFORCE_SSE4 "Build fails if SSE4 is not supported by the compiler" OFF)
if(SLEEF_ARCH_X86 AND NOT SLEEF_DISABLE_SSE4)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SSE4}")
CHECK_C_SOURCE_COMPILES("
@ -419,9 +527,6 @@ endif()
# AVX
option(SLEEF_ENFORCE_AVX "Disable AVX" OFF)
option(SLEEF_ENFORCE_AVX "Build fails if AVX is not supported by the compiler" OFF)
if(SLEEF_ARCH_X86 AND NOT SLEEF_DISABLE_AVX)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX}")
CHECK_C_SOURCE_COMPILES("
@ -441,9 +546,6 @@ endif()
# FMA4
option(SLEEF_DISABLE_FMA4 "Disable FMA4" OFF)
option(SLEEF_ENFORCE_FMA4 "Build fails if FMA4 is not supported by the compiler" OFF)
if(SLEEF_ARCH_X86 AND NOT SLEEF_DISABLE_FMA4)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_FMA4}")
CHECK_C_SOURCE_COMPILES("
@ -463,9 +565,6 @@ endif()
# AVX2
option(SLEEF_DISABLE_AVX2 "Disable AVX2" OFF)
option(SLEEF_ENFORCE_AVX2 "Build fails if AVX2 is not supported by the compiler" OFF)
if(SLEEF_ARCH_X86 AND NOT SLEEF_DISABLE_AVX2)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX2}")
CHECK_C_SOURCE_COMPILES("
@ -490,9 +589,6 @@ endif()
# AVX512F
option(SLEEF_DISABLE_AVX512F "Disable AVX512F" OFF)
option(SLEEF_ENFORCE_AVX512F "Build fails if AVX512F is not supported by the compiler" OFF)
if(SLEEF_ARCH_X86 AND NOT SLEEF_DISABLE_AVX512F)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX512F}")
CHECK_C_SOURCE_COMPILES("
@ -522,9 +618,6 @@ endif()
# SVE
option(SLEEF_DISABLE_SVE "Disable SVE" OFF)
option(SLEEF_ENFORCE_SVE "Build fails if SVE is not supported by the compiler" OFF)
# Darwin does not support SVE yet (see issue #474),
# therefore we disable SVE on Darwin systems.
if(SLEEF_ARCH_AARCH64 AND NOT SLEEF_DISABLE_SVE AND NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")
@ -546,15 +639,12 @@ endif()
# VSX
option(SLEEF_DISABLE_VSX "Disable VSX" OFF)
option(SLEEF_ENFORCE_VSX "Build fails if VSX is not supported by the compiler" OFF)
if(SLEEF_ARCH_PPC64 AND NOT SLEEF_DISABLE_VSX)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_VSX}")
CHECK_C_SOURCE_COMPILES("
#include <altivec.h>
#ifndef __LITTLE_ENDIAN__
#error \"Only VSX(ISA2.07) little-endian mode is supported \"
#if !defined(__LITTLE_ENDIAN__) && !defined(_AIX)
#error \"Only VSX(ISA2.07) little-endian mode and AIX is supported \"
#endif
int main() {
vector double d;
@ -576,9 +666,6 @@ endif()
# VSX3
option(SLEEF_DISABLE_VSX3 "Disable VSX3" OFF)
option(SLEEF_ENFORCE_VSX3 "Build fails if VSX3 is not supported by the compiler" OFF)
if(SLEEF_ARCH_PPC64 AND NOT SLEEF_DISABLE_VSX3)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_VSX3}")
CHECK_C_SOURCE_COMPILES("
@ -605,9 +692,6 @@ endif()
# IBM Z
option(SLEEF_DISABLE_VXE "Disable VXE" OFF)
option(SLEEF_ENFORCE_VXE "Build fails if VXE is not supported by the compiler" OFF)
if(SLEEF_ARCH_S390X AND NOT SLEEF_DISABLE_VXE)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_VXE}")
CHECK_C_SOURCE_COMPILES("
@ -629,9 +713,6 @@ endif()
#
option(SLEEF_DISABLE_VXE2 "Disable VXE2" OFF)
option(SLEEF_ENFORCE_VXE2 "Build fails if VXE2 is not supported by the compiler" OFF)
if(SLEEF_ARCH_S390X AND NOT SLEEF_DISABLE_VXE2)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_VXE2}")
CHECK_C_SOURCE_COMPILES("
@ -653,15 +734,26 @@ endif()
# RVVM1
option(SLEEF_DISABLE_RVVM1 "Disable RVVM1" OFF)
option(SLEEF_ENFORCE_RVVM1 "Build fails if RVVM1 is not supported by the compiler" OFF)
if(SLEEF_ARCH_RISCV64 AND NOT SLEEF_DISABLE_RVVM1)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM1}")
CHECK_C_SOURCE_COMPILES("
#include <riscv_vector.h>
int main() {
vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_vlenb() * 8 / 32); }"
#ifdef __riscv_v
#if __riscv_v < 1000000
#error \"RVV version 1.0 not supported\"
#endif
#else
#error \"RVV not supported\"
#endif
#ifdef __riscv_v_intrinsic
#if __riscv_v_intrinsic < 12000
#error \"RVV instrinsics version 0.12 not supported\"
#endif
#else
#error \"RVV intrinsics not supported\"
#endif
int main(void) { return 0; }"
COMPILER_SUPPORTS_RVVM1)
if(COMPILER_SUPPORTS_RVVM1)
@ -675,15 +767,26 @@ endif()
# RVVM2
option(SLEEF_DISABLE_RVVM2 "Disable RVVM2" OFF)
option(SLEEF_ENFORCE_RVVM2 "Build fails if RVVM2 is not supported by the compiler" OFF)
if(SLEEF_ARCH_RISCV64 AND NOT SLEEF_DISABLE_RVVM2)
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM2}")
CHECK_C_SOURCE_COMPILES("
#include <riscv_vector.h>
int main() {
vint32m2_t r = __riscv_vmv_v_x_i32m2(1, 2 * __riscv_vlenb() * 8 / 32); }"
#ifdef __riscv_v
#if __riscv_v < 1000000
#error \"RVV version 1.0 not supported\"
#endif
#else
#error \"RVV not supported\"
#endif
#ifdef __riscv_v_intrinsic
#if __riscv_v_intrinsic < 12000
#error \"RVV instrinsics version 0.12 not supported\"
#endif
#else
#error \"RVV intrinsics not supported\"
#endif
int main(void) { return 0; }"
COMPILER_SUPPORTS_RVVM2)
if(COMPILER_SUPPORTS_RVVM2)
@ -697,18 +800,14 @@ endif()
# CUDA
option(SLEEF_ENFORCE_CUDA "Build fails if CUDA is not supported" OFF)
if (SLEEF_ENFORCE_CUDA AND NOT CMAKE_CUDA_COMPILER)
message(FATAL_ERROR "SLEEF_ENFORCE_CUDA is specified and that feature is disabled or not supported by the compiler")
endif()
# OpenMP
option(SLEEF_DISABLE_OPENMP "Disable OPENMP" OFF)
option(SLEEF_ENFORCE_OPENMP "Build fails if OPENMP is not supported by the compiler" OFF)
if(NOT SLEEF_DISABLE_OPENMP)
set(CMAKE_REQUIRED_FLAGS)
find_package(OpenMP)
# Check if compilation with OpenMP really succeeds
# It might not succeed even though find_package(OpenMP) succeeds.
@ -796,6 +895,7 @@ set(CMAKE_REQUIRED_LIBRARIES)
# Save the default C flags
set(ORG_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
set(ORG_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
##
@ -838,10 +938,6 @@ if(SLEEF_SHOW_ERROR_LOG)
endif()
endif(SLEEF_SHOW_ERROR_LOG)
if (MSVC OR SLEEF_CLANG_ON_WINDOWS)
set(COMPILER_SUPPORTS_OPENMP FALSE) # At this time, OpenMP is not supported on MSVC
endif()
##
# Set common definitions

View File

@ -0,0 +1,247 @@
pipeline {
agent { label 'jenkinsfile' }
stages {
stage('Preamble') {
parallel {
stage('x86_64 linux clang-19-lto') {
agent { label 'x86_64 && ubuntu24 && avx512f' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "x86_64 clang-19 with LTO on" `hostname`
export CC=clang-19
export CXX=clang++-19
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEFDFT_ENABLE_STREAM=True -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=True -DSLEEF_ENFORCE_TESTER=True -DSLEEF_ENABLE_LTO=True -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld-19"
cmake -E time ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
ninja install
'''
}
}
stage('x86_64 linux clang-19-asan') {
agent { label 'x86_64 && ubuntu24 && avx512f' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "x86_64 clang-19 with ASAN on" `hostname`
export CC=clang-19
export CXX=clang++-19
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEFDFT_ENABLE_STREAM=True -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=True -DSLEEF_ENFORCE_TESTER=True -DSLEEF_ASAN=True
cmake -E time ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
ninja install
'''
}
}
stage('x86_64 linux gcc-13') {
agent { label 'x86_64 && ubuntu24 && cuda' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "x86_64 gcc-13 on" `hostname`
export CC=gcc-13
export CXX=g++-13
export CUDACXX=/opt/cuda-12.6/bin/nvcc
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_ENABLE_CUDA=True -DSLEEF_ENFORCE_CUDA=True -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=True -DSLEEF_ENFORCE_TESTER=True
cmake -E time ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
ninja install
'''
}
}
stage('x86_64 windows clang') {
agent { label 'windows11 && vs2022' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
bat """
call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat"
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
call "winbuild-clang.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=True -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=True -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_DISABLE_SSL=True
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
ctest -j 4 --output-on-failure
exit /b %ERRORLEVEL%
"""
}
}
stage('x86_64 windows vs2022') {
agent { label 'windows11 && vs2022' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
bat """
call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat"
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
call "winbuild-msvc.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=True -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENFORCE_TESTER4=True
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
ctest -j 4 --output-on-failure
exit /b %ERRORLEVEL%
"""
}
}
stage('riscv linux gcc-14') {
agent { label 'riscv && ubuntu23' }
options { skipDefaultCheckout() }
steps {
script {
System.setProperty("org.jenkinsci.plugins.durabletask.BourneShellScript.HEARTBEAT_CHECK_INTERVAL", "86400");
}
cleanWs()
checkout scm
sh '''
echo "riscv gcc-14 on" `hostname`
export CC=gcc-14.2.0
export CXX=g++-14.2.0
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=False -DSLEEF_ENFORCE_DFT=False -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=False -DSLEEF_ENFORCE_RVVM1=True -DSLEEF_ENFORCE_RVVM2=True
cmake -E time oomstaller ninja -j `nproc`
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
ninja install
'''
}
}
stage('arm32 linux gcc-12') {
agent { label 'armv7 && debian12' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "arm32 gcc-12 on" `hostname`
export CC=gcc-12
export CXX=g++-12
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=False
cmake -E time oomstaller ninja -j `nproc`
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
ninja install
'''
}
}
stage('aarch64 linux clang-19') {
agent { label 'aarch64 && ubuntu24 && apple' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "aarch64 clang-19 on" `hostname`
export CC=clang-19
export CXX=clang++-19
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64-static -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=False -DSLEEF_ENABLE_LTO=True -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld-19"
cmake -E time oomstaller ninja -j `nproc`
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
'''
}
}
stage('aarch64 linux gcc-14') {
agent { label 'aarch64 && ubuntu24 && apple' }
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "aarch64 gcc-14 on" `hostname`
export CC=gcc-14
export CXX=g++-14
mkdir build
cd build
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64-static -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=False
cmake -E time oomstaller ninja -j `nproc`
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
'''
}
}
// Cross-compiles SLEEF for ppc64el in two phases: a native build in
// build-native first, then the cross build in build, which is pointed at the
// native tree through NATIVE_BUILD_DIR and at the ppc64el toolchain file.
stage('cross-ppc64el gcc') {
agent { label 'x86_64 && ubuntu24 && cuda' }
// The steps below wipe the workspace and check out explicitly, so the
// implicit per-agent checkout is skipped, as in the other stages.
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "Cross ppc64el gcc on" `hostname`
rm -rf build-native
mkdir build-native
cd build-native
cmake -GNinja .. -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE
cmake -E time ninja
cd ..
mkdir build
cd build
cmake -GNinja .. -DCMAKE_TOOLCHAIN_FILE=../toolchains/ppc64el-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_ENFORCE_TESTER3=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=False -DSLEEF_ENFORCE_VSX=True -DSLEEF_ENFORCE_VSX3=True
cmake -E time ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
export LD_LIBRARY_PATH=/usr/powerpc64le-linux-gnu/lib
ctest -j `nproc`
ninja install
'''
}
}
// Cross-compiles SLEEF for s390x in two phases: a native build in
// build-native first, then the cross build in build, which is pointed at the
// native tree through NATIVE_BUILD_DIR and at the s390x toolchain file.
stage('cross-s390x gcc') {
agent { label 'x86_64 && ubuntu24 && cuda' }
// The steps below wipe the workspace and check out explicitly, so the
// implicit per-agent checkout is skipped, as in the other stages.
options { skipDefaultCheckout() }
steps {
cleanWs()
checkout scm
sh '''
echo "Cross s390x gcc on" `hostname`
rm -rf build-native
mkdir build-native
cd build-native
cmake -GNinja .. -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE
cmake -E time ninja
cd ..
mkdir build
cd build
cmake -GNinja .. -DCMAKE_TOOLCHAIN_FILE=../toolchains/s390x-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_ENFORCE_TESTER3=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_DFT=TRUE -DSLEEF_ENFORCE_DFT=TRUE -DSLEEF_ENFORCE_TESTER4=True -DSLEEF_ENABLE_TESTER=False -DSLEEF_ENFORCE_VXE=True -DSLEEF_ENFORCE_VXE2=True
cmake -E time ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
ctest -j `nproc`
ninja install
'''
}
}
}
}
}
}

View File

@ -0,0 +1,129 @@
== SLEEF - SIMD Library for Evaluating Elementary Functions
image:http://img.shields.io/badge/DOI-10.1109/TPDS.2019.2960333-blue.svg[TPDS, link=https://ieeexplore.ieee.org/document/8936472]
SLEEF is a library that implements vectorized versions of C standard
math functions. This library also includes DFT subroutines.
* *Web Page:* https://sleef.org/
* *Sources:* https://github.com/shibatch/sleef
== Supported environment
=== Test matrix
The following table summarizes currently supported OSes and compilers.
[cols="1,1,1,1,1,1,1,1,1"]
|===
| 2+|Linux 4+|Windows 2+|Mac
| |gcc |llvm |MSVC |Clang |MinGW |Cygwin |Clang |GCC
|x86_64 |&#x2714; |&#x2714; |&#x2714; |&#x2714; |&#x2714; |&#x2753; |&#x2714; |&#x2753;
|RISC-V 64 |&#x2714; |&#x2753; |N/A |N/A |N/A |N/A |N/A |N/A
|AArch64 |&#x2714; |&#x2714; |&#x274c; |&#x274c; |&#x274c; |&#x274c; |&#x2714; |&#x2753;
|POWER |&#x2714; |&#x2753; |N/A |N/A |N/A |N/A |N/A |N/A
|S390X |&#x2714; |&#x2753; |N/A |N/A |N/A |N/A |N/A |N/A
|AArch32 |&#x2714; |&#x2753; |N/A |N/A |N/A |N/A |N/A |N/A
|===
&#x2714; : Tested on CI, &#x2753; : Not tested, &#x274c; : Not supported
== How to build SLEEF
The library itself does not have any additional dependency.
In order to build SLEEF, you need CMake 3.18+, and C and C++ compilers of the same version.
It is also recommended to have the following tools.
* Ninja
* Git
https://github.com/shibatch/tlfloat[TLFloat] is automatically downloaded if no suitable version is found on your system.
Some tests require:
* libssl and libcrypto, that can be provided by installing openssl.
* libm, libgmp and libmpfr
* libfftw.
The build procedure is as follows.
[arabic]
. Check out the source code from our GitHub repository
....
git clone https://github.com/shibatch/sleef
....
[arabic, start=2]
. Make a separate directory to create an out-of-source build
....
cd sleef && mkdir build
....
[arabic, start=3]
. Run cmake to configure the project
....
cmake -S . -B build
....
By default this will generate shared libraries. In order to generate
static libraries, pass option `-DBUILD_SHARED_LIBS=OFF`.
For more verbose output add option `-DSLEEF_SHOW_CONFIG=ON`.
[arabic, start=4]
. Run cmake to build the project
....
cmake --build build -j --clean-first
....
[arabic, start=5]
. Run tests using ctest
....
ctest --test-dir build -j
....
For more detailed build instructions please refer to
https://sleef.org/compile.xhtml#preliminaries[our web page].
== How to cross-compile SLEEF
For more details, please refer to
https://sleef.org/compile.xhtml#cross[cross-compile SLEEF]
== Install SLEEF
=== From source
The following assumes that the build instructions above were followed.
[arabic, start=6]
. Install to specified directory `<prefix>`
....
cmake --install build --prefix=<prefix>
....
=== Uninstall
In order to uninstall SLEEF library and headers run
....
sudo xargs rm -v < build/install_manifest.txt
....
== License
The software is distributed under the Boost Software License, Version
1.0. See accompanying file link:./LICENSE.txt[LICENSE.txt] or copy at
http://www.boost.org/LICENSE_1_0.txt. Contributions to this project are
accepted under the same license.
Copyright © 2010-2025 SLEEF Project, Naoki Shibata and contributors.

View File

@ -1,221 +0,0 @@
# SLEEF
![Github Actions](https://github.com/shibatch/sleef/actions/workflows/build_and_test.yml/badge.svg?event=push&branch=master)
[![DOI:10.1109/TPDS.2019.2960333](http://img.shields.io/badge/DOI-10.1109/TPDS.2019.2960333-blue.svg)](https://ieeexplore.ieee.org/document/8936472)
[![License](https://img.shields.io/badge/License-Boost_1.0-lightblue.svg)](https://www.boost.org/LICENSE_1_0.txt)
![CMake](https://img.shields.io/badge/cmake-v3.18+-yellow.svg)
[![Spack](https://img.shields.io/spack/v/sleef)](https://spack.readthedocs.io/en/v0.16.2/package_list.html#sleef)
[![SourceForge Downloads](https://img.shields.io/sourceforge/dt/sleef)](https://sourceforge.net/projects/sleef/)
SLEEF is a library that implements vectorized versions of C standard math functions. This library also includes DFT subroutines.
- **Web Page:** [https://sleef.org/][webpage_url]
- **Sources:** [https://github.com/shibatch/sleef][repo_url]
## Supported environment
### Test matrix
The following table summarises currently supported vector extensions, compilers and OS-es.
:green_circle: : Tested extensively in CI.
:yellow_circle: : Tested partially in CI.
:x: : Currently failing some tests in CI.
:white_circle: : Not tested in CI. Might have passed tests in previous CI framework.
[This issue](https://github.com/shibatch/sleef/issues/481) tracks progress on improving test coverage.
Compilation of SLEEF on previously supported environments might still be safe, we just cannot verify it yet.
<table>
<tr>
<th colspan="2" rowspan="2"></th>
<th colspan="9">OS/Compiler</th>
</tr>
<tr>
<th colspan="3">Linux</th>
<th colspan="2">macOS</th>
<th colspan="4">Windows</th>
</tr>
<tr>
<th>Arch.</th>
<th>Vector Extensions</th>
<th>gcc</th><th>llvm</th><th>icc</th>
<th>gcc</th><th>llvm</th>
<th>gcc</th><th>llvm-gnu</th><th>llvm-msvc</th><th>msvc</th>
</tr>
<tr align="center"><th>x86_64</th><th>SSE2, SSE4,<br>AVX, AVX2, AVX512F</th>
<td>:green_circle:</td><td>:green_circle:</td><td>:white_circle:</td>
<td>:white_circle:</td><td>:green_circle:</td>
<td>:white_circle:</td><td>:yellow_circle:</td><td>:white_circle:</td><td>:white_circle:</td>
</tr>
<tr align="center"><th>x86 32bit<br>(i386)</th><th>SSE</th>
<td>:white_circle:</td><td>:white_circle:</td><td>:white_circle:</td>
<td colspan="2">N/A</td>
<td>:white_circle:</td><td>:white_circle:</td><td>:white_circle:</td><td>:white_circle:</td>
</tr>
<tr align="center"><th>AArch64<br>(arm)</th><th>Neon, SVE</th>
<td>:green_circle:</td><td>:green_circle:</td><td>N/A</td>
<td colspan="1">N/A</td><td>:green_circle:</td>
<td colspan="1">N/A</td><td>:white_circle:</td><td>:white_circle:</td><td>:white_circle:</td>
</tr>
<tr align="center"><th>AArch32<br>(armhf)</th><th>NEON</th>
<td>:green_circle:</td><td>:green_circle:</td><td>N/A</td>
<td colspan="2">N/A</td>
<td colspan="4">N/A</td>
</tr>
<tr align="center"><th>PowerPC<br>(ppc64el)</th><th>VSX, VSX3</th>
<td>:green_circle:</td><td>:green_circle:</td><td>N/A</td>
<td colspan="2">N/A</td>
<td colspan="4">N/A</td>
</tr>
<tr align="center"><th>IBM/Z<br>(s390x)</th><th>VXE, VXE2</th>
<td>:green_circle:</td><td>:green_circle:</td><td>N/A</td>
<td colspan="2">N/A</td>
<td colspan="4">N/A</td>
</tr>
<tr align="center"><th>RISC-V<br>(riscv64)</th><th>RVV1, RVV2</th>
<td>N/A (14+)</td><td>:green_circle:</td><td>N/A</td>
<td colspan="2">N/A</td>
<td colspan="4">N/A</td>
</tr>
</table>
### Component support
The above table is valid for libm in single, double and quadruple precision, as well as fast Discrete Fourier Transform (DFT).
Generation of inline headers is also supported for most vector extensions.
LTO is not tested in CI yet, except on Windows.
### Compiler support
Results are displayed for gcc 11 and llvm 17, the compiler versions used in CI tests with GitHub Actions.
Older versions should be supported too, while newer ones are either not tested or have known issues.
Some compiler versions simply do not support certain vector extensions, for instance SVE is only supported for gcc version 9 onwards.
Similarly, the RISC-V interface in SLEEF is based on version 1.0 of the intrinsics, which is only supported from llvm version 17 and gcc version 14 onwards.
Toolchain files provide some information on supported compiler versions.
### OS support
Only Linux distributions and macOS are fully tested in CI and thus officially supported.
Building SLEEF for Windows on x86 machines was officially supported ( :white_circle: ), as of 3.5.1,
however it is only partially tested due to [known limitations of the test suite with MinGW or MSYS2](https://github.com/shibatch/sleef/issues/544).
As a result tests for Windows on x86 only include DFT for now (other tests are disabled in build system),
but all components are built.
Support for iOS and Android is only preliminary on AArch64.
SVE is not supported on Darwin-based systems and is therefore automatically disabled by SLEEF on Darwin.
### More on supported environment
Refer to our web page for [more on supported environment][supported_env_url].
## Install SLEEF dependencies
The library itself does not have any additional dependency.
However some tests require:
- libssl and libcrypto, that can be provided by installing openssl.
- libm, libgmp and libmpfr
- libfftw.
These tests can be disabled if necessary.
## How to build SLEEF
We recommend relying on CMake as much as possible in the build process to ensure portability.
**CMake 3.18+** is the minimum required.
1. Check out the source code from our GitHub repository
```
git clone https://github.com/shibatch/sleef
```
2. Make a separate directory to create an out-of-source build
```
cd sleef && mkdir build
```
3. Run cmake to configure the project
```
cmake -S . -B build
```
By default this will generate shared libraries. In order to generate static libraries, pass option `-DBUILD_SHARED_LIBS=OFF`.
For more verbose output add option `-DSLEEF_SHOW_CONFIG=ON`.
4. Run make to build the project
```
cmake --build build -j --clean-first
```
5. Run tests using ctest
```
ctest --test-dir build -j
```
For more detailed build instructions please refer to the [dedicated section on CMake](./docs/build-with-cmake.md) or to [our web page][build_info_url].
## Install SLEEF
### From source
Assuming the build instructions above were followed.
6. Install to specified directory `<prefix>`
```
cmake --install build --prefix=<prefix>
```
### Using Spack
SLEEF can also be directly installed using Spack.
```
spack install sleef@master
```
### Uninstall
In order to uninstall SLEEF library and headers run
```
sudo xargs rm -v < build/install_manifest.txt
```
## License
The software is distributed under the Boost Software License, Version 1.0.
See accompanying file [LICENSE.txt](./LICENSE.txt) or copy at [http://www.boost.org/LICENSE_1_0.txt][license_url].
Contributions to this project are accepted under the same license.
Copyright &copy; 2010-2024 SLEEF Project, Naoki Shibata and contributors.<br/>
<!-- Repository links -->
[webpage_url]: https://sleef.org/
[build_info_url]: https://sleef.org/compile.xhtml
[supported_env_url]: https://sleef.org/index.xhtml#environment
[repo_url]: https://github.com/shibatch/sleef
[repo_license_url]: https://github.com/shibatch/sleef/blob/main/LICENSE.txt
[license_url]: http://www.boost.org/LICENSE_1_0.txt

View File

@ -6,6 +6,7 @@ extern "C"
{
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@ -46,20 +47,24 @@ IMPORT struct SleefDFT *SleefDFT_float_init1d(uint32_t n, const float *in, float
IMPORT struct SleefDFT *SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode);
IMPORT void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out);
IMPORT void SleefDFT_execute(struct SleefDFT *ptr, const void *in, void *out);
IMPORT void SleefDFT_dispose(struct SleefDFT *ptr);
IMPORT void SleefDFT_setPath(struct SleefDFT *ptr, char *pathStr);
IMPORT int SleefDFT_getPath(struct SleefDFT *ptr, char *pathStr, int pathStrSize);
IMPORT void SleefDFT_setDefaultVerboseFP(FILE *fp);
//
IMPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode);
IMPORT int SleefDFT_savePlan(const char *pathStr);
#define SLEEF_PLAN_AUTOMATIC 0
#define SLEEF_PLAN_READONLY (1 << 0)
#define SLEEF_PLAN_RESET (1 << 1)
#define SLEEF_PLAN_BUILDALLPLAN (1 << 2)
#define SLEEF_PLAN_AUTOMATIC (1 << 2)
#define SLEEF_PLAN_NOLOCK (1 << 3)
#define SLEEF_PLAN_MEASURE (1 << 29)
#define SLEEF_PLAN_REFERTOENVVAR (1 << 30)
#undef IMPORT

View File

@ -6,6 +6,11 @@
#define SLEEF_VERSION_MAJOR @SLEEF_VERSION_MAJOR@
#define SLEEF_VERSION_MINOR @SLEEF_VERSION_MINOR@
#cmakedefine SLEEF_FLOAT128_IS_IEEEQP
#cmakedefine SLEEF_LONGDOUBLE_IS_IEEEQP
#ifndef SLEEF_STATIC_LIBS
#cmakedefine SLEEF_STATIC_LIBS
#endif
#endif // SLEEF_CONFIG_H

View File

@ -7,11 +7,19 @@ if (SLEEF_BUILD_TESTS AND NOT MINGW)
endif()
add_subdirectory("common")
if (SLEEF_BUILD_DFT)
if (SLEEF_BUILD_BENCH)
# add_subdirectory("libm-benchmarks")
endif()
if (SLEEF_BUILD_DFT AND COMPILER_SUPPORTS_OPENMP)
add_subdirectory("dft")
if (SLEEF_BUILD_TESTS)
add_subdirectory("dft-tester")
endif()
else()
if (SLEEF_ENFORCE_DFT)
message(FATAL_ERROR "SLEEF_ENFORCE_DFT is specified and DFT is not built")
endif()
endif()
if (SLEEF_BUILD_QUAD)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -110,7 +110,7 @@ static INLINE int vavailability_i(int name) {
#endif // #if !defined(SLEEF_GENHEADER)
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); }
static INLINE int vtestallones_i_vo32(vopmask g) {
return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
@ -516,10 +516,10 @@ static INLINE float vcast_f_vf(vfloat v) {
#endif
//
#define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 })
#define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 })
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f })
#define PNMASK _mm256_set_pd( -0.0, +0.0, -0.0, +0.0 )
#define NPMASK _mm256_set_pd( +0.0, -0.0, +0.0, -0.0 )
#define PNMASKf _mm256_set_ps( -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f )
#define NPMASKf _mm256_set_ps( +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f )
static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); }
static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); }
@ -629,7 +629,7 @@ static INLINE vmask vcast_vm_vi(vint vi) {
}
static INLINE vint vcast_vi_vm(vmask vm) {
return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vm)), _mm_set1_ps(0), 0x08)),
_mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80)));
_mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80)));
}
static INLINE vmask vreinterpret_vm_vi64(vint64 v) { return v; }

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -96,7 +96,7 @@ static INLINE int vavailability_i(int name) {
#endif // #if !defined(SLEEF_GENHEADER)
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); }
static INLINE int vtestallones_i_vo32(vopmask g) {
return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
@ -168,7 +168,7 @@ static INLINE vmask vcastu_vm_vi(vint vi) {
static INLINE vint vcastu_vi_vm(vmask vi) {
return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vi)), _mm_set1_ps(0), 0x0d)),
_mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vi, 1)), 0xd0)));
_mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vi, 1)), 0xd0)));
}
static INLINE vmask vcast_vm_i_i(int i0, int i1) {
@ -392,10 +392,10 @@ static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm2
//
#define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 })
#define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 })
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f })
#define PNMASK _mm256_set_pd( -0.0, +0.0, -0.0, +0.0 )
#define NPMASK _mm256_set_pd( +0.0, -0.0, +0.0, -0.0 )
#define PNMASKf _mm256_set_ps( -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f )
#define NPMASKf _mm256_set_ps( +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f )
static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); }
static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); }
@ -476,7 +476,7 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpgt_epi
static INLINE vmask vcast_vm_vi(vint vi) { return _mm256_cvtepi32_epi64(vi); } // signed 32-bit => 64-bit
static INLINE vint vcast_vi_vm(vmask vm) { // signed 32-bit <= 64-bit
return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vm)), _mm_set1_ps(0), 0x08)),
_mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80)));
_mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80)));
}
static INLINE vmask vreinterpret_vm_vi64(vint64 v) { return v; }

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -96,7 +96,7 @@ static INLINE int vavailability_i(int name) {
#endif // #if !defined(SLEEF_GENHEADER)
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); }
static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; }
static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; }
@ -371,10 +371,10 @@ static INLINE float vcast_f_vf(vfloat v) {
//
#define PNMASK ((vdouble) { +0.0, -0.0 })
#define NPMASK ((vdouble) { -0.0, +0.0 })
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f })
#define PNMASK _mm_set_pd( -0.0, +0.0 )
#define NPMASK _mm_set_pd( +0.0, -0.0 )
#define PNMASKf _mm_set_ps( -0.0f, +0.0f, -0.0f, +0.0f )
#define NPMASKf _mm_set_ps( +0.0f, -0.0f, +0.0f, -0.0f )
static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); }
static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); }

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -102,7 +102,7 @@ static INLINE int vavailability_i(int name) {
#endif // #if !defined(SLEEF_GENHEADER)
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); }
#ifdef __INTEL_COMPILER
static INLINE int vtestallones_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0xff; }

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -257,10 +257,10 @@ static INLINE int vavailability_i(int name) {
}
static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32(__builtin_assume_aligned(ptr, 16)); }
static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32((const float32_t*)__builtin_assume_aligned(ptr, 16)); }
static INLINE vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); }
static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(__builtin_assume_aligned(ptr, 16), v); }
static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32((float32_t*)__builtin_assume_aligned(ptr, 16), v); }
static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); }
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) {

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -103,16 +103,16 @@ typedef vquad vargquad;
#define vset__s64(...) ((v__i64) {__VA_ARGS__})
#define vset__u64(...) ((v__u64) {__VA_ARGS__})
#define vsetall__vi(v) vset__vi(v, v)
#define vsetall__vi2(v) vset__vi2(v, v, v, v)
#define vsetall__vi(v) vset__vi((int)v, (int)v)
#define vsetall__vi2(v) vset__vi2((int)v, (int)v, (int)v, (int)v)
#define vsetall__vm(v) vset__vm(v, v, v, v)
#define vsetall__vo(v) vset__vo(v, v, v, v)
#define vsetall__vf(v) vset__vf(v, v, v, v)
#define vsetall__vd(v) vset__vd(v, v)
#define vsetall__u8(v) vset__u8(v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v)
#define vsetall__u32(v) vset__u32(v, v, v, v)
#define vsetall__s64(v) vset__s64(v, v)
#define vsetall__u64(v) vset__u64(v, v)
#define vsetall__vf(v) vset__vf((float)v, (float)v, (float)v, (float)v)
#define vsetall__vd(v) vset__vd((double)v, (double)v)
#define vsetall__u8(v) vset__u8((uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v, (uint8_t)v)
#define vsetall__u32(v) vset__u32((uint32_t)v, (uint32_t)v, (uint32_t)v, (uint32_t)v)
#define vsetall__s64(v) vset__s64((int64_t)v, (int64_t)v)
#define vsetall__u64(v) vset__u64((uint64_t)v, (uint64_t)v)
#define vzero__vi() vsetall__vi(0)
#define vzero__vi2() vsetall__vi2(0)
@ -351,7 +351,7 @@ static INLINE vmask vcastu_vm_vi(vint vi)
static INLINE vopmask vcast_vo_i(int i) {
i = i ? -1 : 0;
return (vopmask) { i, i, i, i };
return (vopmask) { (unsigned int)i, (unsigned int)i, (unsigned int)i, (unsigned int)i };
}
// signed int to single-precision
@ -371,7 +371,7 @@ static INLINE vdouble vcast_vd_vi(vint vi)
{
vdouble ret;
vint swap = vec_mergeh(vi, vi);
#if defined(__clang__) || __GNUC__ >= 7
#if defined(__clang__) || (__GNUC__ >= 7 && __GNUC__ < 15)
ret = __builtin_vsx_xvcvsxwdp(swap);
#else
__asm__ __volatile__("xvcvsxwdp %x0,%x1" : "=wa" (ret) : "wa" (swap));
@ -406,7 +406,7 @@ static INLINE vint2 vtruncate_vi2_vf(vfloat vf)
static INLINE vint vtruncate_vi_vd(vdouble vd)
{
vint ret;
#if defined(__clang__) || __GNUC__ >= 7
#if defined(__clang__) || (__GNUC__ >= 7 && __GNUC__ < 15)
ret = __builtin_vsx_xvcvdpsxws(vd);
#else
__asm__ __volatile__("xvcvdpsxws %x0,%x1" : "=wa" (ret) : "wa" (vd));
@ -860,11 +860,11 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
#define vsrl64_vm_vm_i(x, c) ((vmask)vec_sr((__vector signed long long)x, (__vector unsigned long long)vsetall__vm(c)))
static INLINE vint vcast_vi_vm(vmask vm) {
return (vint) { vm[0], vm[2] };
return (vint) { (int)vm[0], (int)vm[2] };
}
static INLINE vmask vcast_vm_vi(vint vi) {
return (vmask) (__vector signed long long) { vi[0], vi[1] };
return (vmask) (__vector signed long long) { (signed long long)vi[0], (signed long long)vi[1] };
}
static INLINE vmask vreinterpret_vm_vi64(vint64 v) { return (vmask)v; }

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2023.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -426,7 +426,7 @@ static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { *ptr = v; }
static vquad loadu_vq_p(void *p) {
vquad vq;
memcpy(8 + (char *)&vq, p, 8);
memcpy((char *)&vq, 8 + p, 8);
memcpy((char *)&vq, 8 + (char *)p, 8);
return vq;
}

View File

@ -91,6 +91,7 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
static INLINE vfloat64m1x4_t __riscv_vcreate_v_f64m1x4(vfloat64m1_t x, vfloat64m1_t y, vfloat64m1_t z, vfloat64m1_t w) {
vfloat64m1x4_t unused;
return __riscv_vset(__riscv_vset(__riscv_vset(__riscv_vset(unused, 0, x), 1, y), 2, z), 3, w);
@ -158,14 +159,14 @@ typedef vfloat64m1x4_t tdi_t;
#define SLEEF_RVV_SP_LMUL 1
#define SLEEF_RVV_DP_LMUL 1
#define SLEEF_RVV_DP_RUNTIME_VL() __riscv_vsetvlmax_e64m1()
#define SLEEF_RVV_DP_RUNTIME_VL() ((int)__riscv_vsetvlmax_e64m1())
#if SLEEF_RVV_VLEN == 0
// The configuration didn't provide a constant vector length, meaning it'll
// have to be determined at run-time. RVV offers per-data-width operations for
// this so the result doesn't need to be adjusted and that operation is likely
// to fold into the surrounding code for free.
//
#define VECTLENSP (__riscv_vsetvlmax_e32m1())
#define VECTLENSP ((int)__riscv_vsetvlmax_e32m1())
#define VECTLENDP SLEEF_RVV_DP_RUNTIME_VL()
//@#define VECTLENSP __riscv_vsetvlmax_e32m1()
//@#define VECTLENDP __riscv_vsetvlmax_e64m1()
@ -268,7 +269,7 @@ typedef vfloat64m2x4_t tdi_t;
#define SLEEF_RVV_SP_LMUL 2
#define SLEEF_RVV_DP_LMUL 2
#define SLEEF_RVV_DP_RUNTIME_VL() __riscv_vsetvlmax_e64m2()
#define SLEEF_RVV_DP_RUNTIME_VL() ((int)__riscv_vsetvlmax_e64m2())
#if SLEEF_RVV_VLEN == 0
// The configuration didn't provide a constant vector length, meaning it'll
// have to be determined at run-time. RVV offers per-data-width operations for
@ -605,7 +606,7 @@ static INLINE vmask vreinterpret_vm_vf(vfloat vf) {
// needed.
//
static INLINE int vtestallones_i_vo32(rvv_sp_vopmask g) {
return __riscv_vcpop(g, VECTLENSP) == VECTLENSP;
return (int)__riscv_vcpop(g, VECTLENSP) == (int)VECTLENSP;
}
static INLINE vmask vor_vm_vo32_vm(rvv_sp_vopmask x, vmask y) {
rvv_vmask32 y32 = SLEEF_RVV_SP_VREINTERPRET_VM(y);
@ -1080,7 +1081,7 @@ static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(rvv_dp_vopmask o0, rvv_dp_vopmask
return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d3), d2, o2, VECTLENDP), d1, o1, VECTLENDP), d0, o0, VECTLENDP);
}
static INLINE int vtestallones_i_vo64(rvv_dp_vopmask g) {
return __riscv_vcpop(g, VECTLENDP) == VECTLENDP;
return (int)__riscv_vcpop(g, VECTLENDP) == (int)VECTLENDP;
}
// integer comparison
static INLINE rvv_dp_vopmask veq_vo_vi_vi(vint x, vint y) {
@ -1171,7 +1172,7 @@ static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdoub
// probably only iterate 2 or 4 times.
//
ptr += offset * 2;
for (int i = 0; i < VECTLENDP; i += 2) {
for (int i = 0; i < (int)VECTLENDP; i += 2) {
// PROTIP: Avoid modifying `v` within the loop, and just extract the useful
// part directly in each iteration, because we can. This avoids a
// loop-carried dependency.
@ -1185,7 +1186,7 @@ static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdoub
static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
// as above re: looping
ptr += offset * 2;
for (int i = 0; i < VECTLENSP; i += 2) {
for (int i = 0; i < (int)VECTLENSP; i += 2) {
vfloat vv = __riscv_vslidedown(v, i, 2);
__riscv_vse32(ptr, vv, 2);
ptr += step * 2;

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -78,7 +78,7 @@ typedef vquad vargquad;
static INLINE int vavailability_i(int n) {
if (n == 1 || n == 2) {
return vec_max((vdouble) {n, n}, (vdouble) {n, n})[0] != 0;
return vec_max((vdouble) {(double)n, (double)n}, (vdouble) {(double)n, (double)n})[0] != 0;
}
return 0;
}
@ -127,23 +127,23 @@ static INLINE vfloat vgather_vf_p_vi2(const float *p, vint2 vi2) {
return ((vfloat) { p[vi2[0]], p[vi2[1]], p[vi2[2]], p[vi2[3]] });
}
static INLINE vopmask vcast_vo_i(int i) { return (vopmask) { i ? (long long)-1 : 0, i ? (long long)-1 : 0 }; }
static INLINE vopmask vcast_vo_i(int i) { return (vopmask) { i ? (unsigned long long)-1 : 0, i ? (unsigned long long)-1 : 0 }; }
static INLINE vint vcast_vi_i(int i) { return (vint) { i, i }; }
static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i }; }
static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f }; }
static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d }; }
static INLINE vdouble vcast_vd_vi(vint vi) { return (vdouble) { vi[0], vi[1] }; }
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return (vfloat) { vi[0], vi[1], vi[2], vi[3] }; }
static INLINE vdouble vcast_vd_vi(vint vi) { return (vdouble) { (double)vi[0], (double)vi[1] }; }
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return (vfloat) { (float)vi[0], (float)vi[1], (float)vi[2], (float)vi[3] }; }
static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return __builtin_s390_vfidb(vd, 4, 5); }
static INLINE vdouble vrint_vd_vd(vdouble vd) { return __builtin_s390_vfidb(vd, 4, 4); }
static INLINE vint vrint_vi_vd(vdouble vd) {
vd = vrint_vd_vd(vd);
return (vint) { vd[0], vd[1] };
return (vint) { (int)vd[0], (int)vd[1] };
}
static INLINE vint vtruncate_vi_vd(vdouble vd) { return (vint) { vd[0], vd[1] }; }
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return (vint) { vf[0], vf[1], vf[2], vf[3] }; }
static INLINE vint vtruncate_vi_vd(vdouble vd) { return (vint) { (int)vd[0], (int)vd[1] }; }
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return (vint) { (int)vf[0], (int)vf[1], (int)vf[2], (int)vf[3] }; }
static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (vmask)vd; }
static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (vdouble)vm; }
@ -202,7 +202,7 @@ static INLINE vmask vcast_vm_i64(int64_t i) { return (vmask)(vint64){ i, i }; }
static INLINE vmask vcast_vm_u64(uint64_t i) { return (vmask)(vuint64){ i, i }; }
static INLINE vmask vcastu_vm_vi(vint vi) { return (vmask)(vint2){ vi[0], 0, vi[1], 0 }; }
static INLINE vint vcastu_vi_vm(vmask vi2) { return (vint){ vi2[0] >> 32, vi2[1] >> 32 }; }
static INLINE vint vcastu_vi_vm(vmask vi2) { return (vint){ (int)(vi2[0] >> 32), (int)(vi2[1] >> 32) }; }
static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1] }; }
static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], 0, 0 }; }
@ -309,8 +309,8 @@ static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return x ^ y; }
static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vreinterpretFirstHalf_vi_vi2((vint2)x) & y; }
static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return vec_andc(y, vreinterpretFirstHalf_vi_vi2((vint2)x)); }
static INLINE vint vsll_vi_vi_i(vint x, int c) { return (vint)(((__vector unsigned int)x) << (__vector unsigned int){c, c, c, c}); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return (vint)(((__vector unsigned int)x) >> (__vector unsigned int){c, c, c, c}); }
static INLINE vint vsll_vi_vi_i(vint x, int c) { return (vint)(((__vector unsigned int)x) << (__vector unsigned int){(unsigned int)c, (unsigned int)c, (unsigned int)c, (unsigned int)c}); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return (vint)(((__vector unsigned int)x) >> (__vector unsigned int){(unsigned int)c, (unsigned int)c, (unsigned int)c, (unsigned int)c}); }
static INLINE vint vsra_vi_vi_i(vint x, int c) { return x >> (__vector int){c, c, c, c}; }
static INLINE vint veq_vi_vi_vi(vint x, vint y) { return vec_cmpeq(x, y); }
@ -364,8 +364,8 @@ static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return x ^ y; }
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)x & y; }
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return y & ~(vint2)x; }
static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return (vint2)(((__vector unsigned int)x) << (__vector unsigned int){c, c, c, c}); }
static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return (vint2)(((__vector unsigned int)x) >> (__vector unsigned int){c, c, c, c}); }
static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return (vint2)(((__vector unsigned int)x) << (__vector unsigned int){(unsigned int)c, (unsigned int)c, (unsigned int)c, (unsigned int)c}); }
static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return (vint2)(((__vector unsigned int)x) >> (__vector unsigned int){(unsigned int)c, (unsigned int)c, (unsigned int)c, (unsigned int)c}); }
static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return x >> (__vector int){c, c, c, c}; }
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)vec_cmpeq(x, y); }
@ -405,7 +405,7 @@ static INLINE vopmask visnan_vo_vf (vfloat d) { return vneq_vo_vf_vf(d, d); }
static INLINE vint2 vrint_vi2_vf(vfloat vf) {
vf = vrint_vf_vf(vf);
return (vint) { vf[0], vf[1], vf[2], vf[3] };
return (vint) { (int)vf[0], (int)vf[1], (int)vf[2], (int)vf[3] };
}
//
@ -445,11 +445,11 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
return (vopmask)vec_cmpgt((__vector signed long long)x, (__vector signed long long)y);
}
#define vsll64_vm_vm_i(x, c) ((vmask)((__vector unsigned long long)x << (__vector unsigned long long) { c, c }))
#define vsrl64_vm_vm_i(x, c) ((vmask)((__vector unsigned long long)x >> (__vector unsigned long long) { c, c }))
#define vsll64_vm_vm_i(x, c) ((vmask)((__vector unsigned long long)x << (__vector unsigned long long) { (unsigned long long)c, (unsigned long long)c }))
#define vsrl64_vm_vm_i(x, c) ((vmask)((__vector unsigned long long)x >> (__vector unsigned long long) { (unsigned long long)c, (unsigned long long)c }))
static INLINE vint vcast_vi_vm(vmask vm) {
return (vint) { vm[0], vm[1] };
return (vint) { (int)vm[0], (int)vm[1] };
}
static INLINE vmask vcast_vm_vi(vint vi) {

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -124,7 +124,7 @@ static INLINE int vavailability_i(int name) {
#endif // #if !defined(SLEEF_GENHEADER)
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); }
static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; }
static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; }
@ -420,10 +420,10 @@ static INLINE float vcast_f_vf(vfloat v) {
//
#define PNMASK ((vdouble) { +0.0, -0.0 })
#define NPMASK ((vdouble) { -0.0, +0.0 })
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f })
#define PNMASK _mm_set_pd( -0.0, +0.0 )
#define NPMASK _mm_set_pd( +0.0, -0.0 )
#define PNMASKf _mm_set_ps( -0.0f, +0.0f, -0.0f, +0.0f )
#define NPMASKf _mm_set_ps( +0.0f, -0.0f, +0.0f, -0.0f )
static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); }
static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); }

View File

@ -22,9 +22,9 @@
#if CONFIG == 1 || CONFIG == 2
// Vector length agnostic
#define VECTLENSP (svcntw())
#define VECTLENSP ((int)svcntw())
//@#define VECTLENSP (svcntw())
#define VECTLENDP (svcntd())
#define VECTLENDP ((int)svcntd())
//@#define VECTLENDP (svcntd())
#define ISANAME "AArch64 SVE"
#define ptrue svptrue_b8()

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -16,10 +16,49 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}")
add_library(${TARGET_LIBCOMMON_OBJ} OBJECT common.c)
set_target_properties(${TARGET_LIBCOMMON_OBJ} PROPERTIES ${COMMON_TARGET_PROPERTIES})
# Target TARGET_LIBARRAYMAP_OBJ
add_library(${TARGET_LIBARRAYMAP_OBJ} OBJECT arraymap.c)
set_target_properties(${TARGET_LIBARRAYMAP_OBJ} PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_host_executable("addSuffix" addSuffix.c)
set_target_properties("addSuffix" PROPERTIES C_STANDARD 99)
if (NOT SLEEF_OPENSSL_FOUND)
add_library(${TARGET_PSHA_OBJ} OBJECT psha2_capi.cpp)
else()
# Tests for internal sha256
add_executable(test_psha test_psha2.cpp)
target_link_libraries(test_psha ${SLEEF_OPENSSL_LIBRARIES})
target_include_directories(test_psha PRIVATE ${SLEEF_OPENSSL_INCLUDE_DIR})
add_test(NAME test_psha COMMAND test_psha)
set_tests_properties(test_psha PROPERTIES COST 2.0)
add_executable(test_psha_capi test_psha2.cpp)
target_compile_definitions(test_psha_capi PRIVATE TEST_CAPI=1)
target_link_libraries(test_psha_capi ${SLEEF_OPENSSL_LIBRARIES})
target_include_directories(test_psha_capi PRIVATE ${SLEEF_OPENSSL_INCLUDE_DIR})
add_test(NAME test_psha_capi COMMAND test_psha_capi)
set_tests_properties(test_psha_capi PROPERTIES COST 2.0)
endif()
# Target TARGET_TESTERUTIL_OBJ
add_library(${TARGET_TESTERUTIL_OBJ} OBJECT testerutil.c)
target_compile_definitions(${TARGET_TESTERUTIL_OBJ} PRIVATE ${COMMON_TARGET_DEFINITIONS})
if(LIB_MPFR)
target_compile_definitions(${TARGET_TESTERUTIL_OBJ} PRIVATE USEMPFR=1)
target_link_libraries(${TARGET_TESTERUTIL_OBJ} ${LIB_MPFR} ${LIBGMP})
endif()
if (MPFR_INCLUDE_DIR)
target_include_directories(${TARGET_TESTERUTIL_OBJ} PRIVATE ${MPFR_INCLUDE_DIR})
endif()
# Target TARGET_QTESTERUTIL_OBJ
add_library(${TARGET_QTESTERUTIL_OBJ} OBJECT qtesterutil.c)
target_compile_definitions(${TARGET_QTESTERUTIL_OBJ} PRIVATE ${COMMON_TARGET_DEFINITIONS})
if(LIB_MPFR)
target_compile_definitions(${TARGET_QTESTERUTIL_OBJ} PRIVATE USEMPFR=1)
target_link_libraries(${TARGET_QTESTERUTIL_OBJ} ${LIB_MPFR} ${LIBGMP})
endif()
if (MPFR_INCLUDE_DIR)
target_include_directories(${TARGET_QTESTERUTIL_OBJ} PRIVATE ${MPFR_INCLUDE_DIR})
endif()
if(COMPILER_SUPPORTS_QUADMATH)
target_link_libraries(${TARGET_QTESTERUTIL_OBJ} "-lquadmath")
target_compile_definitions(${TARGET_QTESTERUTIL_OBJ} PRIVATE ENABLEFLOAT128=1)
endif()

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -207,7 +207,18 @@ int main(int argc, char **argv) {
nkeywords++;
if (nkeywords >= nalloc) {
nalloc *= 2;
keywords = realloc(keywords, sizeof(char *) * nalloc);
char ** tmp = realloc(keywords, sizeof(char *) * nalloc);
if (tmp == NULL) {
// free keywords if realloc fails
// otherwise address is lost.
free(keywords);
fclose(fp);
fprintf(stderr, "Failed realloc!\n");
exit(-1);
}
else {
keywords = tmp;
}
}
}
@ -228,6 +239,10 @@ int main(int argc, char **argv) {
fclose(fp);
for(int i=0;i<nkeywords;i++) free(keywords[i]);
free(keywords);
exit(0);
}

View File

@ -1,347 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <inttypes.h>
#include <assert.h>
//
// Platform helpers for whole-file locking, truncation and a scratch file.
// POSIX-like platforms use flock/ftruncate/tmpfile; MinGW and MSVC get
// reduced stand-ins.
#if !(defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER))
#include <unistd.h>
#include <sys/types.h>
#include <sys/file.h>
// Take an exclusive flock() on the descriptor behind fp (result is ignored).
static void FLOCK(FILE *fp) { flock(fileno(fp), LOCK_EX); }
// Release the flock() taken by FLOCK (result is ignored).
static void FUNLOCK(FILE *fp) { flock(fileno(fp), LOCK_UN); }
// Truncate the file behind fp to z bytes. The empty if-statement only
// consumes ftruncate's return value; a failure is silently ignored.
static void FTRUNCATE(FILE *fp, off_t z) {
if (ftruncate(fileno(fp), z))
;
}
// Scratch file created with tmpfile(); returns NULL on failure.
static FILE *OPENTMPFILE() { return tmpfile(); }
static void CLOSETMPFILE(FILE *fp) { fclose(fp); }
#else
#include <windows.h>
#include <io.h>
// No locking is performed in this branch.
static void FLOCK(FILE *fp) { }
static void FUNLOCK(FILE *fp) { }
// NOTE: z is ignored here; the stream is rewound to offset 0 and the end of
// file is set on the underlying handle.
static void FTRUNCATE(FILE *fp, long z) {
fseek(fp, 0, SEEK_SET);
SetEndOfFile((HANDLE)_get_osfhandle(_fileno(fp)));
}
// The scratch file is a fixed name, "tmpfile.txt", resolved relative to the
// current directory; CLOSETMPFILE removes it again.
static FILE *OPENTMPFILE() { return fopen("tmpfile.txt", "w+"); }
static void CLOSETMPFILE(FILE *fp) {
fclose(fp);
remove("tmpfile.txt");
}
#endif
//
#define MAGIC_ARRAYMAPNODE 0xf73130fa
#define MAGIC_ARRAYMAP 0x8693bd21
#define LOGNBUCKETS 8
#define NBUCKETS (1 << LOGNBUCKETS)

// Map a 64-bit key to a bucket index in [0, NBUCKETS): XOR the key with
// itself shifted right by one, two and three bucket-widths, then keep the
// low LOGNBUCKETS bits.
static int hash(uint64_t key) {
  uint64_t folded = key;
  for (int group = 1; group <= 3; group++) {
    folded ^= key >> (LOGNBUCKETS * group);
  }
  return folded & (NBUCKETS-1);
}
// Strip leading and trailing whitespace from str, in place.
//
// Leading whitespace is skipped, the rest is copied down to the start of the
// buffer, and the string is terminated right after the last non-whitespace
// character that was copied. An all-whitespace string becomes "".
static void String_trim(char *str) {
  char *dst = str, *src = str, *pterm = str;

  // isspace() is only defined for EOF and values representable as unsigned
  // char; a plain (possibly negative) char must be converted first.
  while(*src != '\0' && isspace((unsigned char)*src)) src++;

  for(;*src != '\0';src++) {
    *dst++ = *src;
    if (!isspace((unsigned char)*src)) pterm = dst; // one past last non-space
  }

  *pterm = '\0';
}
// One key/value entry stored in a bucket.
typedef struct ArrayMapNode {
uint32_t magic; // MAGIC_ARRAYMAPNODE while the slot is live; set to 0 when released
uint64_t key;
void *value; // caller-owned pointer; ArrayMap_put treats NULL as "remove"
} ArrayMapNode;
// Hash map from uint64_t keys to pointers, implemented as NBUCKETS
// independently growing arrays of ArrayMapNode.
typedef struct ArrayMap {
uint32_t magic; // MAGIC_ARRAYMAP while the map is live
ArrayMapNode *array[NBUCKETS]; // per-bucket node storage
// size[b] / capacity[b]: used and allocated slots of bucket b;
// totalSize: number of entries over all buckets.
int size[NBUCKETS], capacity[NBUCKETS], totalSize;
} ArrayMap;
// Allocate an empty map with an initial capacity of 8 nodes per bucket.
//
// Returns the new map, or NULL if any allocation fails (in which case
// everything allocated so far is released again). Release the map with
// ArrayMap_dispose().
ArrayMap *initArrayMap() {
  ArrayMap *thiz = (ArrayMap *)calloc(1, sizeof(ArrayMap));
  if (thiz == NULL) return NULL;

  thiz->magic = MAGIC_ARRAYMAP;

  for(int i=0;i<NBUCKETS;i++) {
    thiz->capacity[i] = 8;
    thiz->array[i] = (ArrayMapNode *)malloc(thiz->capacity[i] * sizeof(ArrayMapNode));
    if (thiz->array[i] == NULL) {
      // Roll back the buckets that were already allocated.
      for(int j=0;j<i;j++) free(thiz->array[j]);
      thiz->magic = 0;
      free(thiz);
      return NULL;
    }
    thiz->size[i] = 0;
  }

  thiz->totalSize = 0;
  return thiz;
}
// Release a map created by initArrayMap(). Stored values are NOT freed;
// only the node arrays and the map itself are. Magic numbers are cleared
// before freeing so that stale pointers trip the assertions elsewhere.
void ArrayMap_dispose(ArrayMap *thiz) {
  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  for (int bucket = 0; bucket < NBUCKETS; bucket++) {
    ArrayMapNode *nodes = thiz->array[bucket];
    const int used = thiz->size[bucket];

    for (int k = 0; k < used; k++) {
      assert(nodes[k].magic == MAGIC_ARRAYMAPNODE);
      nodes[k].magic = 0;
    }

    free(nodes);
  }

  thiz->magic = 0;
  free(thiz);
}
// Return the number of entries currently stored in the map (the running
// totalSize counter maintained by ArrayMap_put / ArrayMap_remove).
int ArrayMap_size(ArrayMap *thiz) {
assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);
return thiz->totalSize;
}
// Return a freshly malloc'ed array holding every key in the map, in bucket
// order. The array has ArrayMap_size() elements and must be free()d by the
// caller.
uint64_t *ArrayMap_keyArray(ArrayMap *thiz) {
  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  uint64_t *keys = (uint64_t *)malloc(sizeof(uint64_t) * thiz->totalSize);
  int filled = 0;

  for (int bucket = 0; bucket < NBUCKETS; bucket++) {
    const ArrayMapNode *nodes = thiz->array[bucket];
    const int used = thiz->size[bucket];

    for (int k = 0; k < used; k++) {
      assert(nodes[k].magic == MAGIC_ARRAYMAPNODE);
      keys[filled] = nodes[k].key;
      filled++;
    }
  }

  return keys;
}
// Return a freshly malloc'ed array holding every value pointer in the map,
// in the same bucket order as ArrayMap_keyArray(). The array itself must be
// free()d by the caller; the values it points to still belong to the map's
// owner.
void **ArrayMap_valueArray(ArrayMap *thiz) {
  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  void **values = (void **)malloc(sizeof(void *) * thiz->totalSize);
  int filled = 0;

  for (int bucket = 0; bucket < NBUCKETS; bucket++) {
    const ArrayMapNode *nodes = thiz->array[bucket];
    const int used = thiz->size[bucket];

    for (int k = 0; k < used; k++) {
      assert(nodes[k].magic == MAGIC_ARRAYMAPNODE);
      values[filled] = nodes[k].value;
      filled++;
    }
  }

  return values;
}
// Remove the entry for key. Returns the value that was stored, or NULL if
// the key was not present. The vacated slot is filled with the bucket's
// last entry, so the order of entries inside a bucket is not preserved.
void *ArrayMap_remove(ArrayMap *thiz, uint64_t key) {
  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  const int bucket = hash(key);
  ArrayMapNode *nodes = thiz->array[bucket];

  for (int k = 0; k < thiz->size[bucket]; k++) {
    assert(nodes[k].magic == MAGIC_ARRAYMAPNODE);
    if (nodes[k].key != key) continue;

    void *removed = nodes[k].value;
    ArrayMapNode *last = &nodes[thiz->size[bucket] - 1];

    // Move the last entry into the hole, then retire the last slot.
    nodes[k].key = last->key;
    nodes[k].value = last->value;
    last->magic = 0;

    thiz->size[bucket]--;
    thiz->totalSize--;
    return removed;
  }

  return NULL;
}
// Associate value with key.
//
// - value == NULL removes the key (see ArrayMap_remove).
// - If the key already exists its value is replaced and the previous value
//   is returned so the caller can release it.
// - Otherwise a new entry is appended to the key's bucket and NULL is
//   returned.
//
// If the bucket has to grow and memory is exhausted the process is aborted
// with a diagnostic: the return value has no way to report the failure, and
// the previous code dereferenced a NULL pointer in that situation.
void *ArrayMap_put(ArrayMap *thiz, uint64_t key, void *value) {
  if (value == NULL) return ArrayMap_remove(thiz, key);

  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  int h = hash(key);
  for(int i=0;i<thiz->size[h];i++) {
    assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE);
    if (thiz->array[h][i].key == key) {
      void *old = thiz->array[h][i].value;
      thiz->array[h][i].value = value;
      return old;
    }
  }

  if (thiz->size[h] >= thiz->capacity[h]) {
    // Grow through a temporary so the existing bucket is not lost (and the
    // recorded capacity stays truthful) if realloc fails.
    int newCapacity = thiz->capacity[h] * 2;
    ArrayMapNode *grown = (ArrayMapNode *)realloc(thiz->array[h], (size_t)newCapacity * sizeof(ArrayMapNode));
    if (grown == NULL) {
      fprintf(stderr, "ArrayMap_put : out of memory\n");
      abort();
    }
    thiz->array[h] = grown;
    thiz->capacity[h] = newCapacity;
  }

  ArrayMapNode *n = &(thiz->array[h][thiz->size[h]++]);
  n->magic = MAGIC_ARRAYMAPNODE;
  n->key = key;
  n->value = value;

  thiz->totalSize++;
  return NULL;
}
// Look up key. Returns the stored value, or NULL if the key is absent.
void *ArrayMap_get(ArrayMap *thiz, uint64_t key) {
  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  const int bucket = hash(key);
  const ArrayMapNode *nodes = thiz->array[bucket];
  const int used = thiz->size[bucket];

  for (int k = 0; k < used; k++) {
    assert(nodes[k].magic == MAGIC_ARRAYMAPNODE);
    if (nodes[k].key != key) continue;
    return nodes[k].value;
  }

  return NULL;
}
#define LINELEN (1024*1024)
ArrayMap *ArrayMap_load(const char *fn, const char *prefix, const char *idstr, int doLock) {
const int idstrlen = (int)strlen(idstr);
int prefixLen = (int)strlen(prefix) + 3;
if (prefixLen >= LINELEN-10 || idstrlen >= LINELEN-10) return NULL;
FILE *fp = fopen(fn, "r");
if (fp == NULL) return NULL;
if (doLock) FLOCK(fp);
ArrayMap *thiz = initArrayMap();
char *prefix2 = malloc(prefixLen+10);
strcpy(prefix2, prefix);
String_trim(prefix2);
for(char *p = prefix2;*p != '\0';p++) {
if (*p == ':') *p = ';';
if (*p == ' ') *p = '_';
}
strcat(prefix2, " : ");
prefixLen = (int)strlen(prefix2);
char *line = malloc(sizeof(char) * (LINELEN+10));
line[idstrlen] = '\0';
if (fread(line, sizeof(char), idstrlen, fp) != idstrlen ||
strcmp(idstr, line) != 0) {
if (doLock) FUNLOCK(fp);
fclose(fp);
free(prefix2);
free(line);
return NULL;
}
for(;;) {
line[LINELEN] = '\0';
if (fgets(line, LINELEN, fp) == NULL) break;
if (strncmp(line, prefix2, prefixLen) != 0) continue;
uint64_t key;
char *value = malloc(sizeof(char) * LINELEN);
if (sscanf(line + prefixLen, "%" SCNx64 " : %s\n", &key, value) == 2) {
ArrayMap_put(thiz, (uint64_t)key, (void *)value);
} else {
free(value);
}
}
if (doLock) FUNLOCK(fp);
fclose(fp);
free(prefix2);
free(line);
return thiz;
}
// Write the map into the text file `fn` under the section `prefix`.
//
// Lines of other prefixes already in the file are preserved (provided the
// file starts with `idstr`); lines of this prefix are replaced by the
// current map contents. Values are written with "%s", so they are expected
// to be NUL-terminated strings. The file is locked for the whole
// read-modify-write cycle and rebuilt through a scratch file.
//
// Returns 0 on success and -1 if an argument is too long, a file cannot be
// opened, or memory runs out. All resources are released on every path.
int ArrayMap_save(ArrayMap *thiz, const char *fn, const char *prefix, const char *idstr) {
  assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP);

  const int idstrlen = (int)strlen(idstr);
  int prefixLen = (int)strlen(prefix) + 3;

  if (prefixLen >= LINELEN-10 || idstrlen >= LINELEN-10) return -1;

  // Generate prefix2

  char *prefix2 = malloc(prefixLen+10);
  if (prefix2 == NULL) return -1;
  strcpy(prefix2, prefix);
  String_trim(prefix2);
  for(char *p = prefix2;*p != '\0';p++) {
    if (*p == ':') *p = ';';
    if (*p == ' ') *p = '_';
  }
  strcat(prefix2, " : ");
  prefixLen = (int)strlen(prefix2);

  //

  FILE *fp = fopen(fn, "a+");
  if (fp == NULL) {
    free(prefix2);
    return -1;
  }

  FLOCK(fp);
  fseek(fp, 0, SEEK_SET);

  // Copy the file specified by fn to tmpfile

  FILE *tmpfp = OPENTMPFILE();
  if (tmpfp == NULL) {
    FUNLOCK(fp);
    fclose(fp);
    free(prefix2);
    return -1;
  }

  char *line = malloc(sizeof(char) * (LINELEN+10));
  if (line == NULL) {
    FUNLOCK(fp);
    fclose(fp);
    CLOSETMPFILE(tmpfp);
    free(prefix2);
    return -1;
  }
  line[idstrlen] = '\0';

  if (fread(line, sizeof(char), idstrlen, fp) == (size_t)idstrlen && strcmp(idstr, line) == 0) {
    for(;;) {
      line[LINELEN] = '\0';
      if (fgets(line, LINELEN, fp) == NULL) break;
      if (strncmp(line, prefix2, prefixLen) != 0) fputs(line, tmpfp);
    }
  }

  // Write the contents in the map into tmpfile

  uint64_t *keys = ArrayMap_keyArray(thiz);
  int nkeys = ArrayMap_size(thiz);
  if (keys == NULL && nkeys > 0) {
    FUNLOCK(fp);
    fclose(fp);
    CLOSETMPFILE(tmpfp);
    free(prefix2);
    free(line);
    return -1;
  }

  for(int i=0;i<nkeys;i++) {
    char *value = ArrayMap_get(thiz, keys[i]);
    // Skip entries that would not fit into one line when read back.
    if (strlen(value) + prefixLen >= LINELEN-10) continue;
    fprintf(tmpfp, "%s %" PRIx64 " : %s\n", prefix2, keys[i], value);
  }

  free(keys);

  // Rewrite the target: id string first, then the scratch file's contents.

  fseek(fp, 0, SEEK_SET);
  FTRUNCATE(fp, 0);
  fwrite(idstr, sizeof(char), strlen(idstr), fp);

  fseek(tmpfp, 0, SEEK_SET);

  for(;;) {
    size_t nread = fread(line, 1, LINELEN, tmpfp);
    if (nread == 0) break;
    fwrite(line, 1, nread, fp);
  }

  FUNLOCK(fp);
  fclose(fp);

  CLOSETMPFILE(tmpfp);
  free(prefix2);
  free(line);
  return 0;
}

View File

@ -1,21 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef __ARRAYMAP_H__
#define __ARRAYMAP_H__

// uint64_t is part of the interface below, so pull it in here instead of
// relying on every includer to have included <stdint.h> first.
#include <stdint.h>

// Opaque map from uint64_t keys to caller-owned pointers.
typedef struct ArrayMap ArrayMap;

// Create / destroy. Disposing does not free the stored values.
ArrayMap *initArrayMap();
void ArrayMap_dispose(ArrayMap *thiz);

// Number of entries.
int ArrayMap_size(ArrayMap *thiz);

// remove/put return the value previously stored for key (or NULL);
// put with value == NULL removes the key; get returns NULL when absent.
void *ArrayMap_remove(ArrayMap *thiz, uint64_t key);
void *ArrayMap_put(ArrayMap *thiz, uint64_t key, void *value);
void *ArrayMap_get(ArrayMap *thiz, uint64_t key);

// Snapshot arrays of ArrayMap_size() elements; the caller frees the array.
uint64_t *ArrayMap_keyArray(ArrayMap *thiz);
void **ArrayMap_valueArray(ArrayMap *thiz);

// Text-file persistence; save returns 0 on success and -1 on failure,
// load returns NULL on failure.
int ArrayMap_save(ArrayMap *thiz, const char *fn, const char *prefix, const char *idstr);
ArrayMap *ArrayMap_load(const char *fn, const char *prefix, const char *idstr, int doLock);

#endif

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,9 +1,20 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef __COMMON_H__
#define __COMMON_H__
char *Sleef_getCpuIdString();
#ifdef __cplusplus
extern "C"
{
#endif
char *Sleef_getCpuIdString();
#ifdef __cplusplus
}
#endif
#endif // #ifndef __COMMON_H__

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2023.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -415,7 +415,7 @@ static INLINE CONST vquad add128_vq_vq_vq(vquad x, vquad y) {
static INLINE CONST vquad imdvq_vq_vm_vm(vmask x, vmask y) { vquad r = vqsetxy_vq_vm_vm(x, y); return r; }
// imm must be smaller than 64
#define srl128_vq_vq_i(m, imm) \
#define srl128_vq_vq_i(m, imm) \
imdvq_vq_vm_vm(vor_vm_vm_vm(vsrl64_vm_vm_i(vqgetx_vm_vq(m), imm), vsll64_vm_vm_i(vqgety_vm_vq(m), 64-imm)), vsrl64_vm_vm_i(vqgety_vm_vq(m), imm))
// This function is equivalent to :

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2024.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2024.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,92 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <quadmath.h>
#include <inttypes.h>
static __float128 mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd) {
if (isnan(mpfr_get_d(m, GMP_RNDN))) return __builtin_nan("");
mpfr_t frr, frd;
mpfr_inits(frr, frd, NULL);
mpfr_exp_t e;
mpfr_frexp(&e, frr, m, GMP_RNDN);
double d0 = mpfr_get_d(frr, GMP_RNDN);
mpfr_set_d(frd, d0, GMP_RNDN);
mpfr_sub(frr, frr, frd, GMP_RNDN);
double d1 = mpfr_get_d(frr, GMP_RNDN);
mpfr_set_d(frd, d1, GMP_RNDN);
mpfr_sub(frr, frr, frd, GMP_RNDN);
double d2 = mpfr_get_d(frr, GMP_RNDN);
mpfr_clears(frr, frd, NULL);
return ldexpq((__float128)d2 + (__float128)d1 + (__float128)d0, e);
}
static void mpfr_set_f128(mpfr_t frx, __float128 f, mpfr_rnd_t rnd) {
char s[128];
quadmath_snprintf(s, 120, "%.50Qg", f);
mpfr_set_str(frx, s, 10, rnd);
}
static void printf128(__float128 f) {
char s[128];
quadmath_snprintf(s, 120, "%.50Qg", f);
printf("%s", s);
}
// Ring of 16 scratch buffers handed out in turn by toBC / toBCq, so that up
// to 16 results can be alive at once (e.g. as arguments of one printf).
static char frstr[16][1000];
static int frstrcnt = 0;

// Format d as "<sign><integer significand>*2^<exponent>".
//
// The significand is the 52 stored fraction bits with the implicit leading
// one always set (also for subnormals, as before), except that zero is
// printed with significand 0. The returned pointer refers to one of the 16
// shared buffers and is overwritten after 16 further calls.
static char *toBC(double d) {
  union {
    double d;
    uint64_t u64;
    int64_t i64;
  } cnv;

  cnv.d = d;

  // Work on the unsigned bit pattern with fixed-width constants: shifting
  // the negative value -1L left is undefined, and 1L << 52 overflows where
  // long is 32 bits wide.
  const uint64_t bits = cnv.u64;
  int e = (int)((bits >> 52) & 0x7ff);
  int s = (int)(bits >> 63);
  int64_t l = d == 0 ? 0 : (int64_t)((bits & ((UINT64_C(1) << 52) - 1)) | (UINT64_C(1) << 52));

  char *ptr = frstr[frstrcnt];
  frstrcnt = (frstrcnt + 1) & 15; // keeps the counter bounded

  snprintf(ptr, sizeof(frstr[0]), "%s%lld*2^%d", s != 0 ? "-" : "", (long long int)l, (e-0x3ff-52));

  return ptr;
}
// Quad-precision counterpart of toBC: format d as
// "<sign><integer significand>*2^<exponent>".
//
// The 113-bit significand (112 stored bits plus the implicit one, zero
// printed as 0) does not fit any printf conversion, so it is split into a
// high part and a zero-padded 19-digit low part around 10^19. The result
// lives in one of the 16 shared frstr buffers.
static char *toBCq(__float128 d) {
  union {
    __float128 d;
    __uint128_t u128;
  } cnv;

  cnv.d = d;

  __uint128_t m = cnv.u128;
  // 15-bit exponent field; a literal mask avoids left-shifting -1L, which
  // is undefined behaviour.
  int e = (int)((m >> 112) & 0x7fff);
  int s = (int)(m >> 127);
  m = d == 0 ? 0 : ((m & ((((__uint128_t)1) << 112)-1)) | ((__uint128_t)1 << 112));

  uint64_t h = m / UINT64_C(10000000000000000000);
  uint64_t l = m % UINT64_C(10000000000000000000);

  char *ptr = frstr[(frstrcnt++) & 15];

  snprintf(ptr, sizeof(frstr[0]), "%s%" PRIu64 "%019" PRIu64 "*2^%d", s != 0 ? "-" : "", h, l, (e-0x3fff-112));

  return ptr;
}
static int xisnanq(Sleef_quad x) { return x != x; }
static int xisinfq(Sleef_quad x) { return x == (Sleef_quad)__builtin_inf() || x == -(Sleef_quad)__builtin_inf(); }
// Nonzero iff x is neither NaN nor +/-infinity. Built on the two sibling
// predicates so it does not additionally depend on libquadmath's isinfq().
static int xisfiniteq(Sleef_quad x) { return !xisnanq(x) && !xisinfq(x); }

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2024.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -13,10 +13,15 @@
#include <string.h>
#endif
#ifndef M_PI
#define M_PI 3.141592653589793238462643383279502884
#endif
#ifndef M_PIf
# define M_PIf ((float)M_PI)
#endif
#ifndef M_PIl
#define M_PIl 3.141592653589793238462643383279502884L
#endif
@ -137,9 +142,17 @@
#define L2Lf 1.428606765330187045e-06f
#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f
#ifndef M_PIf
# define M_PIf ((float)M_PI)
#endif
// Overflow bounds
// - exp(x) overflows for x over (also used in pow)
#define LOG_DBL_MAX 0x1.62e42fefa39efp+9 /* 709.782712893384 */
// Other bounds
// - log1p(f)(x) approximation holds up to x equals
#define LOG1PF_BOUND 0x1.2ced32p+126 /* 1.0e+38 */
#define LOG1P_BOUND 0x1.c7b1f3cac7433p+1019 /* 1.0e+307 */
//
@ -249,6 +262,9 @@ typedef struct {
#else // #if defined(SLEEF_GENHEADER)
#define INLINE __forceinline
#ifdef CONST
#undef CONST
#endif
#define CONST
#ifndef SLEEF_STATIC_LIBS
#define EXPORT __declspec(dllexport)

View File

@ -0,0 +1,182 @@
#ifndef __PSHA2_HPP_INCLUDED__
#define __PSHA2_HPP_INCLUDED__
#include <cstddef>
#include <cstdint>
// Minimal streaming SHA-256 hasher.
//
// Usage: construct, feed data with append()/appendWord()/append_byte(), then
// call finalize_bytes() once to obtain the 32-byte digest. Nothing resets the
// state afterwards, so use a fresh object for every message.
struct PSHA2_256_Internal {
// https://github.com/983/SHA-256
// This is public domain implementation of SHA256
// Rotate x right by n bits. n must be in 1..31: n == 0 would shift by 32.
static inline uint32_t rotr(uint32_t x, int n) {
return (x >> n) | (x << (32 - n));
}
// Round helper over (e, f, g): XOR of three rotations of e plus the
// bitwise choice "e ? f : g".
static inline uint32_t step1(uint32_t e, uint32_t f, uint32_t g) {
return (rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)) + ((e & f) ^ ((~ e) & g));
}
// Round helper over (a, b, c): XOR of three rotations of a plus the
// bitwise majority of a, b, c.
static inline uint32_t step2(uint32_t a, uint32_t b, uint32_t c) {
return (rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22)) + ((a & b) ^ (a & c) ^ (b & c));
}
// Produce the next 16 message-schedule words in w, which is used as a
// 16-entry circular buffer. For the first group (i < 16) the words are read
// big-endian from buffer; for later groups each word is updated in place
// from the previous 16 words.
static inline void update_w(uint32_t *w, int i, const uint8_t *buffer) {
int j;
for(j = 0;j < 16;j++) {
if (i < 16) {
w[j] =
((uint32_t)buffer[0] << 24) |
((uint32_t)buffer[1] << 16) |
((uint32_t)buffer[2] << 8) |
((uint32_t)buffer[3]);
buffer += 4;
} else {
uint32_t a = w[(j + 1) & 15];
uint32_t b = w[(j + 14) & 15];
uint32_t s0 = (rotr(a, 7) ^ rotr(a, 18) ^ (a >> 3));
uint32_t s1 = (rotr(b, 17) ^ rotr(b, 19) ^ (b >> 10));
w[j] += w[(j + 9) & 15] + s0 + s1;
}
}
}
uint32_t state[8]; // running hash value (eight 32-bit words)
uint64_t n_bits; // number of message bits appended so far
uint8_t buffer_counter; // bytes currently held in buffer (0..63)
uint8_t buffer[64]; // pending input for the current 64-byte block
// Start a new hash: load the initial state words and clear the buffer.
PSHA2_256_Internal() {
state[0] = 0x6a09e667;
state[1] = 0xbb67ae85;
state[2] = 0x3c6ef372;
state[3] = 0xa54ff53a;
state[4] = 0x510e527f;
state[5] = 0x9b05688c;
state[6] = 0x1f83d9ab;
state[7] = 0x5be0cd19;
n_bits = 0;
buffer_counter = 0;
for(int i=0;i<64;i++) buffer[i] = 0;
}
// Compress the 64 bytes in buffer into state.
void block() {
// Per-round constants, one for each of the 64 rounds.
static const uint32_t k[] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};
uint32_t a = state[0];
uint32_t b = state[1];
uint32_t c = state[2];
uint32_t d = state[3];
uint32_t e = state[4];
uint32_t f = state[5];
uint32_t g = state[6];
uint32_t h = state[7];
uint32_t w[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
// 64 rounds, processed as 4 groups of 16 schedule words.
for(int i = 0;i < 64;i += 16) {
update_w(w, i, buffer);
#if defined(__clang__)
#pragma clang loop unroll(full)
#endif
// Four rounds per iteration. Instead of shifting all eight working
// variables after every round, each round uses them in rotated roles,
// so after four rounds a..d and e..h are back in their original slots.
for(int j = 0;j < 16;j += 4) {
uint32_t temp;
temp = h + step1(e, f, g) + k[i + j + 0] + w[j + 0];
h = temp + d;
d = temp + step2(a, b, c);
temp = g + step1(h, e, f) + k[i + j + 1] + w[j + 1];
g = temp + c;
c = temp + step2(d, a, b);
temp = f + step1(g, h, e) + k[i + j + 2] + w[j + 2];
f = temp + b;
b = temp + step2(c, d, a);
temp = e + step1(f, g, h) + k[i + j + 3] + w[j + 3];
e = temp + a;
a = temp + step2(b, c, d);
}
}
// Add the compressed block into the running hash value.
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
}
// Append one byte; compresses the buffer as soon as it holds 64 bytes.
void append_byte(uint8_t byte) {
buffer[buffer_counter++] = byte;
n_bits += 8;
if (buffer_counter == 64) {
buffer_counter = 0;
block();
}
}
// Append n_bytes bytes from src in memory order.
void append(const void *src, size_t n_bytes) {
for(size_t i = 0;i < n_bytes;i++) {
append_byte(((const uint8_t*)src)[i]);
}
}
// Append the bytes of src in memory order, except when the compiler reports
// a big-endian target, where they are appended in reverse order. For an
// integer object this yields least-significant-byte-first on both kinds of
// host.
void appendWord(const void *src, size_t n_bytes) {
#if !defined(__BYTE_ORDER__) || (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
for(size_t i = 0;i < n_bytes;i++) {
append_byte(((const uint8_t*)src)[i]);
}
#else
for(int i = int(n_bytes)-1;i >= 0;i--) {
append_byte(((const uint8_t*)src)[i]);
}
#endif
}
// Apply the final padding: a 0x80 byte, zero bytes until 8 bytes remain in
// the current block, then the message length in bits as a 64-bit big-endian
// integer. The last append_byte() fills the block and triggers block().
void finalize() {
// Capture the length first: the padding bytes below also bump n_bits.
uint64_t nb = n_bits;
append_byte(0x80);
while(buffer_counter != 64 - 8) {
append_byte(0);
}
for(int i = 7;i >= 0;i--) {
uint8_t byte = (nb >> 8 * i) & 0xff;
append_byte(byte);
}
}
// Finalize the hash and write the 32-byte digest to dst_bytes32, each state
// word most-significant byte first.
void finalize_bytes(void *dst_bytes32) {
uint8_t *ptr = (uint8_t*)dst_bytes32;
finalize();
for(int i = 0;i < 8;i++) {
for(int j = 3;j >= 0;j--) {
*ptr++ = (state[i] >> j * 8) & 0xff;
}
}
}
};
#endif // #ifndef __PSHA2_HPP_INCLUDED__

View File

@ -0,0 +1,57 @@
#include "psha2.hpp"
#include "psha2_capi.h"
#include <cstdlib>
// Return the digest descriptor for SHA-256. In this shim a descriptor is
// simply a pointer to an int tag, and the tag value 1 means SHA-256.
const EVP_MD *EVP_sha256(void) {
  static const int sha256Tag[1] = { 1 };
  return sha256Tag;
}
// Digest length in bytes for descriptor e: SHA256_DIGEST_LENGTH for the
// SHA-256 tag, 0 for any other tag.
size_t EVP_MD_size(const EVP_MD *e) {
  return (*e == 1) ? SHA256_DIGEST_LENGTH : 0;
}
// Same as EVP_MD_size() but with the int return type of the newer OpenSSL
// spelling: SHA256_DIGEST_LENGTH for the SHA-256 tag, otherwise 0.
int EVP_MD_get_size(const EVP_MD *e) {
  const bool isSha256 = (*e == 1);
  if (!isSha256) return 0;
  return SHA256_DIGEST_LENGTH;
}
EVP_MD_CTX *EVP_MD_CTX_new(void) {
return (EVP_MD_CTX *)calloc(1, sizeof(EVP_MD_CTX));
}
// Prepare ctx for hashing with the digest described by type.
//
// ctx must come from EVP_MD_CTX_new() (which zero-initialises it); a context
// that was already initialised may be passed again and is reset, releasing
// the hasher it held. impl is accepted for signature compatibility and
// ignored.
//
// Returns 1 on success, 0 if an argument is NULL or the digest type is not
// supported.
int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl) {
  (void)impl;
  if (ctx == nullptr || type == nullptr) return 0;

  // Re-initialisation: drop the previous hasher instead of leaking it.
  if (ctx->type == 1) {
    delete ctx->psha_256;
    ctx->psha_256 = nullptr;
  }

  ctx->type = *type;
  if (*type == 1) {
    ctx->psha_256 = new PSHA2_256_Internal();
    return 1;
  }
  return 0;
}
// Feed cnt bytes starting at d into the hash held by ctx.
// Returns 1 on success and 0 if ctx is not set up for SHA-256.
int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *d, size_t cnt) {
  if (ctx->type != 1) return 0;

  ctx->psha_256->append(d, cnt);
  return 1;
}
// Finish the hash held by ctx and store the digest in md, which must have
// room for SHA256_DIGEST_LENGTH bytes. If s is non-NULL it receives the
// digest length. Returns 1 on success and 0 if ctx is not set up for
// SHA-256.
int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *s) {
  if (ctx->type != 1) return 0;

  ctx->psha_256->finalize_bytes(md);
  if (s) {
    *s = SHA256_DIGEST_LENGTH;
  }
  return 1;
}
// Release a context obtained from EVP_MD_CTX_new(), including the hasher it
// owns. Passing NULL is a no-op, matching the OpenSSL function this shim
// stands in for.
void EVP_MD_CTX_free(EVP_MD_CTX *ctx) {
  if (ctx == nullptr) return;

  if (ctx->type == 1) {
    delete ctx->psha_256;
    ctx->psha_256 = nullptr;
  }
  free(ctx);
}

View File

@ -0,0 +1,30 @@
// Minimal stand-in for the parts of the OpenSSL EVP digest API used with
// the bundled SHA-256 implementation.
//
// The include guard makes repeated inclusion harmless; without it a second
// inclusion redefines SHA256_DIGEST_LENGTH and the EVP_MD_CTX struct.
#ifndef __PSHA2_CAPI_H_INCLUDED__
#define __PSHA2_CAPI_H_INCLUDED__

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

  static const size_t SHA256_DIGEST_LENGTH = 32;

  // A digest descriptor is a pointer to an int tag (1 == SHA-256).
  typedef int EVP_MD;
  typedef void ENGINE;

  // Digest context: the tag of the selected digest plus the hasher object
  // that EVP_DigestInit_ex() creates for it.
  typedef struct {
    int type;
    union {
      struct PSHA2_256_Internal *psha_256;
    };
  } EVP_MD_CTX;

  const EVP_MD *EVP_sha256(void);
  int EVP_MD_get_size(const EVP_MD *);
  size_t EVP_MD_size(const EVP_MD *);
  EVP_MD_CTX *EVP_MD_CTX_new(void);
  int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl);
  int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *d, size_t cnt);
  int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *s);
  void EVP_MD_CTX_free(EVP_MD_CTX *ctx);

#ifdef __cplusplus
}
#endif

#endif // #ifndef __PSHA2_CAPI_H_INCLUDED__

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -22,6 +22,10 @@
#include <mpfr.h>
#endif
#ifdef ENABLEFLOAT128
#include <quadmath.h>
#endif
#if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)
#define STDIN_FILENO 0
#else
@ -42,33 +46,6 @@
//
// Read one line from fd into buf, which has room for cnt bytes.
//
// Bytes are read one at a time until a newline has been stored or only the
// byte for the terminator is left. On success buf is NUL-terminated (the
// newline, if one was read, is kept) and the number of bytes stored before
// the terminator is returned.
//
// Returns -1 if cnt < 1. If read() does not deliver a byte (end of file or
// error) its return value (0 or -1) is passed through and buf is left
// without a terminator, as before.
int readln(int fd, char *buf, int cnt) {
  int rcnt = 0;

  if (cnt < 1) return -1;

  while(cnt >= 2) {
    int i = read(fd, buf, 1);
    if (i != 1) return i;

    rcnt++;
    cnt--;
    // Advance past the byte just stored so that buf always points at the
    // slot for the terminator; the old "*++buf" skipped a slot and wrote one
    // byte past the end of the buffer when the line did not fit.
    if (*buf++ == '\n') break;
  }

  *buf = '\0';

  return rcnt;
}
// Nonzero iff str begins with prefix (an empty prefix always matches).
int startsWith(char *str, char *prefix) {
  const size_t prefixLen = strlen(prefix);
  if (strncmp(str, prefix, prefixLen) != 0) return 0;
  return 1;
}
//
xuint128 xu(uint64_t h, uint64_t l) {
xuint128 r = { .l = l, .h = h };
return r;
@ -150,31 +127,6 @@ int isnanf128(Sleef_quad a) {
//
// State of the generator below.
static uint64_t xseed;

// Return 64 pseudo-random bits. The state is advanced twice per call with
// the same multiply-and-add step; the result combines the upper 32 bits of
// the state before the call with the upper 32 bits of the state after the
// first step.
uint64_t xrand() {
  uint64_t u = xseed;
  xseed = xseed * UINT64_C(6364136223846793005) + 1;
  u = (u & ((~UINT64_C(0)) << 32)) | (xseed >> 32);
  xseed = xseed * UINT64_C(6364136223846793005) + 1;
  return u;
}

// Seed the generator and discard the first three outputs.
void xsrand(uint64_t s) {
  xseed = s;
  xrand();
  xrand();
  xrand();
}

// Fill exactly size bytes at p with pseudo-random data.
//
// Whole 8-byte groups receive one xrand() value each (copied with memcpy, so
// p does not have to be 8-byte aligned); every remaining byte takes the low
// byte of a fresh xrand() value. The previous loop stored a full 64-bit word
// whenever at least one byte was left, overrunning the buffer by up to
// 7 bytes for sizes that are not a multiple of 8. Sizes <= 0 write nothing.
void memrand(void *p, int size) {
  uint8_t *dst = (uint8_t *)p;
  int i = 0;

  for(;size - i >= 8;i += 8) {
    uint64_t u = xrand();
    memcpy(dst + i, &u, sizeof(u));
  }

  for(;i<size;i++) dst[i] = xrand() & 0xff;
}
Sleef_quad rndf128(Sleef_quad min, Sleef_quad max, int setSignRandomly) {
cnv_t cmin = { .q = min }, cmax = { .q = max }, c;
do {
@ -581,6 +533,14 @@ char *sprintf128(Sleef_quad q) {
return ret;
}
#ifdef QUADMATH_H
void printf128(Sleef_quad f) {
char s[128];
quadmath_snprintf(s, 120, "%.50Qg", f);
printf("%s", s);
}
#endif
double cast_d_q(Sleef_quad q) {
mpfr_t fr;
mpfr_inits(fr, NULL);

View File

@ -1,9 +1,14 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include "quaddef.h"
#include "testerutil.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
@ -33,24 +38,6 @@ int isinff128(Sleef_quad a);
int isnonnumberf128(Sleef_quad a);
int isnanf128(Sleef_quad a);
// Reinterpret the 64 bits of u as a double (no numeric conversion).
static double u2d(uint64_t u) {
  union {
    uint64_t bits;
    double value;
  } pun;
  pun.bits = u;
  return pun.value;
}
// Return the 64-bit pattern that represents d (no numeric conversion).
static uint64_t d2u(double d) {
  union {
    uint64_t bits;
    double value;
  } pun;
  pun.value = d;
  return pun.bits;
}
#ifdef USEMPFR
void mpfr_set_f128(mpfr_t frx, Sleef_quad a, mpfr_rnd_t rnd);
Sleef_quad mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd);
@ -59,8 +46,16 @@ double countULPf128(Sleef_quad d, mpfr_t c, int checkNegZero);
char *sprintfr(mpfr_t fr);
char *sprintf128(Sleef_quad x);
#ifdef QUADMATH_H
void printf128(Sleef_quad f);
#endif
double cast_d_q(Sleef_quad q);
Sleef_quad cast_q_str(const char *s);
Sleef_quad cast_q_str_hex(const char *s);
Sleef_quad add_q_d(Sleef_quad q, double d);
#endif
#ifdef __cplusplus
}
#endif

View File

@ -1,17 +1,11 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#if !defined(SLEEF_GENHEADER)
#if (defined(__SIZEOF_FLOAT128__) && __SIZEOF_FLOAT128__ == 16) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8)
#define SLEEF_FLOAT128_IS_IEEEQP
#endif
#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__))
#define SLEEF_LONGDOUBLE_IS_IEEEQP
#endif
#include "sleef-config.h"
#if !defined(Sleef_quad_DEFINED)
#define Sleef_quad_DEFINED
@ -74,14 +68,6 @@ typedef union {
#else // #if !defined(SLEEF_GENHEADER)
SLEEFSHARPif !defined(SLEEFXXX__NVCC__) && ((defined(SLEEFXXX__SIZEOF_FLOAT128__) && SLEEFXXX__SIZEOF_FLOAT128__ == 16) || (defined(SLEEFXXX__linux__) && defined(SLEEFXXX__GNUC__) && (defined(SLEEFXXX__i386__) || defined(SLEEFXXX__x86_64__))) || (defined(SLEEFXXX__PPC64__) && defined(SLEEFXXX__GNUC__) && !defined(SLEEFXXX__clang__) && SLEEFXXX__GNUC__ >= 8))
SLEEFSHARPdefine SLEEFXXXSLEEF_FLOAT128_IS_IEEEQP
SLEEFSHARPendif
SLEEFSHARPif !defined(SLEEFXXXSLEEF_FLOAT128_IS_IEEEQP) && !defined(SLEEFXXX__NVCC__) && defined(SLEEFXXX__SIZEOF_LONG_DOUBLE__) && SLEEFXXX__SIZEOF_LONG_DOUBLE__ == 16 && (defined(SLEEFXXX__aarch64__) || defined(SLEEFXXX__zarch__))
SLEEFSHARPdefine SLEEFXXXSLEEF_LONGDOUBLE_IS_IEEEQP
SLEEFSHARPendif
SLEEFSHARPif !defined(SLEEFXXXSleef_quad_DEFINED)
SLEEFSHARPdefine SLEEFXXXSleef_quad_DEFINED
typedef struct { uint64_t x, y; } Sleef_uint64_2t;

View File

@ -0,0 +1,58 @@
#include "psha2.hpp"
#if TEST_CAPI
#include "psha2_capi.h"
#else
#include <openssl/sha.h>
#include <openssl/evp.h>
#endif
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <ctime>
// Randomised cross-check of PSHA2_256_Internal against the EVP digest API
// (OpenSSL, or the bundled C shim when TEST_CAPI is set).
//
// Up to 10000 random messages of random length are hashed both ways and the
// digests compared. Prints "OK" and returns 0 if every pair matches; prints
// "NG" and returns -1 on the first mismatch or EVP failure. An allocation
// failure is reported on stderr and also returns -1.
int main(int argc, char **argv) {
  (void)argc;
  (void)argv;

  srand((unsigned)time(NULL));

  bool success = true;

  for(int trial=0;trial<10000 && success;trial++) {
    int len = (rand() + ((int64_t)RAND_MAX + 1) * rand()) % (1 << (1 + (rand() % 18)));

    // Never ask for 0 bytes: malloc(0) may legitimately return NULL.
    unsigned char *plaintext = (unsigned char *)malloc(len > 0 ? len : 1);
    if (plaintext == NULL) {
      fprintf(stderr, "Failed to allocate %d bytes\n", len);
      return -1;
    }
    for(int k=0;k<len;k++) plaintext[k] = rand() & 0xff;

    //

    PSHA2_256_Internal psha;
    unsigned char dgst0[SHA256_DIGEST_LENGTH];
    psha.append(plaintext, len);
    psha.finalize_bytes(dgst0);

    //

    unsigned char dgst1[SHA256_DIGEST_LENGTH];
    EVP_MD_CTX *ctx = EVP_MD_CTX_new();
    if (ctx == NULL) {
      fprintf(stderr, "EVP_MD_CTX_new failed\n");
      free(plaintext);
      return -1;
    }

    // Only compare dgst1 if every EVP step reported success; otherwise it
    // may never have been written.
    bool evpOk =
      EVP_DigestInit_ex(ctx, EVP_sha256(), NULL) == 1 &&
      EVP_DigestUpdate(ctx, plaintext, len) == 1 &&
      EVP_DigestFinal_ex(ctx, dgst1, NULL) == 1;
    EVP_MD_CTX_free(ctx);

    //

    if (!evpOk || memcmp(dgst0, dgst1, SHA256_DIGEST_LENGTH) != 0) success = false;

    free(plaintext);
  }

  if (success) {
    printf("OK\n");
    return 0;
  }

  printf("NG\n");
  return -1;
}

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2023.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -38,7 +38,7 @@
#define POSITIVE_INFINITYf ((float)INFINITY)
#define NEGATIVE_INFINITYf (-(float)INFINITY)
int isnumber(double x) { return !isinf(x) && !isnan(x); }
int xisnumber(double x) { return !isinf(x) && !isnan(x); }
int isPlusZero(double x) { return x == 0 && copysign(1, x) == 1; }
int isMinusZero(double x) { return x == 0 && copysign(1, x) == -1; }
double sign(double d) { return d < 0 ? -1 : 1; }
@ -83,21 +83,38 @@ int readln(int fd, char *buf, int cnt) {
static uint64_t xseed;
uint64_t xrand() {
uint64_t u = xseed;
xseed = xseed * UINT64_C(6364136223846793005) + 1;
return xseed;
u = (u & ((~UINT64_C(0)) << 32)) | (xseed >> 32);
xseed = xseed * UINT64_C(6364136223846793005) + 1;
return u;
}
void xsrand(uint64_t s) {
xseed = s;
xrand();
xrand();
xrand();
}
// Fill memory with random bits
void memrand(void *p, int size) {
uint64_t *q = (uint64_t *)p;
uint8_t *q = (uint8_t *)p;
int i;
for(i=0;i<size/8;i++) *q++ = xrand();
uint8_t *r = (uint8_t *)q;
for(i *= 8;i<size;i++) *r++ = xrand() & 0xff;
for(i=0;i<(size & ~7);i+=8) {
uint64_t u = xrand();
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
*q++ = (uint8_t)(u & 0xff); u >>= 8;
}
for(;i<size;i++) *q++ = xrand() & 0xff;
}
void xsrand(uint64_t s) { xseed = s; }
//
#ifdef USEMPFR

View File

@ -0,0 +1,144 @@
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <string.h>
#ifdef __cplusplus
#include <tlfloat/tlfloat.h>
using namespace tlfloat;
#endif
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wvla-cxx-extension"
#pragma clang diagnostic ignored "-Wuninitialized"
#pragma clang diagnostic ignored "-Wtautological-compare"
#endif
#define DENORMAL_DBL_MIN (4.9406564584124654418e-324)
#define POSITIVE_INFINITY INFINITY
#define NEGATIVE_INFINITY (-INFINITY)
#define DENORMAL_FLT_MIN (1.4012984643248170709e-45f)
#define POSITIVE_INFINITYf ((float)INFINITY)
#define NEGATIVE_INFINITYf (-(float)INFINITY)
#ifndef M_PIf
# define M_PIf ((float)M_PI)
#endif
#ifdef __cplusplus
extern "C" {
#endif
extern int enableFlushToZero;
double flushToZero(double y);
int xisnumber(double x);
int isPlusZero(double x);
int isMinusZero(double x);
int xisnan(double x);
double sign(double d);
int isnumberf(float x);
int isPlusZerof(float x);
int isMinusZerof(float x);
int xisnanf(float x);
float signf(float d);
int readln(int fd, char *buf, int cnt);
#define XRAND_MAX (INT64_C(0x100000000) * (double)INT64_C(0x100000000))
void xsrand(uint64_t s);
uint64_t xrand();
void memrand(void *p, int size);
// The following functions are meant to be inlined
// Reinterprets the bit pattern of a 64-bit integer as a double.
// This is a bitwise copy, not a numeric conversion.
static double u2d(uint64_t u) {
  double value = 0.0;
  memcpy(&value, &u, sizeof(value));
  return value;
}
// Returns the raw bit pattern of a double as a 64-bit integer.
// This is a bitwise copy, not a numeric conversion.
static uint64_t d2u(double d) {
  uint64_t bits = 0;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
// Reinterprets the bit pattern of a 32-bit integer as a float.
// This is a bitwise copy, not a numeric conversion.
static float u2f(uint32_t u) {
  float value = 0.0f;
  memcpy(&value, &u, sizeof(value));
  return value;
}
// Returns the raw bit pattern of a float as a 32-bit integer.
// This is a bitwise copy, not a numeric conversion.
static uint32_t f2u(float d) {
  uint32_t bits = 0;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
// Returns 1 if str begins with prefix (an empty prefix always matches),
// and 0 otherwise. Both arguments must be NUL-terminated strings.
static int startsWith(char *str, char *prefix) {
  for(; *prefix != '\0'; str++, prefix++) {
    // A shorter str fails here too: its terminating NUL differs from the
    // non-NUL prefix character, so we never read past the end of str.
    if (*str != *prefix) return 0;
  }
  return 1;
}
//
#ifdef USEMPFR
int cmpDenormdp(double x, mpfr_t fry);
double countULPdp(double d, mpfr_t c);
double countULP2dp(double d, mpfr_t c);
int cmpDenormsp(float x, mpfr_t fry);
double countULPsp(float d, mpfr_t c);
double countULP2sp(float d, mpfr_t c);
#if MPFR_VERSION < MPFR_VERSION_NUM(4, 2, 0)
void mpfr_sinpi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
void mpfr_cospi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
#endif
void mpfr_lgamma_nosign(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
#endif
#ifdef __cplusplus
}
// Scores how far a value `ot` is from a value `oc`, scaled by a unit derived
// from the exponent of `oc` and the mantissa width `nbmant`.
// NOTE(review): by naming, `ot` is presumably the tested output and `oc` the
// correct (reference) value, and the result is an error in ULPs -- confirm
// against the callers.
//
// Parameters:
//   ot, oc          - the two values being compared (ot is taken by value
//                     because it may be clamped below)
//   nbmant          - mantissa width used to derive the scaling unit
//   fltmin, fltmax  - magnitude limits: |oc| below fltmin/2 is treated as
//                     zero, |oc| above fltmax is treated as infinite
//   checkSignedZero - when true, a zero result with the wrong sign is an error
//   abound          - absolute error below which the result is reported as 0
//
// Returns the scaled error, or one of these sentinel values:
//   0        both NaN; both "infinite" with equal sign; or |oc - ot| < abound
//   10000    oc counts as zero but ot is non-zero
//   10001    exactly one of the two values is NaN
//   10002    sign mismatch on zero (only when checkSignedZero is set)
//   INFINITY oc is infinite but ot is not
template<typename T>
static double countULP(T ot, const T& oc,
const int nbmant, const T& fltmin, const T& fltmax,
const bool checkSignedZero=false, const double abound=0.0) {
// NaN handling: matching NaNs are exact, a one-sided NaN is an error
if (isnan_(oc) && isnan_(ot)) return 0;
if (isnan_(oc) || isnan_(ot)) return 10001;
if (isinf_(oc) && !isinf_(ot)) return INFINITY;
const T halffltmin = mul_(fltmin, T(0.5));
// Classify oc by magnitude relative to the supplied limits
const bool ciszero = fabs_(oc) < halffltmin, cisinf = fabs_(oc) > fltmax;
if (cisinf && isinf_(ot) && signbit_(oc) == signbit_(ot)) return 0;
if (ciszero && ot != 0) return 10000;
if (checkSignedZero && ciszero && ot == 0 && signbit_(oc) != signbit_(ot)) return 10002;
double v = 0;
// ot overflowed to infinity while oc is finite: clamp ot to +/-fltmax so the
// difference below is finite, and add 1 to the final score as a penalty
if (isinf_(ot) && !isinf_(oc)) {
ot = copysign_(fltmax, ot);
v = 1;
}
const int ec = ilogb_(oc);
auto e = fabs_(oc - ot);
if (e < abound) return 0;
// Scale the absolute difference by ldexp_(1, ec + 1 - nbmant), but never by
// less than fltmin
return double(div_(e, fmax_(ldexp_(T(1), ec + 1 - nbmant), fltmin))) + v;
}
#endif

View File

@ -73,8 +73,36 @@ if((NOT MSVC) AND NOT SLEEF_CLANG_ON_WINDOWS)
add_test_dft(${TARGET_NAIVETESTSP}_4 $<TARGET_FILE:${TARGET_NAIVETESTSP}> 4)
add_test_dft(${TARGET_NAIVETESTSP}_5 $<TARGET_FILE:${TARGET_NAIVETESTSP}> 5)
add_test_dft(${TARGET_NAIVETESTSP}_10 $<TARGET_FILE:${TARGET_NAIVETESTSP}> 10)
# Target executable measuredft
set(TARGET_MEASUREDFT "measuredft")
add_executable(${TARGET_MEASUREDFT} measuredft.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h)
add_dependencies(${TARGET_MEASUREDFT} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT})
target_compile_definitions(${TARGET_MEASUREDFT} PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_MEASUREDFT} ${COMMON_LINK_LIBRARIES})
set_target_properties(${TARGET_MEASUREDFT} PROPERTIES ${COMMON_TARGET_PROPERTIES})
endif()
# Target executable test_dftplanner
set(TARGET_TEST_DFTPLANNER "test_dftplanner")
add_executable(${TARGET_TEST_DFTPLANNER} test_dftplanner.cpp ${PROJECT_SOURCE_DIR}/include/sleefdft.h)
add_dependencies(${TARGET_TEST_DFTPLANNER} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT})
target_compile_definitions(${TARGET_TEST_DFTPLANNER} PRIVATE ${COMMON_TARGET_DEFINITIONS} MEASURE=1)
target_link_libraries(${TARGET_TEST_DFTPLANNER} ${COMMON_LINK_LIBRARIES})
set_target_properties(${TARGET_TEST_DFTPLANNER} PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test(NAME ${TARGET_TEST_DFTPLANNER} COMMAND $<TARGET_FILE:${TARGET_TEST_DFTPLANNER}> ${PROJECT_BINARY_DIR}/testm1.plan ${PROJECT_BINARY_DIR}/testm2.plan)
set_tests_properties(${TARGET_TEST_DFTPLANNER} PROPERTIES COST 2)
# Target executable test_dftplannerest
set(TARGET_TEST_DFTPLANNEREST "test_dftplannerest")
add_executable(${TARGET_TEST_DFTPLANNEREST} test_dftplanner.cpp ${PROJECT_SOURCE_DIR}/include/sleefdft.h)
add_dependencies(${TARGET_TEST_DFTPLANNEREST} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT})
target_compile_definitions(${TARGET_TEST_DFTPLANNEREST} PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_TEST_DFTPLANNEREST} ${COMMON_LINK_LIBRARIES})
set_target_properties(${TARGET_TEST_DFTPLANNEREST} PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test(NAME ${TARGET_TEST_DFTPLANNEREST} COMMAND $<TARGET_FILE:${TARGET_TEST_DFTPLANNEREST}> ${PROJECT_BINARY_DIR}/teste1.plan ${PROJECT_BINARY_DIR}/teste2.plan)
set_tests_properties(${TARGET_TEST_DFTPLANNEREST} PROPERTIES COST 2)
# Target executable roundtriptest1ddp
set(TARGET_ROUNDTRIPTEST1DDP "roundtriptest1ddp")
add_executable(${TARGET_ROUNDTRIPTEST1DDP} roundtriptest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h)
@ -161,6 +189,34 @@ if (LIBFFTW3 AND NOT SLEEF_DISABLE_FFTW)
add_test_dft(${TARGET_FFTWTEST2DSP}_8_8 $<TARGET_FILE:${TARGET_FFTWTEST2DSP}> 8 8)
add_test_dft(${TARGET_FFTWTEST2DSP}_10_10 $<TARGET_FILE:${TARGET_FFTWTEST2DSP}> 10 10)
add_test_dft(${TARGET_FFTWTEST2DSP}_5_15 $<TARGET_FILE:${TARGET_FFTWTEST2DSP}> 5 15)
if (SLEEF_LIBFFTW3_LIBRARIES)
# Target executable dftbenchdp
set(TARGET_BENCH1DDP "dftbenchdp")
add_executable(${TARGET_BENCH1DDP} dftbench.cpp ${PROJECT_SOURCE_DIR}/include/sleefdft.h)
add_dependencies(${TARGET_BENCH1DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT})
target_compile_definitions(${TARGET_BENCH1DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1)
target_link_libraries(${TARGET_BENCH1DDP} ${COMMON_LINK_LIBRARIES} ${SLEEF_LIBFFTW3_LIBRARIES})
set_target_properties(${TARGET_BENCH1DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES})
#add_test_dft("dftbenchdp1d" $<TARGET_FILE:${TARGET_BENCH1DDP}> 8 0 1000 1)
#set_tests_properties("dftbenchdp1d" PROPERTIES COST 3)
add_test_dft("dftbenchdp2d" $<TARGET_FILE:${TARGET_BENCH1DDP}> 8 8 1000 1)
set_tests_properties("dftbenchdp2d" PROPERTIES COST 3)
# Target executable dftbenchsp
set(TARGET_BENCH1DSP "dftbenchsp")
add_executable(${TARGET_BENCH1DSP} dftbench.cpp ${PROJECT_SOURCE_DIR}/include/sleefdft.h)
add_dependencies(${TARGET_BENCH1DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT})
target_compile_definitions(${TARGET_BENCH1DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2)
target_link_libraries(${TARGET_BENCH1DSP} ${COMMON_LINK_LIBRARIES} ${SLEEF_LIBFFTW3_LIBRARIES})
set_target_properties(${TARGET_BENCH1DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES})
#add_test_dft("dftbenchsp1d" $<TARGET_FILE:${TARGET_BENCH1DSP}> 8 0 1000 1)
#set_tests_properties("dftbenchsp1d" PROPERTIES COST 3)
add_test_dft("dftbenchsp2d" $<TARGET_FILE:${TARGET_BENCH1DSP}> 8 8 1000 1)
set_tests_properties("dftbenchsp2d" PROPERTIES COST 3)
endif()
else(LIBFFTW3 AND NOT SLEEF_DISABLE_FFTW)
if(MSVC OR SLEEF_CLANG_ON_WINDOWS)
# Test roundtriptestdp

View File

@ -1,116 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#define _DEFAULT_SOURCE
#define _XOPEN_SOURCE 700
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <complex.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#ifdef USEFFTW
#include <fftw3.h>
#include <omp.h>
#else
#include "sleef.h"
#include "sleefdft.h"
#endif
typedef double real;
// Returns the current CLOCK_MONOTONIC reading in nanoseconds.
static uint64_t gettime() {
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  uint64_t seconds = (uint64_t)now.tv_sec;
  uint64_t nanos = (uint64_t)now.tv_nsec;
  return seconds * 1000000000 + nanos;
}
#define REPEAT 8
// Benchmarks a 1-D complex double-precision DFT of size 2^|log2n|, using
// FFTW when USEFFTW is defined and SleefDFT otherwise.
//
// Usage: <prog> <log2n> [path]
//   log2n - base-2 logarithm of the transform size; a negative value selects
//           the backward transform. |log2n| must be in [1, 30].
//   path  - (SleefDFT only) when present, planning uses ESTIMATE mode and the
//           given path string is applied with SleefDFT_setPath.
//
// After a warm-up of niter/2 executions, runs REPEAT timed batches of niter
// executions each and prints the time per execution and the Mflops figure.
int main(int argc, char **argv) {
  if (argc == 1) {
    fprintf(stderr, "%s <log2n>\n", argv[0]);
    exit(-1);
  }

  int backward = 0;

  int log2n = atoi(argv[1]);
  // Reject sizes that would divide by zero in the iteration count (log2n == 0)
  // or overflow the int shift below. Checked before negating so that extreme
  // negative input cannot overflow either.
  if (log2n == 0 || log2n < -30 || log2n > 30) {
    fprintf(stderr, "log2n must be a non-zero integer with absolute value at most 30\n");
    exit(-1);
  }
  if (log2n < 0) {
    backward = 1;
    log2n = -log2n;
  }

  const int n = 1 << log2n;
  // Computed in 64 bits: for small n the quotient (up to 5e10) does not fit
  // in an int.
  const int64_t niter = (int64_t)(100000000000.0 / n / log2n);

  printf("Number of iterations = %lld\n", (long long int)niter);

#ifdef USEFFTW
  fftw_complex *in  = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n);
  fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n);
  if (!in || !out) {
    fprintf(stderr, "Memory allocation failed\n");
    exit(-1);
  }

#if 0
  int fftw_init_threads(void);
  fftw_plan_with_nthreads(omp_get_max_threads());
#endif

  fftw_plan w = fftw_plan_dft_1d(n, in, out, backward ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_MEASURE);
  //fftw_plan w = fftw_plan_dft_1d(n, in, out, backward ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_PATIENT);
  if (!w) {
    fprintf(stderr, "FFTW planning failed\n");
    exit(-1);
  }

  for(int i=0;i<n;i++) {
    in[i] = (2.0 * (rand() / (double)RAND_MAX) - 1) + (2.0 * (rand() / (double)RAND_MAX) - 1) * _Complex_I;
  }

  for(int64_t i=0;i<niter/2;i++) fftw_execute(w);
#else
  SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET);

  // Element count as size_t: n*2 overflows int when log2n == 30.
  const size_t nelem = (size_t)n * 2;

  real *in  = (real *)Sleef_malloc(nelem * sizeof(real));
  real *out = (real *)Sleef_malloc(nelem * sizeof(real));
  if (!in || !out) {
    fprintf(stderr, "Memory allocation failed\n");
    exit(-1);
  }

  int mode = SLEEF_MODE_MEASURE | SLEEF_MODE_VERBOSE; // | SLEEF_MODE_NO_MT;
  if (argc >= 3) mode = SLEEF_MODE_VERBOSE | SLEEF_MODE_ESTIMATE;

  if (backward) mode |= SLEEF_MODE_BACKWARD;
  struct SleefDFT *p = SleefDFT_double_init1d(n, in, out, mode);
  if (p == NULL) {
    fprintf(stderr, "SleefDFT initialization failed\n");
    exit(-1);
  }

  if (argc >= 3) SleefDFT_setPath(p, argv[2]);

  for(size_t i=0;i<nelem;i++) {
    in[i] = (2.0 * (rand() / (double)RAND_MAX) - 1);
  }

  for(int64_t i=0;i<niter/2;i++) SleefDFT_double_execute(p, in, out);
#endif

  for(int rep=0;rep<REPEAT;rep++) {
    uint64_t tm0 = gettime();

    for(int64_t i=0;i<niter;i++) {
#ifdef USEFFTW
      fftw_execute(w);
#else
      SleefDFT_double_execute(p, in, out);
#endif
    }

    uint64_t tm1 = gettime();

    printf("Actual time = %g ns\n", (double)(tm1 - tm0) / niter);

    double timeus = (tm1 - tm0) / ((double)niter * 1000);

    // 5.0 forces floating-point arithmetic; 5 * n * log2n overflows int
    // once log2n reaches 25.
    double mflops = 5.0 * n * log2n / timeus;

    printf("%g Mflops\n", mflops);
  }

  //

  exit(0);
}

View File

@ -0,0 +1,404 @@
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <cassert>
#include <cmath>
#include <iostream>
#include <complex>
#include <ctime>
#include <chrono>
#include <thread>
#include <memory>
#include <vector>
#include <fftw3.h>
#include <omp.h>
#include "sleef.h"
#include "sleefdft.h"
using namespace std;
#if BASETYPEID == 1
typedef double xreal;
#define FFTW_COMPLEX fftw_complex
#define FFTW_PLAN_WITH_NTHREADS fftw_plan_with_nthreads
#define FFTW_PLAN fftw_plan
#define FFTW_MALLOC fftw_malloc
#define FFTW_FREE fftw_free
#define FFTW_PLAN_DFT_1D fftw_plan_dft_1d
#define FFTW_PLAN_DFT_2D fftw_plan_dft_2d
#define FFTW_EXECUTE fftw_execute
#define FFTW_DESTROY_PLAN fftw_destroy_plan
#define FFTW_CLEANUP fftw_cleanup
#define SLEEFDFT_INIT1D SleefDFT_double_init1d
#define SLEEFDFT_INIT2D SleefDFT_double_init2d
#elif BASETYPEID == 2
typedef float xreal;
#define FFTW_COMPLEX fftwf_complex
#define FFTW_PLAN_WITH_NTHREADS fftwf_plan_with_nthreads
#define FFTW_PLAN fftwf_plan
#define FFTW_MALLOC fftwf_malloc
#define FFTW_FREE fftwf_free
#define FFTW_PLAN_DFT_1D fftwf_plan_dft_1d
#define FFTW_PLAN_DFT_2D fftwf_plan_dft_2d
#define FFTW_EXECUTE fftwf_execute
#define FFTW_DESTROY_PLAN fftwf_destroy_plan
#define FFTW_CLEANUP fftwf_cleanup
#define SLEEFDFT_INIT1D SleefDFT_float_init1d
#define SLEEFDFT_INIT2D SleefDFT_float_init2d
#else
#error BASETYPEID not set
#endif
// Returns a monotonic timestamp in nanoseconds.
//
// Only differences between two readings are meaningful; the epoch is
// unspecified. steady_clock is used because it never jumps backwards, and
// because high_resolution_clock::from_time_t is not portable: from_time_t is
// a member of system_clock only, and high_resolution_clock is an alias of
// steady_clock on some standard libraries.
static uint64_t timens() {
  const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
  return (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch).count();
}
// Common interface for the FFT back-ends being benchmarked. A back-end owns
// its input/output buffers and a plan, and runs one transform per execute().
template<typename cplx>
class FFTFramework {
public:
  virtual void execute() = 0;
  virtual cplx* getInPtr() = 0;
  virtual cplx* getOutPtr() = 0;
  virtual ~FFTFramework() {};

  // Estimates how many execute() calls fit into `ns` nanoseconds.
  //
  // Starting from 10 calls, the batch size is doubled until a batch takes
  // longer than 10 ms; the estimate is then extrapolated from that batch.
  int64_t niter(int64_t ns) {
    const int64_t minBatchNs = 1000LL * 1000 * 10;

    for(int64_t batch = 10;;batch *= 2) {
      const int64_t begin = timens();
      for(int64_t k=0;k<batch;k++) execute();
      const int64_t end = timens();

      const int64_t elapsed = end - begin;
      if (elapsed > minBatchNs) {
        return 1 + int64_t((double)batch * ns / elapsed);
      }
    }
  }
};
// FFTFramework back-end that runs the transform with SleefDFT.
template<typename cplx>
class FWSleefDFT : public FFTFramework<cplx> {
  const int n, m;
  cplx* in;
  cplx* out;
  SleefDFT *plan;

public:
  // Allocates n*m-element input/output buffers and creates a plan.
  //   forward - forward transform when true, backward otherwise
  //   mt      - when false, SLEEF_MODE_NO_MT is added to the plan mode
  //   check   - plan with ESTIMATE when true, with MEASURE otherwise
  // A 1-D plan is created when m == 1, a 2-D plan otherwise. Exits the
  // process if either buffer cannot be allocated.
  FWSleefDFT(int n_, int m_, bool forward, bool mt, bool check) : n(n_), m(m_) {
    SleefDFT_setDefaultVerboseFP(stderr);
    SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET);

    const size_t nbytes = sizeof(cplx) * n * m;
    in  = (cplx*)Sleef_malloc(nbytes);
    out = (cplx*)Sleef_malloc(nbytes);

    if (in == NULL || out == NULL) {
      cerr << "Sleef_malloc failed" << endl;
      exit(-1);
    }

    uint64_t mode = 0;
    if (check) {
      mode |= SLEEF_MODE_ESTIMATE;
    } else {
      mode |= SLEEF_MODE_MEASURE;
    }
    if (forward) {
      mode |= SLEEF_MODE_FORWARD;
    } else {
      mode |= SLEEF_MODE_BACKWARD;
    }
    if (!mt) mode |= SLEEF_MODE_NO_MT;
    //mode |= SLEEF_MODE_VERBOSE;

    xreal *src = (xreal*)in;
    xreal *dst = (xreal*)out;
    plan = (m == 1) ? SLEEFDFT_INIT1D(n, src, dst, mode)
                    : SLEEFDFT_INIT2D(n, m, src, dst, mode);
  }

  // Returns the path string reported by SleefDFT_getPath for this plan.
  string getPath() {
    char buf[1024] = { 0 };
    SleefDFT_getPath(plan, buf, sizeof(buf));
    return string(buf);
  }

  ~FWSleefDFT() {
    SleefDFT_dispose(plan);
    Sleef_free(out);
    Sleef_free(in);
  }

  cplx* getInPtr () { return in ; }
  cplx* getOutPtr() { return out; }

  // Passing NULL buffers runs the transform on the buffers given at plan time.
  // NOTE(review): inferred from the constructor passing in/out to the init
  // call -- confirm against the SleefDFT_execute documentation.
  void execute() { SleefDFT_execute(plan, NULL, NULL); }
};
// FFTFramework back-end that runs the transform with FFTW3.
template<typename cplx>
class FWFFTW3 : public FFTFramework<cplx> {
  const int n, m;
  cplx* in;
  cplx* out;
  FFTW_PLAN plan;

public:
  // Allocates n*m-element input/output buffers and creates a plan.
  //   forward - forward transform when true, backward otherwise
  //   mt      - plan for omp_get_max_threads() threads when true, 1 otherwise
  //   check   - plan with FFTW_ESTIMATE when true, FFTW_MEASURE otherwise
  // A 1-D plan is created when m == 1, a 2-D plan otherwise. Exits the
  // process if a buffer cannot be allocated or the plan cannot be created,
  // matching the allocation check done by FWSleefDFT.
  FWFFTW3(int n_, int m_, bool forward, bool mt, bool check) : n(n_), m(m_) {
    //FFTW_CLEANUP();
    FFTW_PLAN_WITH_NTHREADS(mt ? omp_get_max_threads() : 1);

    in  = (cplx*)FFTW_MALLOC(sizeof(FFTW_COMPLEX) * n * m);
    out = (cplx*)FFTW_MALLOC(sizeof(FFTW_COMPLEX) * n * m);

    if (!in || !out) {
      cerr << "fftw_malloc failed" << endl;
      exit(-1);
    }

    unsigned flags = check ? FFTW_ESTIMATE : FFTW_MEASURE;
    const int sign = forward ? FFTW_FORWARD : FFTW_BACKWARD;

    if (m == 1) {
      plan = FFTW_PLAN_DFT_1D(n, (FFTW_COMPLEX*)in, (FFTW_COMPLEX*)out, sign, flags);
    } else {
      plan = FFTW_PLAN_DFT_2D(n, m, (FFTW_COMPLEX*)in, (FFTW_COMPLEX*)out, sign, flags);
    }

    // The FFTW planner returns NULL when it cannot create a plan; executing
    // a NULL plan would crash.
    if (!plan) {
      cerr << "FFTW planning failed" << endl;
      exit(-1);
    }
  }

  ~FWFFTW3() {
    FFTW_DESTROY_PLAN(plan);
    FFTW_FREE(out);
    FFTW_FREE(in);
  }

  cplx* getInPtr() { return in; }
  cplx* getOutPtr() { return out; }

  void execute() { FFTW_EXECUTE(plan); }
};
int main(int argc, char **argv) {
if (argc == 1) {
fprintf(stderr, "%s <log2n> <log2m> <measurement time in ms> <nrepeat>\n", argv[0]);
exit(-1);
}
fftw_init_threads();
double measureTimeMillis = 3000;
if (argc >= 4) measureTimeMillis = atof(argv[3]);
bool forward = true;
int log2n = atoi(argv[1]);
if (log2n < 0) {
forward = false;
log2n = -log2n;
}
const int n = 1 << log2n;
const int log2m = argc >= 3 ? atoi(argv[2]) : 0;
const int m = 1 << log2m;
cerr << "n = " << n << ", m = " << m << ", " << (forward ? "forward" : "backward") << endl;
const int nrepeat = argc >= 5 ? atoi(argv[4]) : 1;
vector<double> mflops_sleefdftst, mflops_fftwst, mflops_sleefdftmt, mflops_fftwmt;
vector<complex<xreal>> v(n * m);
for(int i=0;i<n * m;i++) {
v[i] = (2.0 * (rand() / (double)RAND_MAX) - 1) + (2.0 * (rand() / (double)RAND_MAX) - 1) * 1i;
}
{
// Check if we are really computing the same values
auto sleefdft = make_shared<FWSleefDFT<complex<xreal>>>(n, m, forward, true , true);
auto fftw = make_shared<FWFFTW3 <complex<xreal>>>(n, m, forward, false, true);
complex<xreal> *in0 = sleefdft->getInPtr();
complex<xreal> *out0 = sleefdft->getOutPtr();
complex<xreal> *in1 = fftw->getInPtr();
complex<xreal> *out1 = fftw->getOutPtr();
for(int i=0;i<n * m;i++) in0[i] = in1[i] = v[i];
sleefdft->execute();
fftw ->execute();
for(int i=0;i<n * m;i++) {
if (std::real(abs((out0[i] - out1[i]) * (out0[i] - out1[i]))) > 0.1) {
cerr << "NG " << i << " : " << out0[i] << ", " << out1[i] << endl;
exit(-1);
}
}
cerr << "Check OK" << endl;
}
for(int nr = 0;nr < nrepeat;nr++) {
cerr << endl;
#if BASETYPEID == 1
cerr << "DP ";
#elif BASETYPEID == 2
cerr << "SP ";
#endif
cerr << "n = 2^" << log2n << " = " << n << ", m = 2^" << log2m << " = " << m << ", nr = " << nr << endl;
//
{
cerr << "Planning SleefDFT ST ... ";
int64_t ptm0 = timens();
auto sleefdftst = make_shared<FWSleefDFT<complex<xreal>>>(n, m, forward, false, false);
int64_t ptm1 = timens();
cerr << ((ptm1 - ptm0) / 1000.0 / 1000.0) << "ms" << endl;
cerr << sleefdftst->getPath() << endl;
complex<xreal> *in0 = sleefdftst->getInPtr();
for(int i=0;i<n * m;i++) in0[i] = v[i];
auto niter = sleefdftst->niter(1000LL * 1000 * measureTimeMillis);
cerr << "SleefDFT ST niter = " << niter << endl;
for(int64_t i=0;i<niter/10;i++) sleefdftst->execute(); // warm up
int64_t tm0 = timens();
for(int64_t i=0;i<niter;i++) sleefdftst->execute();
int64_t tm1 = timens();
double mflops = 5 * n * log2n / ((tm1 - tm0) / (double(niter)*1000));
if (m != 1) mflops *= m * log2m;
fprintf(stderr, "%g Mflops\n", mflops);
mflops_sleefdftst.push_back(mflops);
}
//
{
cerr << "Planning FFTW ST ... ";
int64_t ptm0 = timens();
auto fftwst = make_shared<FWFFTW3<complex<xreal>>>(n, m, forward, false, false);
int64_t ptm1 = timens();
cerr << ((ptm1 - ptm0) / 1000.0 / 1000.0) << "ms" << endl;
complex<xreal> *in0 = fftwst->getInPtr();
for(int i=0;i<n * m;i++) in0[i] = v[i];
auto niter = fftwst->niter(1000LL * 1000 * measureTimeMillis);
cerr << "FFTW ST niter = " << niter << endl;
for(int64_t i=0;i<niter/10;i++) fftwst->execute(); // warm up
int64_t tm0 = timens();
for(int64_t i=0;i<niter;i++) fftwst->execute();
int64_t tm1 = timens();
double mflops = 5 * n * log2n / ((tm1 - tm0) / (double(niter)*1000));
if (m != 1) mflops *= m * log2m;
fprintf(stderr, "%g Mflops\n", mflops);
mflops_fftwst.push_back(mflops);
}
//
{
cerr << "Planning SleefDFT MT ... ";
int64_t ptm0 = timens();
auto sleefdftmt = make_shared<FWSleefDFT<complex<xreal>>>(n, m, forward, true, false);
int64_t ptm1 = timens();
cerr << ((ptm1 - ptm0) / 1000.0 / 1000.0) << "ms" << endl;
cerr << sleefdftmt->getPath() << endl;
complex<xreal> *in0 = sleefdftmt->getInPtr();
for(int i=0;i<n * m;i++) in0[i] = v[i];
auto niter = sleefdftmt->niter(1000LL * 1000 * measureTimeMillis);
cerr << "SleefDFT MT niter = " << niter << endl;
for(int64_t i=0;i<niter/10;i++) sleefdftmt->execute(); // warm up
int64_t tm0 = timens();
for(int64_t i=0;i<niter;i++) sleefdftmt->execute();
int64_t tm1 = timens();
double mflops = 5 * n * log2n / ((tm1 - tm0) / (double(niter)*1000));
if (m != 1) mflops *= m * log2m;
fprintf(stderr, "%g Mflops\n", mflops);
mflops_sleefdftmt.push_back(mflops);
}
//
{
cerr << "Planning FFTW MT ... ";
int64_t ptm0 = timens();
auto fftwmt = make_shared<FWFFTW3<complex<xreal>>>(n, m, forward, true, false);
int64_t ptm1 = timens();
cerr << ((ptm1 - ptm0) / 1000.0 / 1000.0) << "ms" << endl;
complex<xreal> *in0 = fftwmt->getInPtr();
for(int i=0;i<n * m;i++) in0[i] = v[i];
auto niter = fftwmt->niter(1000LL * 1000 * measureTimeMillis);
cerr << "FFTW MT niter = " << niter << endl;
for(int64_t i=0;i<niter/10;i++) fftwmt->execute(); // warm up
int64_t tm0 = timens();
for(int64_t i=0;i<niter;i++) fftwmt->execute();
int64_t tm1 = timens();
double mflops = 5 * n * log2n / ((tm1 - tm0) / (double(niter)*1000));
if (m != 1) mflops *= m * log2m;
fprintf(stderr, "%g Mflops\n", mflops);
mflops_fftwmt.push_back(mflops);
}
}
cerr << endl;
cout << log2n << ", " << log2m << ", ";
{
double f = 0;
for(auto a : mflops_sleefdftst) {
if (a > f) f = a;
}
cout << f << ", ";
}
{
double f = 0;
for(auto a : mflops_sleefdftmt) {
if (a > f) f = a;
}
cout << f << ", ";
}
{
double f = 0;
for(auto a : mflops_fftwst) {
if (a > f) f = a;
}
cout << f << ", ";
}
{
double f = 0;
for(auto a : mflops_fftwmt) {
if (a > f) f = a;
}
cout << f << endl;
}
//
exit(0);
}

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -40,10 +40,22 @@ static double squ(double x) { return x * x; }
double check_cf(int n) {
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n);
if (!in || !out) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
fftw_plan w = fftw_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
real *sx = (real *)Sleef_malloc(n*2*sizeof(real));
real *sy = (real *)Sleef_malloc(n*2*sizeof(real));
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
struct SleefDFT *p = SleefDFT_init1d(n, sx, sy, MODE);
for(int i=0;i<n;i++) {
@ -79,10 +91,22 @@ double check_cf(int n) {
double check_cb(int n) {
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n);
if (!in || !out) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
fftw_plan w = fftw_plan_dft_1d(n, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
real *sx = (real *)Sleef_malloc(n*2*sizeof(real));
real *sy = (real *)Sleef_malloc(n*2*sizeof(real));
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
struct SleefDFT *p = SleefDFT_init1d(n, sx, sy, SLEEF_MODE_BACKWARD | MODE);
for(int i=0;i<n;i++) {
@ -118,10 +142,22 @@ double check_cb(int n) {
double check_rf(int n) {
double *in = (double *) fftw_malloc(sizeof(double) * n);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (n/2+1));
if (!in || !out) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
fftw_plan w = fftw_plan_dft_r2c_1d(n, in, out, FFTW_ESTIMATE);
real *sx = (real *)Sleef_malloc(n*sizeof(real));
real *sy = (real *)Sleef_malloc((n/2+1)*sizeof(real)*2);
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
struct SleefDFT *p = SleefDFT_init1d(n, sx, sy, SLEEF_MODE_REAL | MODE);
for(int i=0;i<n;i++) {
@ -155,10 +191,22 @@ double check_rf(int n) {
double check_rb(int n) {
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (n/2+1));
double *out = (double *) fftw_malloc(sizeof(double) * n);
if (!in || !out) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
fftw_plan w = fftw_plan_dft_c2r_1d(n, in, out, FFTW_ESTIMATE);
real *sx = (real *)Sleef_malloc((n/2+1) * sizeof(real)*2);
real *sy = (real *)Sleef_malloc(sizeof(real)*n);
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
struct SleefDFT *p = SleefDFT_init1d(n, sx, sy, SLEEF_MODE_REAL | SLEEF_MODE_BACKWARD | MODE);
for(int i=0;i<n/2;i++) {

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -40,10 +40,22 @@ static double squ(double x) { return x * x; }
double check_cf(int n, int m) {
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m);
if (!in || !out) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
fftw_plan w = fftw_plan_dft_2d(n, m, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
real *sx = (real *)Sleef_malloc(n*m*2*sizeof(real));
real *sy = (real *)Sleef_malloc(n*m*2*sizeof(real));
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
struct SleefDFT *p = SleefDFT_init2d(n, m, sx, sy, MODE);
for(int i=0;i<n*m;i++) {
@ -79,10 +91,22 @@ double check_cf(int n, int m) {
double check_cb(int n, int m) {
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m);
if (!in || !out) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
fftw_plan w = fftw_plan_dft_2d(n, m, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
real *sx = (real *)Sleef_malloc(n*m*2*sizeof(real));
real *sy = (real *)Sleef_malloc(n*m*2*sizeof(real));
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
struct SleefDFT *p = SleefDFT_init2d(n, m, sx, sy, SLEEF_MODE_BACKWARD | MODE);
for(int i=0;i<n*m;i++) {

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -97,11 +97,15 @@ int check_cf(int n) {
int i;
real *sx = (real *)Sleef_malloc(n*2 * sizeof(real));
real *sy = (real *)Sleef_malloc(n*2 * sizeof(real));
cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);
if (!sx || !ts || !fs) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
//
for(i=0;i<n;i++) {
@ -121,25 +125,17 @@ int check_cf(int n) {
return 0;
}
SleefDFT_execute(p, sx, sy);
SleefDFT_execute(p, sx, sx);
//
int success = 1;
double rmsn = 0, rmsd = 0;
for(i=0;i<n;i++) {
if ((fabs(sy[(i*2+0)] - creal(fs[i])) > THRES) ||
(fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) {
if ((fabs(sx[(i*2+0)] - creal(fs[i])) > THRES) ||
(fabs(sx[(i*2+1)] - cimag(fs[i])) > THRES)) {
success = 0;
}
double t;
t = (sy[(i*2+0)] - creal(fs[i]));
rmsn += t*t;
t = (sy[(i*2+1)] - cimag(fs[i]));
rmsn += t*t;
rmsd += creal(fs[i]) * creal(fs[i]) + cimag(fs[i]) * cimag(fs[i]);
}
//
@ -148,7 +144,6 @@ int check_cf(int n) {
free(ts);
Sleef_free(sx);
Sleef_free(sy);
SleefDFT_dispose(p);
//
@ -161,11 +156,15 @@ int check_cb(int n) {
int i;
real *sx = (real *)Sleef_malloc(sizeof(real)*n*2);
real *sy = (real *)Sleef_malloc(sizeof(real)*n*2);
cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);
if (!sx || !ts || !fs) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
//
for(i=0;i<n;i++) {
@ -183,15 +182,15 @@ int check_cb(int n) {
return 0;
}
SleefDFT_execute(p, sx, sy);
SleefDFT_execute(p, sx, sx);
//
int success = 1;
for(i=0;i<n;i++) {
if ((fabs(sy[(i*2+0)] - creal(ts[i])) > THRES) ||
(fabs(sy[(i*2+1)] - cimag(ts[i])) > THRES)) {
if ((fabs(sx[(i*2+0)] - creal(ts[i])) > THRES) ||
(fabs(sx[(i*2+1)] - cimag(ts[i])) > THRES)) {
success = 0;
}
}
@ -202,7 +201,6 @@ int check_cb(int n) {
free(ts);
Sleef_free(sx);
Sleef_free(sy);
SleefDFT_dispose(p);
//
@ -214,12 +212,16 @@ int check_cb(int n) {
int check_rf(int n) {
int i;
real *sx = (real *)Sleef_malloc(n * sizeof(real));
real *sy = (real *)Sleef_malloc((n/2+1)*sizeof(real)*2);
real *sx = (real *)Sleef_malloc((n+2) * sizeof(real));
cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);
if (!sx || !ts || !fs) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
//
for(i=0;i<n;i++) {
@ -227,6 +229,8 @@ int check_rf(int n) {
sx[i] = creal(ts[i]);
}
sx[n] = sx[n+1] = 0;
//
forward(ts, fs, n);
@ -238,15 +242,15 @@ int check_rf(int n) {
return 0;
}
SleefDFT_execute(p, sx, sy);
SleefDFT_execute(p, sx, sx);
//
int success = 1;
for(i=0;i<n/2+1;i++) {
if (fabs(sy[(2*i+0)] - creal(fs[i])) > THRES) success = 0;
if (fabs(sy[(2*i+1)] - cimag(fs[i])) > THRES) success = 0;
if (fabs(sx[(2*i+0)] - creal(fs[i])) > THRES) success = 0;
if (fabs(sx[(2*i+1)] - cimag(fs[i])) > THRES) success = 0;
}
//
@ -255,7 +259,6 @@ int check_rf(int n) {
free(ts);
Sleef_free(sx);
Sleef_free(sy);
SleefDFT_dispose(p);
//
@ -270,6 +273,11 @@ int check_rb(int n) {
cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);
if (!ts || !fs) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
//
for(i=0;i<n/2;i++) {
@ -283,7 +291,11 @@ int check_rb(int n) {
}
real *sx = (real *)Sleef_malloc((n/2+1) * sizeof(real)*2);
real *sy = (real *)Sleef_malloc(sizeof(real)*n);
if (!sx) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
for(i=0;i<n/2+1;i++) {
sx[2*i+0] = creal(fs[i]);
@ -301,7 +313,7 @@ int check_rb(int n) {
return 0;
}
SleefDFT_execute(p, sx, sy);
SleefDFT_execute(p, sx, sx);
//
@ -312,7 +324,7 @@ int check_rb(int n) {
success = 0;
}
if ((fabs(sy[i] - creal(ts[i])) > THRES)) {
if ((fabs(sx[i] - creal(ts[i])) > THRES)) {
success = 0;
}
}
@ -323,7 +335,6 @@ int check_rb(int n) {
free(ts);
Sleef_free(sx);
Sleef_free(sy);
SleefDFT_dispose(p);
//
@ -335,11 +346,15 @@ int check_arf(int n) {
int i;
real *sx = (real *)Sleef_malloc(n * sizeof(real));
real *sy = (real *)Sleef_malloc(n * sizeof(real));
cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);
if (!sx || !ts || !fs) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
//
for(i=0;i<n;i++) {
@ -358,7 +373,7 @@ int check_arf(int n) {
return 0;
}
SleefDFT_execute(p, sx, sy);
SleefDFT_execute(p, sx, sx);
//
@ -366,18 +381,20 @@ int check_arf(int n) {
for(i=0;i<n/2;i++) {
if (i == 0) {
if (fabs(sy[(2*0+0)] - creal(fs[0 ])) > THRES) success = 0;
if (fabs(sy[(2*0+1)] - creal(fs[n/2])) > THRES) success = 0;
if (fabs(sx[(2*0+0)] - creal(fs[0 ])) > THRES) success = 0;
if (fabs(sx[(2*0+1)] - creal(fs[n/2])) > THRES) success = 0;
} else {
if (fabs(sy[(2*i+0)] - creal(fs[i])) > THRES) success = 0;
if (fabs(sy[(2*i+1)] - cimag(fs[i])) > THRES) success = 0;
if (fabs(sx[(2*i+0)] - creal(fs[i])) > THRES) success = 0;
if (fabs(sx[(2*i+1)] - cimag(fs[i])) > THRES) success = 0;
}
}
//
free(fs);
free(ts);
Sleef_free(sx);
Sleef_free(sy);
SleefDFT_dispose(p);
//
@ -394,6 +411,11 @@ int check_arb(int n) {
cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);
if (!sx || !sy || !ts || !fs) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
//
for(i=0;i<n/2;i++) {

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -43,6 +43,11 @@ double check_c(int n) {
real *sy = (real *)Sleef_malloc(n*2 * sizeof(real));
real *sz = (real *)Sleef_malloc(n*2 * sizeof(real));
if (!sx || !sy || !sz) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
for(int i=0;i<n*2;i++) sx[i] = (real)(2.0 * (rand() / (double)RAND_MAX) - 1);
//

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -41,7 +41,11 @@ double check_c(int n, int m) {
real *sx = (real *)Sleef_malloc(n*m*2 * sizeof(real));
real *sy = (real *)Sleef_malloc(n*m*2 * sizeof(real));
real *sz = (real *)Sleef_malloc(n*m*2 * sizeof(real));
if (!sx || !sy) {
fprintf(stderr, "Memory allocation failed");
exit(-1);
}
for(int i=0;i<n*m*2;i++) sx[i] = (real)(2.0 * (rand() / (double)RAND_MAX) - 1);
@ -66,7 +70,7 @@ double check_c(int n, int m) {
exit(-1);
}
SleefDFT_execute(p, sy, sz);
SleefDFT_execute(p, sy, sy);
SleefDFT_dispose(p);
//
@ -74,7 +78,7 @@ double check_c(int n, int m) {
double rmsn = 0, rmsd = 0, scale = 1 / (n*(double)m);
for(int i=0;i<n*m;i++) {
rmsn += squ(scale * sz[i*2+0] - sx[i*2+0]) + squ(scale * sz[i*2+1] - sx[i*2+1]);
rmsn += squ(scale * sy[i*2+0] - sx[i*2+0]) + squ(scale * sy[i*2+1] - sx[i*2+1]);
rmsd += squ( sx[i*2+0]) + squ( sx[i*2+1]);
}
@ -82,7 +86,6 @@ double check_c(int n, int m) {
Sleef_free(sx);
Sleef_free(sy);
Sleef_free(sz);
//

View File

@ -0,0 +1,168 @@
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <vector>
#include <string>
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <cmath>
#include "sleef.h"
#include "sleefdft.h"
using namespace std;
// Creates a DFT plan for each of a fixed set of transform shapes (1-D and 2-D,
// double and float), asks the library which execution path it selected for
// that plan, and returns one "<label> : <path>" string per shape, in a fixed
// order. Callers compare the returned vectors between runs.
//
// mode : SLEEF_MODE_* flags passed unchanged to every SleefDFT_*_init* call.
//
// The process exits with status -1 if a buffer cannot be allocated or a plan
// cannot be created.
vector<string> doTransform(int mode) {
  // Large enough for the biggest shape used below: 2048 x 64 values, each
  // stored as two scalars.
  const size_t NELEM = 2048*64*2;

  vector<string> v;
  vector<char> s(1024);

  double *din  = (double *)Sleef_malloc(NELEM * sizeof(double));
  double *dout = (double *)Sleef_malloc(NELEM * sizeof(double));
  // The float buffers hold float elements, so they are sized with
  // sizeof(float) (the previous sizeof(double) merely over-allocated).
  float *fin   = (float *)Sleef_malloc(NELEM * sizeof(float));
  float *fout  = (float *)Sleef_malloc(NELEM * sizeof(float));

  if (!din || !dout || !fin || !fout) {
    cerr << "Memory allocation failed" << endl;
    exit(-1);
  }

  // Records the path of one plan and disposes the plan. A NULL plan is
  // reported instead of being handed to SleefDFT_getPath.
  auto record = [&](const char *label, SleefDFT *p) {
    if (p == NULL) {
      cerr << "SleefDFT initialization failed : " << label << endl;
      exit(-1);
    }
    SleefDFT_getPath(p, s.data(), s.size());
    v.push_back(string(label) + string(s.data()));
    SleefDFT_dispose(p);
  };

  //

  record("1d double 1024 : ", SleefDFT_double_init1d(1024, din, dout, mode));
  record("1d double 512 : ",  SleefDFT_double_init1d(512, din, dout, mode));
  record("1d float 1024 : ",  SleefDFT_float_init1d(1024, fin, fout, mode));
  record("1d float 512 : ",   SleefDFT_float_init1d(512, fin, fout, mode));

  record("2d double 2048x64 : ", SleefDFT_double_init2d(2048, 64, din, dout, mode));
  record("2d double 128x128 : ", SleefDFT_double_init2d(128, 128, din, dout, mode));
  record("2d float 2048x64 : ",  SleefDFT_float_init2d(2048, 64, fin, fout, mode));
  record("2d float 128x128 : ",  SleefDFT_float_init2d(128, 128, fin, fout, mode));

  Sleef_free(din);
  Sleef_free(dout);
  Sleef_free(fin);
  Sleef_free(fout);

  return v;
}
// Verifies that two runs produced identical path lists. Any difference in
// length or in an individual entry is reported on stderr and terminates the
// process with status -1; the function returns normally only when the two
// vectors are equal element by element.
void compare(std::vector<std::string> &runa, std::vector<std::string> &runb) {
  const size_t count = runa.size();

  if (count != runb.size()) {
    std::cerr << "Lengths do not match" << std::endl;
    exit(-1);
  }

  for (size_t idx = 0; idx != count; ++idx) {
    const std::string &lhs = runa[idx];
    const std::string &rhs = runb[idx];
    if (lhs == rhs) continue;

    // First mismatch: show both entries, then abort.
    std::cerr << "Paths do not match" << std::endl;
    std::cerr << lhs << std::endl;
    std::cerr << rhs << std::endl;
    exit(-1);
  }
}
// Plan-file consistency test.
//
// Usage: <program> <plan file 1> <plan file 2>
//
// Runs doTransform() several times while switching between the two plan
// files, and checks with compare() that a run using a given plan file selects
// the same paths as the earlier run that used the same file. When built with
// MEASURE, the accumulated plan is additionally saved to "manual.plan" and a
// run using that file is compared with run 0.
//
// Exits with 0 after printing "OK"; exits with -1 on bad usage or (inside
// compare/doTransform) on any mismatch or failure.
int main(int argc, char **argv) {
  if (argc < 3) {
    cerr << "Usage : " << (argc > 0 ? argv[0] : "<program>")
         << " <plan file 1> <plan file 2>" << endl;
    exit(-1);
  }

  string fn1 = argv[1], fn2 = argv[2];

#ifdef MEASURE
#ifdef MULTITHREAD
  int mode = SLEEF_MODE_MEASURE | SLEEF_MODE_VERBOSE;
#else
  int mode = SLEEF_MODE_MEASURE | SLEEF_MODE_VERBOSE | SLEEF_MODE_NO_MT;
#endif
#else
#ifdef MULTITHREAD
  int mode = SLEEF_MODE_ESTIMATE | SLEEF_MODE_VERBOSE;
#else
  int mode = SLEEF_MODE_ESTIMATE | SLEEF_MODE_VERBOSE | SLEEF_MODE_NO_MT;
#endif
#endif

  // argc is at least 3 here, so the former "argc == 1 ? 0 : ..." selection
  // could only ever yield SLEEF_PLAN_AUTOMATIC.
  const int planMode = SLEEF_PLAN_AUTOMATIC;

  //

  cerr << "Run 0" << endl;

  SleefDFT_setPlanFilePath(fn1.c_str(), NULL, planMode);

  auto run0 = doTransform(mode);

  cerr << endl << "Run 1" << endl;

  // Drop the in-memory plan before switching to the second plan file.
  SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET);
  SleefDFT_setPlanFilePath(fn2.c_str(), NULL, planMode);

  auto run1 = doTransform(mode);

  cerr << endl << "Run 2" << endl;

  // Note: no reset here, unlike runs 1, 3 and 4.
  SleefDFT_setPlanFilePath(fn1.c_str(), NULL, planMode);

  auto run2 = doTransform(mode);

  compare(run0, run2);

#ifdef MEASURE
  SleefDFT_savePlan("manual.plan");
#endif

  cerr << endl << "Run 3" << endl;

  SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET);
  SleefDFT_setPlanFilePath(fn2.c_str(), NULL, planMode);

  auto run3 = doTransform(mode);

  compare(run1, run3);

#ifdef MEASURE
  cerr << endl << "Run 4" << endl;

  SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET);
  SleefDFT_setPlanFilePath("manual.plan", NULL, planMode);

  auto run4 = doTransform(mode);

  compare(run0, run4);
#endif

  cerr << "OK" << endl;

  exit(0);
}

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -11,7 +11,21 @@ if (SLEEFDFT_MAXBUTWIDTH GREATER 7)
message(FATAL_ERROR "SLEEFDFT_MAXBUTWIDTH has to be smaller than 8." )
endif()
option(SLEEFDFT_ENABLE_STREAM "Streaming instructions are utilized in DFT." OFF)
set(SLEEFDFT_MINSHIFT 1 CACHE STRING "Min hardcoded shift")
set(SLEEFDFT_MAXSHIFT 1 CACHE STRING "Max hardcoded shift")
if ((${SLEEFDFT_MINSHIFT} GREATER ${SLEEFDFT_MAXSHIFT}) OR (${SLEEFDFT_MINSHIFT} LESS 1))
message(FATAL_ERROR "SLEEFDFT_MINSHIFT, SLEEFDFT_MAXSHIFT range error")
endif()
math(EXPR SLEEFDFT_MAXSHIFT_MINUS_1 "${SLEEFDFT_MAXSHIFT} - 1")
if (${SLEEFDFT_MINSHIFT} LESS ${SLEEFDFT_MAXSHIFT})
foreach(J RANGE ${SLEEFDFT_MINSHIFT} ${SLEEFDFT_MAXSHIFT_MINUS_1})
list(APPEND LISTSHIFTSTR ${J})
endforeach()
else()
set(LISTSHIFTSTR)
endif()
# Settings
@ -21,18 +35,14 @@ set(LISTSHORTTYPENAME "dp" "sp")
set(LISTLONGTYPENAME "double" "float")
set(LISTTYPEID "1" "2")
set(MACRODEF_vecextdp BASETYPEID=1 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextdp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextsp BASETYPEID=2 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextsp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextld BASETYPEID=3 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextld ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextqp BASETYPEID=4 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextqp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_purecdp BASETYPEID=1 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecdp ${FLAGS_ENABLE_PUREC})
set(MACRODEF_purecsp BASETYPEID=2 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecsp ${FLAGS_ENABLE_PUREC})
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CFLAGS_purecdp ${FLAGS_ENABLE_PUREC} -O0)
set(CFLAGS_purecsp ${FLAGS_ENABLE_PUREC} -O0)
endif()
set(MACRODEF_purecld BASETYPEID=3 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecld ${FLAGS_ENABLE_PUREC})
set(MACRODEF_purecqp BASETYPEID=4 ENABLE_PUREC CONFIG=1)
@ -41,10 +51,6 @@ set(MACRODEF_sse2dp BASETYPEID=1 ENABLE_SSE2 CONFIG=4)
set(CFLAGS_sse2dp ${FLAGS_ENABLE_SSE4})
set(MACRODEF_sse2sp BASETYPEID=2 ENABLE_SSE2 CONFIG=4)
set(CFLAGS_sse2sp ${FLAGS_ENABLE_SSE4})
set(MACRODEF_avxdp BASETYPEID=1 ENABLE_AVX CONFIG=1)
set(CFLAGS_avxdp ${FLAGS_ENABLE_AVX})
set(MACRODEF_avxsp BASETYPEID=2 ENABLE_AVX CONFIG=1)
set(CFLAGS_avxsp ${FLAGS_ENABLE_AVX})
set(MACRODEF_avx2dp BASETYPEID=1 ENABLE_AVX2 CONFIG=1)
set(CFLAGS_avx2dp ${FLAGS_ENABLE_AVX2})
set(MACRODEF_avx2sp BASETYPEID=2 ENABLE_AVX2 CONFIG=1)
@ -138,10 +144,6 @@ set(ISALIST_SP purecsp)
set(ISALIST_DP purecdp)
set(LIST_SUPPORTED_FPTYPE 0 1)
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(ISALIST_SP vecextsp)
set(ISALIST_DP vecextdp)
endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
# List all available vector data types
@ -150,11 +152,6 @@ if (COMPILER_SUPPORTS_SSE4)
set(ISALIST_DP ${ISALIST_DP} sse2dp)
endif(COMPILER_SUPPORTS_SSE4)
if (COMPILER_SUPPORTS_AVX)
set(ISALIST_SP ${ISALIST_SP} avxsp)
set(ISALIST_DP ${ISALIST_DP} avxdp)
endif(COMPILER_SUPPORTS_AVX)
if (COMPILER_SUPPORTS_AVX2)
set(ISALIST_SP ${ISALIST_SP} avx2sp)
set(ISALIST_DP ${ISALIST_DP} avx2dp)
@ -219,7 +216,13 @@ endif()
# Compiler properties
set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${DFT_C_FLAGS}")
set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${DFT_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${ORG_CMAKE_C_FLAGS} ${DFT_C_FLAGS} ${OpenMP_C_FLAGS}")
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
endif()
set(COMMON_TARGET_PROPERTIES
C_STANDARD 99 # -std=gnu99
)
@ -228,7 +231,11 @@ if (BUILD_SHARED_LIBS)
list(APPEND COMMON_TARGET_PROPERTIES POSITION_INDEPENDENT_CODE ON) # -fPIC
endif()
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} MAXBUTWIDTH=${SLEEFDFT_MAXBUTWIDTH})
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS}
MAXBUTWIDTHDP=${SLEEFDFT_MAXBUTWIDTH} MAXBUTWIDTHSP=${SLEEFDFT_MAXBUTWIDTH}
MINSHIFTDP=${SLEEFDFT_MINSHIFT} MAXSHIFTDP=${SLEEFDFT_MAXSHIFT}
MINSHIFTSP=${SLEEFDFT_MINSHIFT} MAXSHIFTSP=${SLEEFDFT_MAXSHIFT}
)
if (SLEEFDFT_ENABLE_STREAM)
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_STREAM=1)
@ -236,10 +243,6 @@ else()
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_STREAM=0)
endif()
if(COMPILER_SUPPORTS_OPENMP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
endif(COMPILER_SUPPORTS_OPENMP)
# Include directories
@ -269,7 +272,7 @@ endif()
add_custom_command(OUTPUT dispatchparam.h
COMMENT "Generating dispatchparam.h"
COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> paramonly ${SLEEFDFT_MAXBUTWIDTH} ${ISALIST_DP} > ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h
COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> paramonly ALL ${SLEEFDFT_MAXBUTWIDTH} ${SLEEFDFT_MINSHIFT} ${SLEEFDFT_MAXSHIFT} ${ISALIST_SP} > ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h
DEPENDS ${TARGET_MKDISPATCH}
)
add_custom_target(dispatchparam.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h)
@ -282,49 +285,51 @@ foreach(T ${LIST_SUPPORTED_FPTYPE})
list(GET LISTLONGTYPENAME ${T} LT) # LT is "double"
list(GET LISTTYPEID ${T} ID) # ID is 1
string(CONCAT S "dispatch" ${ST} ".h") # S is dispatchdp.h
string(CONCAT S "dispatch" ${ST} ".hpp") # S is dispatchdp.hpp
add_custom_command(OUTPUT ${S}
COMMENT "Generating ${S}"
COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> ${LT} ${SLEEFDFT_MAXBUTWIDTH} ${ISALIST_${CST}} > ${S}
COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> ${LT} ${CST} ${SLEEFDFT_MAXBUTWIDTH} ${SLEEFDFT_MINSHIFT} ${SLEEFDFT_MAXSHIFT} ${ISALIST_${CST}} > ${S}
DEPENDS ${TARGET_MKDISPATCH}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
string(CONCAT G ${S} "_generated") # G is dispatchdp.h_generated
string(CONCAT G ${S} "_generated") # G is dispatchdp.hpp_generated
add_custom_target(${G} SOURCES ${S})
endforeach()
# Target dftcommon.o
add_library(dftcommon_obj OBJECT dftcommon.c dftcommon.h ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h ${sleef_BINARY_DIR}/include/sleef.h)
add_library(dftcommon_obj OBJECT dftcommon.cpp dftcommon.hpp ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h ${sleef_BINARY_DIR}/include/sleef.h)
add_dependencies(dftcommon_obj ${TARGET_HEADERS} dispatchparam.h_generated)
set_source_files_properties(${sleef_BINARY_DIR}/include/sleef.h PROPERTIES GENERATED TRUE)
set_target_properties(dftcommon_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_compile_definitions(dftcommon_obj PRIVATE ${COMMON_TARGET_DEFINITIONS})
# Target dft*.o
# Target dft.o
foreach(T ${LIST_SUPPORTED_FPTYPE})
list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example
add_library(dft_obj OBJECT dft.cpp dftcommon.hpp)
add_dependencies(dft_obj "dispatchdp.hpp_generated" "dispatchsp.hpp_generated" dispatchparam.h_generated ${TARGET_HEADERS})
set_target_properties(dft_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_compile_definitions(dft_obj PRIVATE ${COMMON_TARGET_DEFINITIONS})
string(CONCAT G "dft" ${ST} "_obj") # G is "dftdp_obj"
string(CONCAT S "dispatch" ${ST} ".h") # S is "dispatchdp.h"
add_library(${G} OBJECT dft.c dftcommon.h ${S})
string(CONCAT SG ${S} "_generated") # SG is "dispatchdp.h_generated"
add_dependencies(${G} ${SG} ${TARGET_HEADERS})
set_target_properties(${G} PROPERTIES ${COMMON_TARGET_PROPERTIES})
list(GET LISTTYPEID ${T} ID) # ID is 1
target_compile_definitions(${G} PRIVATE BASETYPEID=${ID} ${COMMON_TARGET_DEFINITIONS})
endforeach()
# Copy unroll*.cpp.in to ${CMAKE_CURRENT_BINARY_DIR}
# Copy unroll0.org to ${CMAKE_CURRENT_BINARY_DIR}
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/unroll0.cpp.in
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.cpp.in ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.cpp.in)
add_custom_target(unroll0.cpp.in.copied DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/unroll0.cpp.in)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/unroll0.org
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.org ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.org)
add_custom_target(unroll0.org.copied DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/unroll0.org)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/unroll1.cpp.in
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unroll1.cpp.in ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/unroll1.cpp.in)
add_custom_target(unroll1.cpp.in.copied DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/unroll1.cpp.in)
# Target unroll*.c
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/unroll2.cpp.in
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unroll2.cpp.in ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/unroll2.cpp.in)
add_custom_target(unroll2.cpp.in.copied DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/unroll2.cpp.in)
# Target unroll*.cpp
foreach(T ${LIST_SUPPORTED_FPTYPE})
list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example
@ -333,7 +338,7 @@ foreach(T ${LIST_SUPPORTED_FPTYPE})
foreach(E ${ISALIST_${CST}}) # E is "sse2dp"
foreach(N ${NLIST})
string(CONCAT UC unroll_ ${N} _ ${E} ".c") # UC is "unroll_0_sse2dp.c"
string(CONCAT UC unroll_ ${N} _ ${E} ".cpp") # UC is "unroll_0_sse2dp.cpp"
set(UNROLL_TARGET_${CST} ${UNROLL_TARGET_${CST}} ${UC})
endforeach()
endforeach()
@ -342,11 +347,31 @@ foreach(T ${LIST_SUPPORTED_FPTYPE})
if(UNROLL_TARGET_${CST})
add_custom_command(OUTPUT ${UNROLL_TARGET_${CST}}
COMMENT "Generating ${UNROLL_TARGET_${CST}}"
COMMAND $<TARGET_FILE:${TARGET_MKUNROLL}> ${LT} ${ISALIST_${CST}}
COMMAND $<TARGET_FILE:${TARGET_MKUNROLL}> unroll0.cpp.in ${LT} ${CST} - ${ISALIST_${CST}}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${TARGET_MKUNROLL} unroll0.org.copied
DEPENDS ${TARGET_MKUNROLL} unroll0.cpp.in.copied
)
add_custom_target(unroll_target_${ST} DEPENDS ${UNROLL_TARGET_${CST}})
#
foreach(I ${LISTSHIFTSTR})
foreach(E ${ISALIST_${CST}}) # E is "sse2dp"
foreach(N ${NLIST})
string(CONCAT UC unroll_ ${N} _ ${E} _ ${I} ".cpp") # UC is "unroll_0_sse2dp_1.cpp"
set(UNROLL_TARGET_${CST}_${I} ${UNROLL_TARGET_${CST}_${I}} ${UC})
endforeach()
endforeach()
message(STATUS "Unroll target for ${CST}_${I} : ${UNROLL_TARGET_${CST}_${I}}")
add_custom_command(OUTPUT ${UNROLL_TARGET_${CST}_${I}}
COMMENT "Generating ${UNROLL_TARGET_${CST}_${I}}"
COMMAND $<TARGET_FILE:${TARGET_MKUNROLL}> unroll1.cpp.in ${LT} ${CST} ${I} ${ISALIST_${CST}}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${TARGET_MKUNROLL} unroll1.cpp.in.copied
)
add_custom_target(unroll_target_${ST}_${I} DEPENDS ${UNROLL_TARGET_${CST}_${I}})
endforeach()
endif()
endforeach()
@ -359,43 +384,38 @@ foreach(T ${LIST_SUPPORTED_FPTYPE})
foreach(E ${ISALIST_${CST}}) # E is "sse2dp"
foreach(N ${NLIST})
string(CONCAT U unroll_ ${N} _ ${E}) # U is "unroll_0_sse2dp"
string(CONCAT U unroll_ ${N} _ ${E}) # U is "unroll_0_sse2dp"
string(CONCAT UG ${U} "_obj") # UG is "unroll_0_sse2dp_obj"
string(CONCAT UC ${U} ".c") # UC is "unroll_0_sse2dp.c"
string(CONCAT UC ${U} ".cpp") # UC is "unroll_0_sse2dp.cpp"
add_library(${UG} OBJECT ${UC})
set_target_properties(${UG} PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_include_directories(${UG} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(${UG} PRIVATE ${COMMON_TARGET_DEFINITIONS} ${MACRODEF_${E}})
target_compile_options(${UG} PRIVATE ${CFLAGS_${E}})
add_dependencies(${UG} ${TARGET_HEADERS} unroll_target_${ST})
list(APPEND UNROLL_OBJECTS $<TARGET_OBJECTS:${UG}>)
foreach(I ${LISTSHIFTSTR})
string(CONCAT U unroll_ ${N} _ ${E} _ ${I}) # U is "unroll_0_sse2dp_1"
string(CONCAT UG ${U} "_obj") # UG is "unroll_0_sse2dp_1_obj"
string(CONCAT UC ${U} ".cpp") # UC is "unroll_0_sse2dp_1.cpp"
add_library(${UG} OBJECT ${UC})
set_target_properties(${UG} PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_include_directories(${UG} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(${UG} PRIVATE ${COMMON_TARGET_DEFINITIONS} ${MACRODEF_${E}})
target_compile_options(${UG} PRIVATE ${CFLAGS_${E}})
add_dependencies(${UG} ${TARGET_HEADERS} unroll_target_${ST}_${I})
list(APPEND UNROLL_OBJECTS $<TARGET_OBJECTS:${UG}>)
endforeach()
endforeach()
endforeach()
endforeach()
# Target libdft
add_library(${TARGET_LIBDFT} $<TARGET_OBJECTS:dftcommon_obj> $<TARGET_OBJECTS:${TARGET_LIBARRAYMAP_OBJ}>)
add_library(${TARGET_LIBDFT} $<TARGET_OBJECTS:dftcommon_obj> $<TARGET_OBJECTS:dft_obj> ${UNROLL_OBJECTS})
target_link_libraries(${TARGET_LIBDFT} ${TARGET_LIBSLEEF} ${LIBM})
foreach(T ${LIST_SUPPORTED_FPTYPE})
list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example
string(CONCAT G "dft" ${ST} "_obj") # G is "dftdp_obj"
target_sources(${TARGET_LIBDFT} PRIVATE $<TARGET_OBJECTS:${G}>)
endforeach()
foreach(T ${LIST_SUPPORTED_FPTYPE})
list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example
string(TOUPPER ${ST} CST) # CST is "DP"
foreach(E ${ISALIST_${CST}}) # E is "sse2dp"
foreach(N ${NLIST})
string(CONCAT UG unroll_ ${N} _ ${E} "_obj") # U is "unroll_0_sse2dp_obj"
target_sources(${TARGET_LIBDFT} PRIVATE $<TARGET_OBJECTS:${UG}>)
endforeach()
endforeach()
endforeach()
set_target_properties(${TARGET_LIBDFT} PROPERTIES
VERSION ${SLEEF_VERSION}
SOVERSION ${SLEEF_SOVERSION}

View File

@ -0,0 +1,45 @@
// Platform shims for file locking, truncation, temporary files and
// non-local jumps. Both branches define the same set of names: FLOCK,
// FUNLOCK, FTRUNCATE, OPENTMPFILE, CLOSETMPFILE, sigjmp, SETJMP and LONGJMP.
// NOTE(review): FILE is used below but no <stdio.h> include is visible here;
// presumably the including file provides it first - confirm.
#if !(defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER))
// ---- Everything except MinGW / MSVC ----
#include <unistd.h>
#include <sys/types.h>
#include <sys/file.h>
#include <signal.h>
#include <setjmp.h>
// Take an exclusive flock() lock on the stream's descriptor. The return value
// of flock() is not checked.
static void FLOCK(FILE *fp) { flock(fileno(fp), LOCK_EX); }
// Release the flock() lock on the stream's descriptor.
static void FUNLOCK(FILE *fp) { flock(fileno(fp), LOCK_UN); }
// Truncate the underlying file to z bytes. The result of ftruncate() is
// tested only by an "if" with an empty body, i.e. failures are ignored.
static void FTRUNCATE(FILE *fp, off_t z) {
if (ftruncate(fileno(fp), z))
;
}
// Temporary file helpers: thin wrappers over tmpfile() / fclose().
static FILE *OPENTMPFILE() { return tmpfile(); }
static void CLOSETMPFILE(FILE *fp) { fclose(fp); }
// Jump buffer plus jump macros. sigsetjmp is called with a non-zero second
// argument, which asks it to save the signal mask as well.
static sigjmp_buf sigjmp;
#define SETJMP(x) sigsetjmp(x, 1)
#define LONGJMP siglongjmp
#else
// ---- MinGW / MSVC ----
#include <Windows.h>
#include <io.h>
#include <signal.h>
#include <setjmp.h>
// File locking is a no-op in this branch.
static void FLOCK(FILE *fp) { }
static void FUNLOCK(FILE *fp) { }
// Seeks to the start of the stream and then calls SetEndOfFile on the
// underlying handle. NOTE(review): the size argument z is not used in this
// branch - confirm that callers only ever pass 0.
static void FTRUNCATE(FILE *fp, long z) {
fseek(fp, 0, SEEK_SET);
SetEndOfFile((HANDLE)_get_osfhandle(_fileno(fp)));
}
// The "temporary" file is a fixed relative name, "tmpfile.txt", opened with
// "w+" and removed again by CLOSETMPFILE.
// NOTE(review): a fixed name means concurrent users in the same directory
// would share the file - confirm this is acceptable for the callers.
static FILE *OPENTMPFILE() { return fopen("tmpfile.txt", "w+"); }
static void CLOSETMPFILE(FILE *fp) {
fclose(fp);
remove("tmpfile.txt");
}
// Plain setjmp/longjmp are used for the jump buffer in this branch.
static jmp_buf sigjmp;
#define SETJMP(x) setjmp(x)
#define LONGJMP longjmp
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,423 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <inttypes.h>
#include <assert.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "misc.h"
#include "sleef.h"
#define IMPORT_IS_EXPORT
#include "sleefdft.h"
#include "dispatchparam.h"
#include "dftcommon.h"
#include "common.h"
#include "arraymap.h"
// Tag values compared against the `magic` field of a SleefDFT object by the
// asserts in this file: MAGIC_FLOAT / MAGIC_DOUBLE are accepted where a 1-D
// plan is expected, MAGIC2D_FLOAT / MAGIC2D_DOUBLE where a 2-D plan is.
#define MAGIC_FLOAT 0x31415926
#define MAGIC_DOUBLE 0x27182818
#define MAGIC2D_FLOAT 0x22360679
#define MAGIC2D_DOUBLE 0x17320508
// Printable names of the four config values 0..3; indexed by config when a
// path is parsed or printed.
const char *configStr[] = { "ST", "ST stream", "MT", "MT stream" };
// Parses a path string of the form "N[(config)] N[(config)] ..." where each N
// is a decimal number and the optional config is one of the names in
// configStr.
//
//   p          : NUL-terminated input string
//   path       : receives the N of each step
//   config     : receives the configStr index of each step (0 when omitted)
//   pathLenMax : parsing fails once the number of steps reaches this value
//   log2len    : required sum of all N
//
// Returns the number of steps parsed, or a negative error code:
//   -1 unexpected character where a number was expected
//   -2 too many steps
//   -3 unknown config name after '('
//   -4 missing ')' after the config name
//   -5 the sum of all N differs from log2len
//   -6 an N exceeds MAXBUTWIDTH
static int parsePathStr(char *p, int *path, int *config, int pathLenMax, int log2len) {
  int pathLen = 0, l2l = 0;

  for(;;) {
    while(*p == ' ') p++;
    if (*p == '\0') break;
    // <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behavior.
    if (!isdigit((unsigned char)*p)) return -1;

    pathLen++;
    if (pathLen >= pathLenMax) return -2;

    int n = 0;
    while(isdigit((unsigned char)*p)) {
      n = n * 10 + (*p++ - '0');
      // Reject as soon as the limit is exceeded so that a very long digit
      // string cannot overflow n. The error code is the same one the check
      // after the loop used to produce.
      if (n > MAXBUTWIDTH) return -6;
    }
    path[pathLen-1] = n;
    l2l += n;
    config[pathLen-1] = 0;

    if (*p != '(') continue;

    // Search from the highest index down so that the longer names
    // ("ST stream", "MT stream") are tried before their prefixes.
    int c;
    for(c=3;c>=0;c--) if (strncmp(p+1, configStr[c], strlen(configStr[c])) == 0) break;
    if (c == -1) return -3;
    p += strlen(configStr[c]) + 1;
    if (*p != ')') return -4;
    p++;

    config[pathLen-1] = c;
  }

  if (l2l != log2len) return -5;

  return pathLen;
}
// Replaces the stored best path of a 1-D plan with the one described by
// pathStr. If the string does not parse, the plan is left untouched and, in
// verbose mode, the parser's error code is printed.
EXPORT void SleefDFT_setPath(SleefDFT *p, char *pathStr) {
  assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));

  int widths[32], configs[32];
  const int nSteps = parsePathStr(pathStr, widths, configs, 31, p->log2len);
  const int verbose = (p->mode & SLEEF_MODE_VERBOSE) != 0;

  if (nSteps < 0) {
    if (verbose) printf("Error %d in parsing path string : %s\n", nSteps, pathStr);
    return;
  }

  // Clear every level, then fill in the levels visited by the parsed steps,
  // walking down from log2len by each step's width.
  for(uint32_t lv = 0;lv <= p->log2len;lv++) p->bestPath[lv] = 0;

  int level = p->log2len;
  for(int step = 0;step < nSteps && level > 0;step++) {
    p->bestPath[level] = widths[step];
    p->bestPathConfig[level] = configs[step];
    level -= widths[step];
  }

  // pathLen is the number of levels that carry a non-zero width.
  p->pathLen = 0;
  for(int lv = p->log2len;lv >= 0;lv--) {
    if (p->bestPath[lv] == 0) continue;
    p->pathLen++;
  }

  if (!verbose) return;

  printf("Set path : ");
  for(int lv = p->log2len;lv >= 0;lv--) {
    if (p->bestPath[lv] == 0) continue;
    printf("%d(%s) ", p->bestPath[lv], configStr[p->bestPathConfig[lv]]);
  }
  printf("\n");
}
// Releases the per-width tables of a plan: for every width 1..MAXBUTWIDTH,
// frees each per-level entry from level == width up to log2len with
// Sleef_free, then frees the array of entries itself and clears the pointer.
void freeTables(SleefDFT *p) {
  int width = 1;
  while(width <= MAXBUTWIDTH) {
    uint32_t lv = width;
    while(lv <= p->log2len) {
      Sleef_free(p->tbl[width][lv]);
      lv++;
    }
    free(p->tbl[width]);
    p->tbl[width] = NULL;
    width++;
  }
}
// Destroys a plan and everything it owns. Handles both 2-D plans (recognized
// by the MAGIC2D_* tags) and 1-D plans (MAGIC_* tags); any other non-NULL
// object, or NULL, trips the assert below.
EXPORT void SleefDFT_dispose(SleefDFT *p) {
// 2-D plan: free the buffer and the nested 1-D plans, then the object.
if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE)) {
Sleef_free(p->tBuf);
SleefDFT_dispose(p->instH);
// instV is disposed separately only when the two lengths differ;
// presumably instH and instV are the same object otherwise - confirm
// against the 2-D init code.
if (p->hlen != p->vlen) SleefDFT_dispose(p->instV);
// The tag is cleared before the memory is released.
p->magic = 0;
free(p);
return;
}
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));
// For log2len <= 1 only the object itself is freed here.
if (p->log2len <= 1) {
p->magic = 0;
free(p);
return;
}
// The rtCoef0/rtCoef1 buffers are released only for plans in real mode.
if ((p->mode & SLEEF_MODE_REAL) != 0) {
Sleef_free(p->rtCoef1);
Sleef_free(p->rtCoef0);
p->rtCoef0 = p->rtCoef1 = NULL;
}
// Per-level perm entries for levels log2len..1, then the array holding them.
for(int level = p->log2len;level >= 1;level--) {
Sleef_free(p->perm[level]);
}
free(p->perm);
p->perm = NULL;
freeTables(p);
p->magic = 0;
free(p);
}
// Returns the index of the highest set bit of q, i.e. floor(log2(q)) for
// q > 0, using a 16-entry lookup table instead of a loop. For q == 0 the
// final "- 1" wraps around and the result is UINT32_MAX.
uint32_t ilog2(uint32_t q) {
  // bitLen[x] = number of bits needed to represent the 4-bit value x.
  static const uint32_t bitLen[16] = {0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4};

  // Step 1: reduce to a 16-bit problem.
  const uint32_t base = (q & 0xffff0000) ? 16 : 0;
  q >>= base;

  // Step 2: smear each set bit over the three bits below it, then sample
  // bits 4, 8 and 12 to find out which nibble holds the highest set bit.
  uint32_t smear = q | (q >> 1);
  smear |= (smear >> 2);
  const uint32_t sel = ((smear & 0x10) >> 4) | ((smear & 0x100) >> 7) | ((smear & 0x1000) >> 10);

  // Step 3: look up the position inside that nibble.
  const uint32_t nibbleShift = bitLen[sel] * 4;
  return base + nibbleShift + bitLen[q >> nibbleShift] - 1;
}
//
// File-scope state of the plan manager.
// Path of the plan file and the architecture ID string; both are heap copies
// made by SleefDFT_setPlanFilePath.
char *dftPlanFilePath = NULL;
char *archID = NULL;
// SLEEF_PLAN_* flags currently in effect.
uint64_t planMode = SLEEF_PLAN_REFERTOENVVAR;
// In-memory key/value store holding the plan entries.
ArrayMap *planMap = NULL;
// planFilePathSet: SleefDFT_setPlanFilePath has been called.
// planFileLoaded : loadPlanFromFile has populated planMap.
int planFilePathSet = 0, planFileLoaded = 0;
#ifdef _OPENMP
// Lock taken by the PlanManager_* functions around planMap accesses, and a
// flag recording whether it has been initialized.
omp_lock_t planMapLock;
int planMapLockInitialized = 0;
#endif
// Initializes planMapLock on first use. The check-and-initialize sequence is
// placed inside an OpenMP critical section. Without OpenMP this function does
// nothing.
static void initPlanMapLock() {
#ifdef _OPENMP
#pragma omp critical
{
if (!planMapLockInitialized) {
planMapLockInitialized = 1;
omp_init_lock(&planMapLock);
}
}
#endif
}
// Disposes the in-memory plan map, if there is one, and leaves planMap NULL.
static void planMap_clear() {
  if (planMap == NULL) return;

  ArrayMap_dispose(planMap);
  planMap = NULL;
}
// Sets the plan file path, the architecture ID and the plan mode flags.
//
//   path : plan file path to copy, or NULL for "no plan file"
//   arch : architecture ID to copy, or NULL to use Sleef_getCpuIdString()
//   mode : SLEEF_PLAN_* flags; with SLEEF_PLAN_RESET the in-memory plan is
//          discarded first
EXPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode) {
  initPlanMapLock();

  if ((mode & SLEEF_PLAN_RESET) != 0) {
    planMap_clear();
    planFileLoaded = 0;
    planFilePathSet = 0;
  }

  // Replace the stored path; free(NULL) is a no-op, so no guard is needed.
  free(dftPlanFilePath);
  dftPlanFilePath = NULL;
  if (path != NULL) {
    dftPlanFilePath = malloc(strlen(path)+10);
    strcpy(dftPlanFilePath, path);
  }

  // Replace the stored architecture ID, defaulting to the CPU ID string.
  free(archID);
  const char *id = (arch != NULL) ? arch : Sleef_getCpuIdString();
  archID = malloc(strlen(id)+10);
  strcpy(archID, id);

  planMode = mode;
  planFilePathSet = 1;
}
// Populates planMap and marks the plan as loaded.
//
// If no path has been set explicitly and SLEEF_PLAN_REFERTOENVVAR is in
// effect, the path is first taken from the environment variable named by
// ENVVAR. The map is then loaded from the plan file unless there is no path
// or SLEEF_PLAN_RESET is set; if nothing was loaded, an empty map is created.
static void loadPlanFromFile() {
  if (planFilePathSet == 0 && (planMode & SLEEF_PLAN_REFERTOENVVAR) != 0) {
    char *s = getenv(ENVVAR);
    if (s != NULL) SleefDFT_setPlanFilePath(s, NULL, planMode);
  }

  // Drop any previous map AND forget the pointer. Without resetting it, a
  // call that does not load a file would leave planMap pointing at the
  // disposed map, and the "planMap == NULL" fallback below could not fire.
  if (planMap != NULL) {
    ArrayMap_dispose(planMap);
    planMap = NULL;
  }

  if (dftPlanFilePath != NULL && (planMode & SLEEF_PLAN_RESET) == 0) {
    planMap = ArrayMap_load(dftPlanFilePath, archID, PLANFILEID, (planMode & SLEEF_PLAN_NOLOCK) == 0);
  }

  if (planMap == NULL) planMap = initArrayMap();

  planFileLoaded = 1;
}
// Writes planMap to the plan file. Does nothing when no path is set or when
// SLEEF_PLAN_READONLY is in effect. The plan must already have been loaded.
static void savePlanToFile() {
  assert(planFileLoaded);

  if (dftPlanFilePath == NULL) return;
  if ((planMode & SLEEF_PLAN_READONLY) != 0) return;

  ArrayMap_save(planMap, dftPlanFilePath, archID, PLANFILEID);
}
// Widths, in bits, of the fields packed into plan map keys.
#define CATBIT 8
#define BASETYPEIDBIT 2
#define LOG2LENBIT 8
#define DIRBIT 1
#define BUTSTATBIT 16

// Builds the key (category 0) for a status entry.
// Layout, most significant field first:
//   butStat (BUTSTATBIT) | log2len (LOG2LENBIT) | dir (DIRBIT) |
//   baseTypeID (BASETYPEIDBIT) | category (CATBIT)
// `dir` is passed as a SLEEF_MODE_* flag set and reduced to a single bit:
// 1 when SLEEF_MODE_BACKWARD is NOT set, 0 when it is.
static uint64_t keyButStat(int baseTypeID, int log2len, int dir, int butStat) {
  dir = (dir & SLEEF_MODE_BACKWARD) == 0;
  int cat = 0;
  uint64_t k = 0;
  k = (k << BUTSTATBIT) | (butStat & ~(~(uint64_t)0 << BUTSTATBIT));
  k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT));
  // The dir field is DIRBIT wide, so it is masked with DIRBIT (the mask used
  // to be built from LOG2LENBIT; the key value is unchanged because dir is
  // already 0 or 1 here).
  k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << DIRBIT));
  k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT));
  k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT));
  return k;
}
// Additional field widths, in bits, used by the keys below.
#define LEVELBIT LOG2LENBIT
#define BUTCONFIGBIT 8
#define TRANSCONFIGBIT 8

// Builds the key (category 2) for a 2-D entry.
// Layout, most significant field first:
//   transConfig (TRANSCONFIGBIT) | larger length (LOG2LENBIT) |
//   smaller length (LOG2LENBIT) | baseTypeID (BASETYPEIDBIT) | category (CATBIT)
// The two lengths are ordered, so swapping hlen and vlen yields the same key.
static uint64_t keyTrans(int baseTypeID, int hlen, int vlen, int transConfig) {
  const int longer = MAX(hlen, vlen), shorter = MIN(hlen, vlen);
  const int category = 2;

  const uint64_t lenMask = ~(~(uint64_t)0 << LOG2LENBIT);

  uint64_t key = (uint64_t)transConfig & ~(~(uint64_t)0 << TRANSCONFIGBIT);

  key <<= LOG2LENBIT;
  key |= (uint64_t)longer & lenMask;

  key <<= LOG2LENBIT;
  key |= (uint64_t)shorter & lenMask;

  key <<= BASETYPEIDBIT;
  key |= (uint64_t)baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT);

  key <<= CATBIT;
  key |= (uint64_t)category & ~(~(uint64_t)0 << CATBIT);

  return key;
}
// Builds the key (category 3) under which the width stored for one level of
// a path is kept.
// Layout, most significant field first:
//   config (BUTCONFIGBIT) | level (LEVELBIT) | log2len (LOG2LENBIT) |
//   dir (DIRBIT) | baseTypeID (BASETYPEIDBIT) | category (CATBIT)
// `dir` is passed as a SLEEF_MODE_* flag set and reduced to a single bit:
// 1 when SLEEF_MODE_BACKWARD is NOT set, 0 when it is.
static uint64_t keyPath(int baseTypeID, int log2len, int dir, int level, int config) {
  dir = (dir & SLEEF_MODE_BACKWARD) == 0;
  int cat = 3;
  uint64_t k = 0;
  k = (k << BUTCONFIGBIT) | (config & ~(~(uint64_t)0 << BUTCONFIGBIT));
  k = (k << LEVELBIT) | (level & ~(~(uint64_t)0 << LEVELBIT));
  k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT));
  // Mask the one-bit dir field with its own width (was LOG2LENBIT; the key
  // value is unchanged because dir is already 0 or 1 here).
  k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << DIRBIT));
  k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT));
  k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT));
  return k;
}
// Builds the key (category 4) under which the config stored for one level of
// a path is kept. Same layout as keyPath, only the category differs:
//   config (BUTCONFIGBIT) | level (LEVELBIT) | log2len (LOG2LENBIT) |
//   dir (DIRBIT) | baseTypeID (BASETYPEIDBIT) | category (CATBIT)
// `dir` is passed as a SLEEF_MODE_* flag set and reduced to a single bit:
// 1 when SLEEF_MODE_BACKWARD is NOT set, 0 when it is.
static uint64_t keyPathConfig(int baseTypeID, int log2len, int dir, int level, int config) {
  dir = (dir & SLEEF_MODE_BACKWARD) == 0;
  int cat = 4;
  uint64_t k = 0;
  k = (k << BUTCONFIGBIT) | (config & ~(~(uint64_t)0 << BUTCONFIGBIT));
  k = (k << LEVELBIT) | (level & ~(~(uint64_t)0 << LEVELBIT));
  k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT));
  // Mask the one-bit dir field with its own width (was LOG2LENBIT; the key
  // value is unchanged because dir is already 0 or 1 here).
  k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << DIRBIT));
  k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT));
  k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT));
  return k;
}
// Looks up `key` in planMap and returns the stored value, which is kept as
// hexadecimal text. Returns 0 when the key is absent or the text does not
// parse.
static uint64_t planMap_getU64(uint64_t key) {
  uint64_t parsed;
  char *text = ArrayMap_get(planMap, key);
  return (text != NULL && sscanf(text, "%" SCNx64, &parsed) == 1) ? parsed : 0;
}
// Stores `value` under `key` in planMap as freshly allocated hexadecimal
// text. Whatever pointer ArrayMap_put hands back is freed here.
static void planMap_putU64(uint64_t key, uint64_t value) {
  char *text = malloc(100);
  sprintf(text, "%" PRIx64, value);

  char *returned = ArrayMap_put(planMap, key, text);
  free(returned);  // free(NULL) is a no-op
}
// Loads a previously stored path for a 1-D plan from the plan map into
// p->bestPath / p->bestPathConfig and recomputes p->pathLen.
//
//   p       : 1-D plan (asserted via its magic tag)
//   pathCat : path category; used as the `config` field of the per-level keys
//             and, offset by 10, as the `butStat` field of the status key
//
// Returns 0 when no entry is stored for this plan, or when any stored width
// exceeds MAXBUTWIDTH; returns 1 otherwise. Under OpenMP the whole lookup
// runs with planMapLock held.
int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) {
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));
initPlanMapLock();
#ifdef _OPENMP
omp_set_lock(&planMapLock);
#endif
// The plan file is loaded lazily, on first access.
if (!planFileLoaded) loadPlanFromFile();
// A zero status entry means nothing has been saved for this plan.
int stat = planMap_getU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10));
if (stat == 0) {
#ifdef _OPENMP
omp_unset_lock(&planMapLock);
#endif
return 0;
}
int ret = 1;
for(int j = p->log2len;j >= 0;j--) {
p->bestPath[j] = planMap_getU64(keyPath(p->baseTypeID, p->log2len, p->mode, j, pathCat));
p->bestPathConfig[j] = planMap_getU64(keyPathConfig(p->baseTypeID, p->log2len, p->mode, j, pathCat));
// An out-of-range width marks the result as unusable, but the remaining
// levels are still loaded.
if (p->bestPath[j] > MAXBUTWIDTH) ret = 0;
}
// pathLen is the number of levels with a non-zero width.
p->pathLen = 0;
for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++;
#ifdef _OPENMP
omp_unset_lock(&planMapLock);
#endif
return ret;
}
// Stores the current best path of a 1-D plan in the plan map and, unless the
// plan is read-only, writes the map to the plan file.
//
//   p       : 1-D plan (asserted via its magic tag)
//   pathCat : path category; used as the `config` field of the per-level keys
//             and, offset by 10, as the `butStat` field of the status key
//
// An existing entry is never overwritten: if the status entry is already
// non-zero the function returns without changing anything. Under OpenMP the
// whole update runs with planMapLock held.
void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) {
assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE));
initPlanMapLock();
#ifdef _OPENMP
omp_set_lock(&planMapLock);
#endif
// The plan file is loaded lazily, on first access.
if (!planFileLoaded) loadPlanFromFile();
if (planMap_getU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10)) != 0) {
#ifdef _OPENMP
omp_unset_lock(&planMapLock);
#endif
return;
}
for(int j = p->log2len;j >= 0;j--) {
planMap_putU64(keyPath(p->baseTypeID, p->log2len, p->mode, j, pathCat), p->bestPath[j]);
planMap_putU64(keyPathConfig(p->baseTypeID, p->log2len, p->mode, j, pathCat), p->bestPathConfig[j]);
}
// The status entry is written after the per-level entries.
planMap_putU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10), 1);
// savePlanToFile repeats the read-only test itself.
if ((planMode & SLEEF_PLAN_READONLY) == 0) savePlanToFile();
#ifdef _OPENMP
omp_unset_lock(&planMapLock);
#endif
}
// Loads the two values stored for a 2-D plan into p->tmNoMT (key config 0)
// and p->tmMT (key config 1). A missing entry reads as 0.
//
//   p : 2-D plan (asserted via its magic tag)
//
// Returns non-zero iff the loaded tmNoMT is non-zero. Under OpenMP the lookup
// runs with planMapLock held.
int PlanManager_loadMeasurementResultsT(SleefDFT *p) {
assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE));
initPlanMapLock();
#ifdef _OPENMP
omp_set_lock(&planMapLock);
#endif
// The plan file is loaded lazily, on first access.
if (!planFileLoaded) loadPlanFromFile();
p->tmNoMT = planMap_getU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 0));
p->tmMT = planMap_getU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 1));
#ifdef _OPENMP
omp_unset_lock(&planMapLock);
#endif
return p->tmNoMT != 0;
}
// Stores p->tmNoMT (key config 0) and p->tmMT (key config 1) of a 2-D plan in
// the plan map and, unless the plan is read-only, writes the map to the plan
// file. Unlike the 1-D variant, existing entries are overwritten.
//
//   p : 2-D plan (asserted via its magic tag)
//
// Under OpenMP the update runs with planMapLock held.
void PlanManager_saveMeasurementResultsT(SleefDFT *p) {
assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE));
initPlanMapLock();
#ifdef _OPENMP
omp_set_lock(&planMapLock);
#endif
// The plan file is loaded lazily, on first access.
if (!planFileLoaded) loadPlanFromFile();
planMap_putU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 0), p->tmNoMT);
planMap_putU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 1), p->tmMT );
// savePlanToFile repeats the read-only test itself.
if ((planMode & SLEEF_PLAN_READONLY) == 0) savePlanToFile();
#ifdef _OPENMP
omp_unset_lock(&planMapLock);
#endif
}

View File

@ -0,0 +1,517 @@
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <cctype>
#include <cinttypes>
#include <cassert>
#include <cmath>
#include <omp.h>
#include <vector>
#include "compat.h"
#include "misc.h"
#include "sleef.h"
#define IMPORT_IS_EXPORT
#include "sleefdft.h"
#include "dftcommon.hpp"
#include "common.h"
#include "serializer.hpp"
// Printable names of the four config values 0..3; indexed by config when a
// path is parsed or printed.
const char *configStr[] = { "ST", "ST stream", "MT", "MT stream" };

// Parses a path string of the form "N[(config)] N[(config)] ..." into a list
// of actions. Each N is a decimal number; the optional config is one of the
// names in configStr (0 when omitted). Parsing starts at level log2len and
// each step lowers the level by its N; the steps must consume the level
// exactly.
//
// Returns one Action(config, level, N) per step, in input order.
// Throws runtime_error on an unexpected character, an N above
// MAXBUTWIDTHALL, an N above the remaining level, an unknown config name, a
// missing ')', or when the level is not zero at the end of the string.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
vector<Action> SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::parsePathStr(const char *p) {
  vector<Action> v;

  int level = log2len;
  for(;;) {
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behavior.
    while(isspace((unsigned char)*p)) p++;
    if (*p == '\0') break;
    if (!isdigit((unsigned char)*p)) throw(runtime_error("Unexpected character"));

    int N = 0;
    while(isdigit((unsigned char)*p)) {
      N = N * 10 + (*p++ - '0');
      // Reject as soon as the limit is exceeded so that a very long digit
      // string cannot overflow N. The exception is the same one the check
      // after the loop used to raise.
      if (N > MAXBUTWIDTHALL) throw(runtime_error("N too large"));
    }
    if (N > level) throw(runtime_error("N larger than level"));

    int config = 0;
    if (*p == '(') {
      p++;

      // Search from the highest index down so that the longer names
      // ("ST stream", "MT stream") are tried before their prefixes.
      for(config=3;config>=0;config--) {
        if (strncmp(p, configStr[config], strlen(configStr[config])) == 0) break;
      }
      if (config == -1) throw(runtime_error("Unknown config"));
      p += strlen(configStr[config]);
      if (*p++ != ')') throw(runtime_error("No ')' after config"));
    }

    v.push_back(Action(config, level, N));
    level -= N;
  }

  if (level != 0) throw(runtime_error("Sum of N less than level"));

  return v;
}
// Renders a path as text: every action becomes "N(<config>) ", where <config>
// is the configStr name for configs 0..3 and "? <number>" for anything else.
static string to_string(vector<Action> v) {
  string text;
  for (const Action &act : v) {
    const bool known = act.config >= 0 && act.config < 4;
    const string label = known ? string(configStr[act.config])
                               : "? " + to_string(act.config);
    text += to_string(act.N);
    text += "(";
    text += label;
    text += ") ";
  }
  return text;
}
// Replaces bestPath with the path parsed from pathStr. If parsing throws,
// bestPath is left unchanged. In verbose mode the outcome (the accepted path
// or the parse error) is written to verboseFP.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
void SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::setPath(const char *pathStr) {
  assert(magic == MAGIC_FLOAT || magic == MAGIC_DOUBLE);

  const bool verbose = (mode & SLEEF_MODE_VERBOSE) != 0;
  try {
    bestPath = parsePathStr(pathStr);
    if (verbose) {
      fprintf(verboseFP, "Set path : %s\n", to_string(bestPath).c_str());
    }
  } catch(exception &ex) {
    if (verbose) {
      fprintf(verboseFP, "Parse error : %s\n", ex.what());
    }
  }
}
// Applies a 2D path string of the form "<mt>:<pathH>,<pathV>".
//
// <mt> is read with sscanf("%d") and stored in planMT; the text between the
// first ':' and the following ',' is handed to instH, the rest to instV.
// Returns without touching the sub-plans if the number, the ':' or the ','
// is missing (planMT has already been updated once the number was read).
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
void SleefDFT2DXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::setPath(const char *pathStr) {
  assert(magic == MAGIC2D_FLOAT || magic == MAGIC2D_DOUBLE);

  int mtFlag = 0;
  if (sscanf(pathStr, "%d", &mtFlag) != 1) return;
  planMT = mtFlag;

  const string whole(pathStr);
  const size_t colon = whole.find(':');
  if (colon == string::npos) return;

  const size_t comma = whole.find(',', colon + 1);
  if (comma == string::npos) return;

  const string horizontal = whole.substr(colon + 1, comma - colon - 1);
  const string vertical = whole.substr(comma + 1);

  instH->setPath(horizontal.c_str());
  instV->setPath(vertical.c_str());
}
// Returns the current bestPath rendered as text by to_string(vector<Action>).
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
string SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::getPath() {
assert(magic == MAGIC_FLOAT || magic == MAGIC_DOUBLE);
return to_string(bestPath);
}
// Returns the 2D path as "<planMT as 0/1>:<path of instH>,<path of instV>",
// the same layout that SleefDFT2DXX::setPath() reads.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
string SleefDFT2DXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::getPath() {
  assert(magic == MAGIC2D_FLOAT || magic == MAGIC2D_DOUBLE);

  string path = to_string((int)planMT);
  path += ":";
  path += instH->getPath();
  path += ",";
  path += instV->getPath();
  return path;
}
// C entry point: forwards pathStr to the setPath() of whichever plan object
// the handle wraps, selected by p->magic. Aborts on an unknown magic value.
EXPORT void SleefDFT_setPath(SleefDFT *p, char *pathStr) {
  assert(p != NULL);

  const uint32_t kind = p->magic;
  if (kind == MAGIC_DOUBLE) {
    p->double_->setPath(pathStr);
  } else if (kind == MAGIC_FLOAT) {
    p->float_->setPath(pathStr);
  } else if (kind == MAGIC2D_DOUBLE) {
    p->double2d_->setPath(pathStr);
  } else if (kind == MAGIC2D_FLOAT) {
    p->float2d_->setPath(pathStr);
  } else {
    abort();
  }
}
// C entry point: copies the textual path of the plan wrapped by p into
// pathStr and returns the number of characters stored (excluding the
// terminating NUL).
//
// pathStr      destination buffer
// pathStrSize  size of the destination buffer in bytes
//
// At most pathStrSize - 1 characters are stored and the result is always
// NUL-terminated; a path that does not fit is truncated. Returns 0 without
// writing anything when pathStr is NULL or pathStrSize is not positive.
// Aborts on an unknown magic value.
EXPORT int SleefDFT_getPath(SleefDFT *p, char *pathStr, int pathStrSize) {
  assert(p != NULL);
  string str;
  switch(p->magic) {
  case MAGIC_DOUBLE:
    str = p->double_->getPath();
    break;
  case MAGIC_FLOAT:
    str = p->float_->getPath();
    break;
  case MAGIC2D_DOUBLE:
    str = p->double2d_->getPath();
    break;
  case MAGIC2D_FLOAT:
    str = p->float2d_->getPath();
    break;
  default: abort();
  }

  if (pathStr == NULL || pathStrSize <= 0) return 0;

  // strncpy() does not terminate the destination when the source has
  // pathStrSize or more characters, so terminate explicitly before strlen().
  strncpy(pathStr, str.c_str(), pathStrSize);
  pathStr[pathStrSize - 1] = '\0';
  return (int)strlen(pathStr);
}
// Releases the per-width tables tbl[1..MAXBUTWIDTH] (entries N..log2len of
// each) and the per-thread buffers x0/x1, then resets the owning pointers to
// null.
//
// Arrays that are already null are skipped, so calling this again after the
// pointers have been reset is harmless.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
void SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::freeTables() {
  for(int N=1;N<=MAXBUTWIDTH;N++) {
    if (tbl[N] == nullptr) continue;   // nothing allocated, or already freed
    for(uint32_t level=N;level<=log2len;level++) {
      Sleef_free(tbl[N][level]);
      tbl[N][level] = nullptr;
    }
    free(tbl[N]);
    tbl[N] = NULL;
  }

  for(int i=0;i<nThread;i++) {
    if (x1 != nullptr) {
      Sleef_free(x1[i]);
      x1[i] = nullptr;
    }
    if (x0 != nullptr) {
      Sleef_free(x0[i]);
      x0[i] = nullptr;
    }
  }

  free(x1);
  x1 = nullptr;
  free(x0);
  x0 = nullptr;
}
// Destructor. For log2len <= 1 nothing is released; otherwise it frees the
// real-transform coefficients (only in SLEEF_MODE_REAL), the permutation
// tables perm[1..log2len], and everything handled by freeTables(). In both
// cases magic is cleared last.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::~SleefDFTXX() {
  assert(magic == MAGIC_FLOAT || magic == MAGIC_DOUBLE);

  if (log2len > 1) {
    if ((mode & SLEEF_MODE_REAL) != 0) {
      Sleef_free(rtCoef1);
      rtCoef1 = nullptr;
      Sleef_free(rtCoef0);
      rtCoef0 = nullptr;
    }

    // Walk the permutation tables from the top level down to level 1.
    for (uint32_t level = log2len; level > 0; --level) {
      Sleef_free(perm[level]);
      perm[level] = nullptr;
    }
    free(perm);
    perm = NULL;

    freeTables();
  }

  magic = 0;
}
// Destructor: frees the transpose buffer and the sub-plans, then clears magic.
// instV is deleted only when hlen != vlen.
// NOTE(review): presumably instV aliases instH for square transforms, which is
// why it must not be deleted twice - confirm against the constructor.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
SleefDFT2DXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::~SleefDFT2DXX() {
  assert(magic == MAGIC2D_FLOAT || magic == MAGIC2D_DOUBLE);

  Sleef_free(tBuf);
  tBuf = nullptr;

  const bool separateV = (hlen != vlen);

  delete instH;
  instH = nullptr;

  if (separateV) {
    delete instV;
    instV = nullptr;
  }

  magic = 0;
}
// C entry point: destroys the plan object wrapped by the handle (selected by
// p->magic), invalidates the handle and frees it. Aborts on an unknown magic
// value without freeing anything.
EXPORT void SleefDFT_dispose(SleefDFT *p) {
  assert(p != NULL);

  switch(p->magic) {
  case MAGIC_DOUBLE:   delete p->double_;   break;
  case MAGIC2D_DOUBLE: delete p->double2d_; break;
  case MAGIC_FLOAT:    delete p->float_;    break;
  case MAGIC2D_FLOAT:  delete p->float2d_;  break;
  default: abort();
  }

  // Every member of the handle's union is a plain object pointer sharing the
  // same storage, so resetting one of them resets the handle's pointer slot.
  p->magic = 0;
  p->double_ = nullptr;
  free(p);
}
// PlanManager
// Builds the plan-map key of this 1D transform:
//   <D|S><r|c><b|f><o|w><s|m><log2len>,0[:<suffix>]
// D/S: baseTypeID == 1 or not; r/c: SLEEF_MODE_REAL set or not; b/f:
// SLEEF_MODE_BACKWARD; o/w: SLEEF_MODE_ALT; s/m: SLEEF_MODE_NO_MT.
// ":<suffix>" is appended only for a non-empty suffix.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
string SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::planKeyString(string suffix) {
  string key;
  key += (baseTypeID == 1) ? 'D' : 'S';
  key += ((mode & SLEEF_MODE_REAL) != 0) ? 'r' : 'c';
  key += ((mode & SLEEF_MODE_BACKWARD) != 0) ? 'b' : 'f';
  key += ((mode & SLEEF_MODE_ALT) != 0) ? 'o' : 'w';
  key += ((mode & SLEEF_MODE_NO_MT) != 0) ? 's' : 'm';
  key += to_string(log2len);
  key += ",0";
  if (!suffix.empty()) {
    key += ':';
    key += suffix;
  }
  return key;
}
// Builds the plan-map key of this 2D transform:
//   <D|S><r|c><b|f><o|w><s|m><log2hlen>,<log2vlen>[:<suffix>]
// D/S: baseTypeID == 1 or not; r/c: SLEEF_MODE_REAL set or not; b/f:
// SLEEF_MODE_BACKWARD; o/w: SLEEF_MODE_ALT; s/m: SLEEF_MODE_NO_MT.
// ":<suffix>" is appended only for a non-empty suffix.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
string SleefDFT2DXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::planKeyString(string suffix) {
  string key;
  key += (baseTypeID == 1) ? 'D' : 'S';
  key += ((mode & SLEEF_MODE_REAL) != 0) ? 'r' : 'c';
  key += ((mode & SLEEF_MODE_BACKWARD) != 0) ? 'b' : 'f';
  key += ((mode & SLEEF_MODE_ALT) != 0) ? 'o' : 'w';
  key += ((mode & SLEEF_MODE_NO_MT) != 0) ? 's' : 'm';
  key += to_string(log2hlen);
  key += ',';
  key += to_string(log2vlen);
  if (!suffix.empty()) {
    key += ':';
    key += suffix;
  }
  return key;
}
// Returns the build-dependent prefix of a plan ID:
//   <s|n>CONFIGMAX,ISAMAX,MAXBUTWIDTHDP,MAXBUTWIDTHSP,MINSHIFTDP,MAXSHIFTDP,MINSHIFTSP,MAXSHIFTSP:
// The leading letter is 's' when ENABLE_STREAM is defined and 'n' otherwise.
// NOTE(review): this tests whether ENABLE_STREAM is defined, not its value -
// confirm that it is never defined as 0 in this translation unit.
static string getPlanIdPrefix() {
#ifdef ENABLE_STREAM
  const char streamTag = 's';
#else
  const char streamTag = 'n';
#endif
  const int params[] = {
    CONFIGMAX, ISAMAX,
    MAXBUTWIDTHDP, MAXBUTWIDTHSP,
    MINSHIFTDP, MAXSHIFTDP,
    MINSHIFTSP, MAXSHIFTSP
  };
  const size_t count = sizeof(params) / sizeof(params[0]);

  string prefix(1, streamTag);
  for (size_t i = 0; i < count; i++) {
    prefix += to_string(params[i]);
    // Values are comma-separated; the last one is closed with a colon.
    prefix += (i + 1 < count) ? ',' : ':';
  }
  return prefix;
}
// Initialises planID to the build prefix followed by the CPU ID string
// reported by Sleef_getCpuIdString().
PlanManager::PlanManager()
  : planID(getPlanIdPrefix() + Sleef_getCpuIdString()) {
}
void PlanManager::setPlanFilePath(const char *path, const char *arch, uint64_t mode) {
planMode_ = mode;
dftPlanFilePath = "";
if (path != NULL) dftPlanFilePath = path;
planID = Sleef_getCpuIdString();
if (arch != NULL) planID = arch;
planID = getPlanIdPrefix() + planID;
if ((mode & SLEEF_PLAN_RESET) != 0) std::get<0>(thePlan)[planID].clear();
}
void PlanManager::loadPlanFromFile() {
if ((planMode_ & SLEEF_PLAN_REFERTOENVVAR) != 0) {
char *s = std::getenv(ENVVAR);
if (s != NULL) SleefDFT_setPlanFilePath(s, NULL, planMode_);
}
if (dftPlanFilePath != "") {
FILE *fp = fopen(dftPlanFilePath.c_str(), "rb");
if (fp) {
if (!(planMode_ & SLEEF_PLAN_NOLOCK)) FLOCK(fp);
FileDeserializer d(fp);
tuple<unordered_map<string, unordered_map<string, string>>, string> plan;
try {
d >> plan;
} catch(exception &ex) {}
if (!(planMode_ & SLEEF_PLAN_NOLOCK)) FUNLOCK(fp);
fclose(fp);
if (std::get<1>(plan) == PLANFILEID) thePlan = plan;
}
}
}
bool PlanManager::savePlanToFile(const string &fn) {
if (fn != "") {
FILE *fp = fopen(fn.c_str(), "wb");
if (fp) {
FLOCK(fp);
FileSerializer s(fp);
std::get<1>(thePlan) = PLANFILEID;
s << thePlan;
FUNLOCK(fp);
fclose(fp);
return true;
}
}
return false;
}
// Saves the plan to the configured plan file. Returns false without writing
// when the plan mode has SLEEF_PLAN_READONLY set; otherwise returns the result
// of savePlanToFile(dftPlanFilePath).
bool PlanManager::savePlanToFile() {
  const bool readOnly = (planMode_ & SLEEF_PLAN_READONLY) != 0;
  return readOnly ? false : savePlanToFile(dftPlanFilePath);
}
// Read-modify-write of the plan file in one pass: reloads the plan from the
// file (if the file is non-empty and carries PLANFILEID), stores key -> value
// under the current planID, and writes the whole plan back from the start of
// the file. The file is opened once and, unless SLEEF_PLAN_NOLOCK is set, kept
// locked from before the read until after the write.
//
// Returns true when the file could be opened (or created) and was written,
// false when no path is configured or the file cannot be opened.
//
// NOTE(review): the file is rewritten in place and never truncated; if the new
// serialised plan is shorter than the previous contents, old bytes remain
// after it - confirm the deserializer ignores trailing data.
bool PlanManager::loadAndPutToFile(const string& key, const string& value) {
// Optionally take the plan file path from the environment first.
if ((planMode_ & SLEEF_PLAN_REFERTOENVVAR) != 0) {
char *s = std::getenv(ENVVAR);
if (s != NULL) SleefDFT_setPlanFilePath(s, NULL, planMode_);
}
if (dftPlanFilePath != "") {
// Open for update without truncating; create the file if it does not exist.
FILE *fp = fopen(dftPlanFilePath.c_str(), "r+b");
if (!fp) fp = fopen(dftPlanFilePath.c_str(), "w+b");
if (fp) {
if (!(planMode_ & SLEEF_PLAN_NOLOCK)) FLOCK(fp);
// A zero-length file has nothing to load.
fseek(fp, 0, SEEK_END);
if (ftell(fp) != 0) {
fseek(fp, 0, SEEK_SET);
FileDeserializer d(fp);
tuple<unordered_map<string, unordered_map<string, string>>, string> plan;
try {
d >> plan;
// Parse errors are ignored; the data is adopted only if the ID check passes.
} catch(exception &ex) {}
if (std::get<1>(plan) == PLANFILEID) thePlan = plan;
}
std::get<0>(thePlan)[planID][key] = value;
std::get<1>(thePlan) = PLANFILEID;
// Rewind before switching from reading to writing.
fseek(fp, 0, SEEK_SET);
FileSerializer s(fp);
s << thePlan;
if (!(planMode_ & SLEEF_PLAN_NOLOCK)) FUNLOCK(fp);
fclose(fp);
return true;
}
}
return false;
}
// C entry point: forwards path, arch and mode to the global planManager's
// setPlanFilePath().
EXPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode) {
planManager.setPlanFilePath(path, arch, mode);
}
// C entry point: writes the in-memory plan of the global planManager to the
// file pathStr. Returns 1 when the file was written and 0 otherwise,
// including when pathStr is NULL.
EXPORT int SleefDFT_savePlan(const char *pathStr) {
  // Constructing a std::string from a null pointer is undefined behaviour.
  if (pathStr == NULL) return 0;
  return (int)planManager.savePlanToFile(string(pathStr));
}
// Returns the value stored under key for the current planID, or an empty
// string when there is none. The per-planID table is created if it does not
// exist yet.
string PlanManager::get(const string& key) {
  unordered_map<string, string> &entries = std::get<0>(thePlan)[planID];
  const auto found = entries.find(key);
  if (found == entries.end()) return "";
  return found->second;
}
void PlanManager::put(const string& key, const string& value) {
std::get<0>(thePlan)[planID][key] = value;
}
//
// Publishes the current path of this 1D plan under its plan key while holding
// planManager.mtx. In SLEEF_PLAN_AUTOMATIC mode the entry goes straight into
// the plan file (reported on verboseFP in verbose mode when the write
// succeeded); otherwise it is only stored in memory.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
void SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::saveMeasurementResults() {
  assert(magic == MAGIC_FLOAT || magic == MAGIC_DOUBLE);
  lock_guard<recursive_mutex> guard(planManager.mtx);

  const string key = planKeyString();
  const string path = getPath();

  if ((planManager.planMode() & SLEEF_PLAN_AUTOMATIC) == 0) {
    planManager.put(key, path);
    return;
  }

  const bool written = planManager.loadAndPutToFile(key, path);
  if (written && (mode & SLEEF_MODE_VERBOSE) != 0) {
    fprintf(verboseFP, "Saving plan to file\n");
  }
}
// Publishes the current path of this 2D plan under its plan key while holding
// planManager.mtx. In SLEEF_PLAN_AUTOMATIC mode the entry goes straight into
// the plan file (reported on verboseFP in verbose mode when the write
// succeeded); otherwise it is only stored in memory.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
void SleefDFT2DXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::saveMeasurementResults() {
  assert(magic == MAGIC2D_FLOAT || magic == MAGIC2D_DOUBLE);
  lock_guard<recursive_mutex> guard(planManager.mtx);

  const string key = planKeyString();
  const string path = getPath();

  if ((planManager.planMode() & SLEEF_PLAN_AUTOMATIC) == 0) {
    planManager.put(key, path);
    return;
  }

  const bool written = planManager.loadAndPutToFile(key, path);
  if (written && (mode & SLEEF_MODE_VERBOSE) != 0) {
    fprintf(verboseFP, "Saving plan to file\n");
  }
}
// Reloads the plan file and, if it holds a path for this 1D plan's key,
// applies that path via setPath(). Runs under planManager.mtx.
// Returns true when a stored path was found, false otherwise.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
bool SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::loadMeasurementResults() {
  assert(magic == MAGIC_FLOAT || magic == MAGIC_DOUBLE);
  lock_guard<recursive_mutex> guard(planManager.mtx);

  planManager.loadPlanFromFile();
  const string stored = planManager.get(planKeyString());
  const bool found = !stored.empty();
  if (found) setPath(stored.c_str());
  return found;
}
// Reloads the plan file and, if it holds a path for this 2D plan's key,
// applies that path via setPath(). Runs under planManager.mtx.
// Returns true when a stored path was found, false otherwise.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
bool SleefDFT2DXX<real, real2, MAXSHIFT, MAXBUTWIDTH>::loadMeasurementResults() {
  assert(magic == MAGIC2D_FLOAT || magic == MAGIC2D_DOUBLE);
  lock_guard<recursive_mutex> guard(planManager.mtx);

  planManager.loadPlanFromFile();
  const string stored = planManager.get(planKeyString());
  const bool found = !stored.empty();
  if (found) setPath(stored.c_str());
  return found;
}
// Instantiation
// 1D plans, double precision
template void SleefDFTXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::freeTables();
template SleefDFTXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::~SleefDFTXX();
template bool SleefDFTXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::loadMeasurementResults();
template void SleefDFTXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::saveMeasurementResults();

// 1D plans, single precision
template void SleefDFTXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::freeTables();
template SleefDFTXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::~SleefDFTXX();
template bool SleefDFTXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::loadMeasurementResults();
template void SleefDFTXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::saveMeasurementResults();

// 2D plans, double precision
template SleefDFT2DXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::~SleefDFT2DXX();
template bool SleefDFT2DXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::loadMeasurementResults();
template void SleefDFT2DXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP>::saveMeasurementResults();

// 2D plans, single precision
template SleefDFT2DXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::~SleefDFT2DXX();
template bool SleefDFT2DXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::loadMeasurementResults();
template void SleefDFT2DXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP>::saveMeasurementResults();
// The single plan manager instance used by every function in this file.
PlanManager planManager;
// Default stream for verbose output; starts as stdout.
FILE *defaultVerboseFP = stdout;
// C entry point: replaces the default verbose stream. fp is stored as given,
// with no validation.
EXPORT void SleefDFT_setDefaultVerboseFP(FILE *fp) {
defaultVerboseFP = fp;
}

View File

@ -1,69 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
// Number of execution configs; CONFIG_STREAM and CONFIG_MT are the two bits
// that make up a config number.
#define CONFIGMAX 4
#define CONFIG_STREAM 1
#define CONFIG_MT 2
#define MAXLOG2LEN 32
// Plan object of the C implementation. The common header fields are followed
// by an anonymous union.
// NOTE(review): the first union member looks like the state of a 1D plan and
// the second like the state of a 2D plan (hlen/vlen, instH/instV) - confirm.
typedef struct SleefDFT {
uint32_t magic;
uint64_t mode, mode2, mode3;
int baseTypeID;
const void *in;
void *out;
union {
struct {
uint32_t log2len;
void **tbl[MAXBUTWIDTH+1];
void *rtCoef0, *rtCoef1;
uint32_t **perm;
void **x0, **x1;
int isa;
int planMode;
int vecwidth, log2vecwidth;
int nThread;
uint64_t tm[CONFIGMAX][(MAXBUTWIDTH+1)*32];
uint64_t bestTime;
int16_t bestPath[32], bestPathConfig[32], pathLen;
};
struct {
int32_t hlen, vlen;
int32_t log2hlen, log2vlen;
// Timing values stored and loaded by the PlanManager_*MeasurementResultsT functions.
uint64_t tmNoMT, tmMT;
struct SleefDFT *instH, *instV;
void *tBuf;
};
};
} SleefDFT;
// Bit 0 of the mode2 / mode3 fields respectively.
#define SLEEF_MODE2_MT1D (1 << 0)
#define SLEEF_MODE3_MT2D (1 << 0)
// Identifier string of the plan file format, and the name of the environment
// variable associated with the plan file.
#define PLANFILEID "SLEEFDFT0\n"
#define ENVVAR "SLEEFDFTPLAN"
#define SLEEF_MODE_MEASUREBITS (3 << 20)
void freeTables(SleefDFT *p);
uint32_t ilog2(uint32_t q);
// Plan persistence helpers. The load functions return int and the save
// functions return nothing. The "B" variants are disabled.
//int PlanManager_loadMeasurementResultsB(SleefDFT *p);
//void PlanManager_saveMeasurementResultsB(SleefDFT *p, int butStat);
int PlanManager_loadMeasurementResultsT(SleefDFT *p);
void PlanManager_saveMeasurementResultsT(SleefDFT *p);
int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat);
void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat);
// Selector codes; presumably passed to the per-ISA getInt_* functions -
// TODO confirm.
#define GETINT_VECWIDTH 100
#define GETINT_DFTPRIORITY 101

View File

@ -0,0 +1,237 @@
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <string>
#include <vector>
#include <climits>
#include <unordered_map>
#include <tuple>
#include <utility>
#include <mutex>
using namespace std;
#include "dispatchparam.h"
// Tags stored in the magic field of a plan object; they identify the concrete
// type (1D or 2D, float or double) and are checked by assertions.
#define MAGIC_FLOAT 0x31415926
#define MAGIC_DOUBLE 0x27182818
#define MAGIC2D_FLOAT 0x53589793
#define MAGIC2D_DOUBLE 0x28459045
// The two bits that make up an execution config number.
#define CONFIG_STREAM 1
#define CONFIG_MT 2
// Bit 0 of the mode2 / mode3 fields respectively.
#define SLEEF_MODE2_MT1D (1 << 0)
#define SLEEF_MODE3_MT2D (1 << 0)
// Identifier stored in a plan file; a loaded file is adopted only if its ID
// equals this string.
#define PLANFILEID "SLEEFDFT1"
// Name of the environment variable that can supply the plan file path.
#define ENVVAR "SLEEFDFTPLAN"
#define SLEEF_MODE_MEASUREBITS (7 << 20)
#define SLEEF_MODE_INTERNAL_2D (1ULL << 40)
// Selector codes; presumably passed to the per-ISA getInt_* functions -
// TODO confirm.
#define GETINT_VECWIDTH 100
#define GETINT_DFTPRIORITY 101
#define MAXLOG2LEN 32
// A double constant expression whose product exceeds the largest finite
// double, i.e. positive infinity under IEEE 754 arithmetic.
#define INFINITY_ (1e+300 * 1e+300)
// One step of an execution path: a config number, the level the step starts
// at, and the width N of the step. Comparable for equality and printable as
// "[config, level, N]".
class Action {
public:
  int config, level, N;

  Action(int config_, int level_, int N_) : config(config_), level(level_), N(N_) {}
  Action(const Action& a) = default;

  // Two actions are equal when all three fields match.
  bool operator==(const Action& rhs) const {
    if (config != rhs.config) return false;
    if (level != rhs.level) return false;
    return N == rhs.N;
  }

  bool operator!=(const Action& rhs) const { return !operator==(rhs); }

  friend ostream& operator<<(ostream &os, const Action &ac) {
    os << "[" << ac.config << ", " << ac.level << ", " << ac.N << "]";
    return os;
  }
};
// Hash for Action: folds config, level and N into a size_t, rotating the
// accumulator left by 7 bits between consecutive fields.
template <>
struct std::hash<Action> {
  size_t operator()(const Action &a) const {
    const size_t width = sizeof(size_t) * 8;
    const int fields[3] = { a.config, a.level, a.N };

    size_t acc = 0;
    for (int i = 0; i < 3; i++) {
      if (i != 0) acc = (acc << 7) | (acc >> (width - 7));
      acc ^= fields[i];
    }
    return acc;
  }
};
// 1D DFT plan, parameterised on the scalar type (real), the matching
// two-element type (real2) and the per-type limits MAXSHIFT and MAXBUTWIDTH.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
struct SleefDFTXX {
// Type tag (MAGIC_FLOAT or MAGIC_DOUBLE); cleared to 0 by the destructor.
int magic;
const int baseTypeID;
const real * const in;
real * const out;
const int nThread;
const uint32_t log2len;
const uint64_t mode;
const int minshift;
uint64_t mode2 = 0, mode3 = 0;
//
// Buffers owned by the plan: tbl, x0 and x1 are released by freeTables();
// rtCoef0/rtCoef1 and perm are released by the destructor.
real **tbl[MAXBUTWIDTH+1];
real *rtCoef0, *rtCoef1;
uint32_t **perm;
real **x0, **x1;
int isa = 0;
int planMode = 0;
int vecwidth, log2vecwidth;
bool executable[CONFIGMAX][MAXLOG2LEN][MAXLOG2LEN];
// Currently selected execution path; set by setPath() and returned as text by getPath().
vector<Action> bestPath;
// Destination of the messages printed in verbose mode.
FILE *verboseFP = NULL;
// Pointers to function-pointer tables with dimensions [ISAMAX][MAXBUTWIDTH+1]
// (the TBUT*S tables have an extra leading CONFIGMAX dimension).
void (*(* const DFTF)[ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int);
void (*(* const DFTB)[ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int);
void (*(* const TBUTF)[ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int);
void (*(* const TBUTB)[ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int);
void (*(* const BUTF)[ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int);
void (*(* const BUTB)[ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int);
void (** const REALSUB0)(real *, const real *, const int, const real *, const real *);
void (** const REALSUB1)(real *, const real *, const int, const real *, const real *, const int);
void (*(* const TBUTFS)[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const real *, const int);
void (*(* const TBUTBS)[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const real *, const int);
// The constructor receives the transform size, buffers and mode together with
// the per-type dispatch tables.
SleefDFTXX(uint32_t n, const real *in, real *out, uint64_t mode, const char *baseTypeString, int BASETYPEID_, int MAGIC_, int minshift_,
int (*GETINT_[16])(int), const void *(*GETPTR_[16])(int), real2 (*SINCOSPI_)(real),
void (*DFTF_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int),
void (*DFTB_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int),
void (*TBUTF_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int),
void (*TBUTB_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int),
void (*BUTF_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int),
void (*BUTB_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int),
void (*REALSUB0_[ISAMAX])(real *, const real *, const int, const real *, const real *),
void (*REALSUB1_[ISAMAX])(real *, const real *, const int, const real *, const real *, const int),
void (*TBUTFS_[MAXSHIFT][CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const real *, const int),
void (*TBUTBS_[MAXSHIFT][CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const real *, const int)
);
~SleefDFTXX();
void dispatch(const int N, real *d, const real *s, const int level, const int config);
void execute(const real *s0, real *d0, int MAGIC_, int MAGIC2D_);
void freeTables();
void generatePerm(const vector<Action> &);
void measurementRun(real *d, const real *s, const vector<Action> &path, uint64_t niter);
double measurePath(const vector<Action> &path, uint64_t minTime);
void searchForBestPath(int nPaths);
void searchForRandomPath();
bool measure(bool randomize);
// Path text <-> Action list conversion and plan persistence.
vector<Action> parsePathStr(const char *);
string planKeyString(string = "");
bool loadMeasurementResults();
void saveMeasurementResults();
void setPath(const char *pathStr);
string getPath();
};
// 2D DFT plan. It holds two 1D plans, instH and instV, and a buffer tBuf.
template<typename real, typename real2, int MAXSHIFT, int MAXBUTWIDTH>
struct SleefDFT2DXX {
// Type tag (MAGIC2D_FLOAT or MAGIC2D_DOUBLE); cleared to 0 by the destructor.
int magic;
uint64_t mode, mode2, mode3;
int baseTypeID;
const real *in;
real *out;
//
int32_t hlen, vlen;
int32_t log2hlen, log2vlen;
// Leading number of the 2D path string ("<planMT>:<pathH>,<pathV>").
bool planMT;
real *tBuf;
// The destructor deletes instV only when hlen != vlen.
SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH> *instH, *instV;
// Destination of the messages printed in verbose mode.
FILE *verboseFP = NULL;
SleefDFT2DXX(uint32_t vlen, uint32_t hlen, const real *in, real *out, uint64_t mode, const char *baseTypeString,
int BASETYPEID_, int MAGIC_, int MAGIC2D_, int minshift_,
int (*GETINT_[16])(int), const void *(*GETPTR_[16])(int), real2 (*SINCOSPI_)(real),
void (*DFTF_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int),
void (*DFTB_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int),
void (*TBUTF_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int),
void (*TBUTB_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int),
void (*BUTF_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int),
void (*BUTB_[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int),
void (*REALSUB0_[ISAMAX])(real *, const real *, const int, const real *, const real *),
void (*REALSUB1_[ISAMAX])(real *, const real *, const int, const real *, const real *, const int),
void (*TBUTFS_[MAXSHIFT][CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const real *, const int),
void (*TBUTBS_[MAXSHIFT][CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const real *, const int)
);
~SleefDFT2DXX();
void execute(const real *s0, real *d0, int MAGIC_, int MAGIC2D_);
pair<uint64_t, uint64_t> measureTranspose();
double measurePath(SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH> *inst, bool mt,
const vector<Action> &path, uint32_t hlen, uint32_t vlen, uint64_t minTime);
pair<vector<Action>, double> searchForBestPath(SleefDFTXX<real, real2, MAXSHIFT, MAXBUTWIDTH> *inst, bool mt, uint32_t hlen, uint32_t vlen, int nPaths);
// Plan persistence and path text handling.
string planKeyString(string = "");
bool loadMeasurementResults();
void saveMeasurementResults();
void setPath(const char *pathStr);
string getPath();
};
// Handle type of the C API: a magic tag plus a pointer to the C++ plan object.
// magic (MAGIC_DOUBLE, MAGIC_FLOAT, MAGIC2D_DOUBLE or MAGIC2D_FLOAT) tells
// which union member is the valid one.
struct SleefDFT {
uint32_t magic;
union {
SleefDFTXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP> *double_;
SleefDFTXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP> *float_;
SleefDFT2DXX<double, Sleef_double2, MAXSHIFTDP, MAXBUTWIDTHDP> *double2d_;
SleefDFT2DXX<float, Sleef_float2, MAXSHIFTSP, MAXBUTWIDTHSP> *float2d_;
};
};
// Keeps the measured plans in memory and reads/writes them from/to the plan
// file. The member functions do not take mtx themselves; the plan objects'
// save/loadMeasurementResults() lock it before calling in.
class PlanManager {
// Path of the plan file; an empty string means that no file is used.
string dftPlanFilePath;
// SLEEF_PLAN_* flags; by default the path is looked up in the environment.
uint64_t planMode_ = SLEEF_PLAN_REFERTOENVVAR;
// Key of the plan table in use: build prefix followed by a CPU/arch string.
string planID;
// Element 0: planID -> (plan key -> path string). Element 1: file format ID.
tuple<unordered_map<string, unordered_map<string, string>>, string> thePlan;
public:
PlanManager();
recursive_mutex mtx;
uint64_t planMode() { return planMode_; }
void setPlanFilePath(const char *path, const char *arch, uint64_t mode);
void loadPlanFromFile();
bool savePlanToFile(const string &fn);
bool savePlanToFile();
bool loadAndPutToFile(const string& key, const string& value);
string get(const string& key);
void put(const string& key, const string& value);
};
// Process-wide plan manager instance.
extern PlanManager planManager;
// Default stream for verbose output.
extern FILE *defaultVerboseFP;

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -14,13 +14,16 @@
int main(int argc, char **argv) {
if (argc < 3) {
fprintf(stderr, "Usage : %s <basetype> <unrollmax> <unrollmax2> <maxbutwidth> <isa> ...\n", argv[0]);
fprintf(stderr, "Usage : %s <base type> <base type ID> <maxbutwidth> <minshift> <maxshift> <isa> ...\n", argv[0]);
exit(-1);
}
const char *basetype = argv[1];
const int maxbutwidth = atoi(argv[2]);
const int isastart = 3;
const char *baseType = argv[1];
const char *baseTypeID = argv[2];
const int maxbutwidth = atoi(argv[3]);
const int minshift = atoi(argv[4]);
const int maxshift = atoi(argv[5]);
const int isastart = 6;
const int isamax = argc - isastart;
#if ENABLE_STREAM == 1
@ -29,13 +32,14 @@ int main(int argc, char **argv) {
const int enable_stream = 0;
#endif
printf("#define MAXBUTWIDTH %d\n", maxbutwidth);
printf("#define MAXBUTWIDTH%s %d\n", baseTypeID, maxbutwidth);
printf("#define MINSHIFT%s %d\n", baseTypeID, minshift);
printf("#define MAXSHIFT%s %d\n", baseTypeID, maxshift);
printf("#define CONFIGMAX 4\n");
printf("#define ISAMAX %d\n", isamax);
printf("\n");
if (strcmp(basetype, "paramonly") == 0) exit(0);
printf("#define ISAMAX %d\n", isamax);
printf("#define CONFIGMAX 4\n");
if (strcmp(baseType, "paramonly") == 0) exit(0);
for(int k=isastart;k<argc;k++) {
for(int config=0;config<4;config++) {
@ -43,23 +47,35 @@ int main(int argc, char **argv) {
if ((config & 1) != 0) continue;
#endif
for(int j=1;j<=maxbutwidth;j++) {
printf("void dft%df_%d_%s(real *, const real *, const int);\n", 1 << j, config, argv[k]);
printf("void dft%db_%d_%s(real *, const real *, const int);\n", 1 << j, config, argv[k]);
printf("void tbut%df_%d_%s(real *, uint32_t *, const real *, const int, const real *, const int);\n", 1 << j, config, argv[k]);
printf("void tbut%db_%d_%s(real *, uint32_t *, const real *, const int, const real *, const int);\n", 1 << j, config, argv[k]);
printf("void but%df_%d_%s(real *, uint32_t *, const int, const real *, const int, const real *, const int);\n", 1 << j, config, argv[k]);
printf("void but%db_%d_%s(real *, uint32_t *, const int, const real *, const int, const real *, const int);\n", 1 << j, config, argv[k]);
printf("void dft%df_%d_%s(%s *, const %s *, const int);\n", 1 << j, config, argv[k], baseType, baseType);
printf("void dft%db_%d_%s(%s *, const %s *, const int);\n", 1 << j, config, argv[k], baseType, baseType);
printf("void tbut%df_%d_%s(%s *, uint32_t *, const %s *, const int, const %s *, const int);\n", 1 << j, config, argv[k], baseType, baseType, baseType);
printf("void tbut%db_%d_%s(%s *, uint32_t *, const %s *, const int, const %s *, const int);\n", 1 << j, config, argv[k], baseType, baseType, baseType);
printf("void but%df_%d_%s(%s *, uint32_t *, const int, const %s *, const int, const %s *, const int);\n", 1 << j, config, argv[k], baseType, baseType, baseType);
printf("void but%db_%d_%s(%s *, uint32_t *, const int, const %s *, const int, const %s *, const int);\n", 1 << j, config, argv[k], baseType, baseType, baseType);
for(int s=minshift;s<maxshift;s++) {
printf("void dft%df_%d_%d_%s(%s *, const %s *);\n", 1 << j, s, config, argv[k], baseType, baseType);
printf("void dft%db_%d_%d_%s(%s *, const %s *);\n", 1 << j, s, config, argv[k], baseType, baseType);
printf("void tbut%df_%d_%d_%s(%s *, uint32_t *, const %s *, const %s *, const int);\n", 1 << j, s, config, argv[k], baseType, baseType, baseType);
printf("void tbut%db_%d_%d_%s(%s *, uint32_t *, const %s *, const %s *, const int);\n", 1 << j, s, config, argv[k], baseType, baseType, baseType);
}
for(int s=0;s<maxshift;s++) {
printf("void but%df_%d_%d_%s(%s *, uint32_t *, const %s *, const int, const %s *, const int);\n", 1 << j, s, config, argv[k], baseType, baseType, baseType);
printf("void but%db_%d_%d_%s(%s *, uint32_t *, const %s *, const int, const %s *, const int);\n", 1 << j, s, config, argv[k], baseType, baseType, baseType);
}
}
}
printf("void realSub0_%s(real *, const real *, const int, const real *, const real *);\n", argv[k]);
printf("void realSub1_%s(real *, const real *, const int, const real *, const real *, const int);\n", argv[k]);
printf("void realSub0_%s(%s *, const %s *, const int, const %s *, const %s *);\n", argv[k], baseType, baseType, baseType, baseType);
printf("void realSub1_%s(%s *, const %s *, const int, const %s *, const %s *, const int);\n", argv[k], baseType, baseType, baseType, baseType);
printf("int getInt_%s(int);\n", argv[k]);
printf("const void *getPtr_%s(int);\n", argv[k]);
}
printf("\n");
printf("void (*dftf_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int) = {\n", basetype);
printf("void (*dftf_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, const %s *, const int) = {\n", baseType, baseTypeID, baseType, baseType);
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
@ -77,7 +93,7 @@ int main(int argc, char **argv) {
}
printf("};\n\n");
printf("void (*dftb_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, const real *, const int) = {\n", basetype);
printf("void (*dftb_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, const %s *, const int) = {\n", baseType, baseTypeID, baseType, baseType);
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
@ -99,7 +115,7 @@ int main(int argc, char **argv) {
}
printf("};\n\n");
printf("void (*tbutf_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int) = {\n", basetype);
printf("void (*tbutf_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, uint32_t *, const %s *, const int, const %s *, const int) = {\n", baseType, baseTypeID, baseType, baseType, baseType);
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
@ -117,7 +133,7 @@ int main(int argc, char **argv) {
}
printf("};\n\n");
printf("void (*tbutb_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const real *, const int, const real *, const int) = {\n", basetype);
printf("void (*tbutb_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, uint32_t *, const %s *, const int, const %s *, const int) = {\n", baseType, baseTypeID, baseType, baseType, baseType);
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
@ -135,7 +151,7 @@ int main(int argc, char **argv) {
}
printf("};\n\n");
printf("void (*butf_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int) = {\n", basetype);
printf("void (*butf_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, uint32_t *, const int, const %s *, const int, const %s *, const int) = {\n", baseType, baseTypeID, baseType, baseType, baseType);
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
@ -153,7 +169,7 @@ int main(int argc, char **argv) {
}
printf("};\n\n");
printf("void (*butb_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH+1])(real *, uint32_t *, const int, const real *, const int, const real *, const int) = {\n", basetype);
printf("void (*butb_%s[CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, uint32_t *, const int, const %s *, const int, const %s *, const int) = {\n", baseType, baseTypeID, baseType, baseType, baseType);
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
@ -171,22 +187,66 @@ int main(int argc, char **argv) {
}
printf("};\n\n");
printf("void (*tbutfs_%s[MAXSHIFT%s][CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, uint32_t *, const %s *, const %s *, const int) = {\n", baseType, baseTypeID, baseTypeID, baseType, baseType, baseType);
for(int s=0;s<maxshift;s++) {
printf(" {\n");
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
printf(" {NULL, ");
for(int i=1;i<=maxbutwidth;i++) {
if ((enable_stream || (config & 1) == 0) && s >= minshift) {
printf("tbut%df_%d_%d_%s, ", 1 << i, s, config, argv[k]);
} else {
printf("NULL, ");
}
}
printf("},\n");
}
printf(" },\n");
}
printf(" },\n");
}
printf("};\n\n");
printf("void (*tbutbs_%s[MAXSHIFT%s][CONFIGMAX][ISAMAX][MAXBUTWIDTH%s+1])(%s *, uint32_t *, const %s *, const %s *, const int) = {\n", baseType, baseTypeID, baseTypeID, baseType, baseType, baseType);
for(int s=0;s<maxshift;s++) {
printf(" {\n");
for(int config=0;config<4;config++) {
printf(" {\n");
for(int k=isastart;k<argc;k++) {
printf(" {NULL, ");
for(int i=1;i<=maxbutwidth;i++) {
if ((enable_stream || (config & 1) == 0) && s >= minshift) {
printf("tbut%db_%d_%d_%s, ", 1 << i, s, config, argv[k]);
} else {
printf("NULL, ");
}
}
printf("},\n");
}
printf(" },\n");
}
printf(" },\n");
}
printf("};\n\n");
//
printf("void (*realSub0_%s[ISAMAX])(real *, const real *, const int, const real *, const real *) = {\n ", basetype);
printf("void (*realSub0_%s[ISAMAX])(%s *, const %s *, const int, const %s *, const %s *) = {\n ", baseType, baseType, baseType, baseType, baseType);
for(int k=isastart;k<argc;k++) printf("realSub0_%s, ", argv[k]);
printf("\n};\n\n");
printf("void (*realSub1_%s[ISAMAX])(real *, const real *, const int, const real *, const real *, const int) = {\n ", basetype);
printf("void (*realSub1_%s[ISAMAX])(%s *, const %s *, const int, const %s *, const %s *, const int) = {\n ", baseType, baseType, baseType, baseType, baseType);
for(int k=isastart;k<argc;k++) printf("realSub1_%s, ", argv[k]);
printf("\n};\n\n");
printf("int (*getInt_%s[16])(int) = {\n ", basetype);
printf("int (*getInt_%s[16])(int) = {\n ", baseType);
for(int k=isastart;k<argc;k++) printf("getInt_%s, ", argv[k]);
for(int k=0;k<16-(argc-isastart);k++) printf("NULL, ");
printf("\n};\n\n");
printf("const void *(*getPtr_%s[16])(int) = {\n ", basetype);
printf("const void *(*getPtr_%s[16])(int) = {\n ", baseType);
for(int k=isastart;k<argc;k++) printf("getPtr_%s, ", argv[k]);
for(int k=0;k<16-(argc-isastart);k++) printf("NULL, ");
printf("\n};\n\n");

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -41,13 +41,25 @@ char *replaceAll(const char *in, const char *pat, const char *replace) {
char line[LEN+10];
int main(int argc, char **argv) {
if (argc < 2) {
fprintf(stderr, "Usage : %s <Base type> <ISA> ...\n", argv[0]);
if (argc < 5) {
fprintf(stderr, "Usage : %s <file name> <Base type> <Base type ID> <shift> <ISA> ...\n", argv[0]);
exit(-1);
}
const char *baseType = argv[1];
const int isastart = 2;
const char *fn = argv[1];
const char *baseTypeID = argv[3];
int shift = atoi(argv[4]);
const int isastart = 5;
int mode = 1;
if (strcmp(argv[4], "-") == 0) {
mode = 0;
} else if (shift <= 0) {
mode = 2;
shift = -shift;
}
char shiftstr[21];
snprintf(shiftstr, 20, "%d", shift);
for(int config=0;config<CONFIGMAX;config++) {
#if ENABLE_STREAM == 0
@ -58,13 +70,22 @@ int main(int argc, char **argv) {
char configString[100];
sprintf(configString, "%d", config);
FILE *fpin = fopen("unroll0.org", "r");
FILE *fpin = fopen(fn, "r");
switch(mode) {
case 0:
sprintf(line, "unroll_%d_%s.cpp", config, isaString);
break;
case 1:
sprintf(line, "unroll_%d_%s_%d.cpp", config, isaString, shift);
break;
case 2:
sprintf(line, "unroll2_%d_%s_%d.cpp", config, isaString, shift);
break;
}
sprintf(line, "unroll_%d_%s.c", config, isaString);
FILE *fpout = fopen(line, "w");
fputs("#include \"vectortype.h\"\n\n", fpout);
fprintf(fpout, "extern %s ctbl_%s[];\n", baseType, baseType);
fprintf(fpout, "#define ctbl ctbl_%s\n\n", baseType);
fputs("#include \"vectortype.hpp\"\n\n", fpout);
for(;;) {
if (fgets(line, LEN, fpin) == NULL) break;
@ -82,7 +103,7 @@ int main(int argc, char **argv) {
}
if ((config & 2) == 0) {
char *s0 = replaceAll(s, "#pragma", "//");
char *s0 = replaceAll(s, "#pragma", "//pragma");
free(s);
s = s0;
}
@ -93,6 +114,18 @@ int main(int argc, char **argv) {
s = s0;
}
{
char *s0 = replaceAll(s, "%TYPEID%", baseTypeID);
free(s);
s = s0;
}
{
char *s0 = replaceAll(s, "%SHIFT%", shiftstr);
free(s);
s = s0;
}
fputs(s, fpout);
free(s);
}

View File

@ -0,0 +1,145 @@
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <vector>
using namespace std;
// Abstract byte sink that the operator<< overloads in this file write to.
class Serializer {
public:
  // Virtual so that a concrete serializer owned through a Serializer pointer
  // is destroyed through the right destructor.
  virtual ~Serializer() = default;
  // Consumes z bytes starting at p.
  virtual void write(const void *p, size_t z) = 0;
  // Hook for pushing buffered output downstream; the default does nothing.
  virtual void flush() {}
};
// Abstract byte source that the operator>> overloads in this file read from.
class Deserializer {
public:
  // Virtual so that a concrete deserializer owned through a Deserializer
  // pointer is destroyed through the right destructor.
  virtual ~Deserializer() = default;
  // Fills the z bytes starting at p.
  virtual void read(void *p, size_t z) = 0;
  // Convenience overload: reads sizeof(T) raw bytes and returns them as a T.
  // Only participates for trivially copyable T.
  template<typename T, typename enable_if<(is_trivially_copyable<T>::value), int>::type = 0>
  T read() {
    T t;
    read(&t, sizeof(T));
    return t;
  }
};
class FileSerializer : public Serializer {
FILE *fp;
public:
FileSerializer(FILE *fp_) : fp(fp_) {}
void write(const void *p, size_t z) {
fwrite(p, z, 1, fp);
}
void flush() { fflush(fp); }
};
class FileDeserializer : public Deserializer {
FILE *fp;
public:
FileDeserializer(FILE *fp_) : fp(fp_) {}
void read(void *p, size_t z) {
if (!fread(p, z, 1, fp)) throw(runtime_error("FileDeserializer::read : could not read"));
}
};
// Writes the object representation (sizeof(v) raw bytes) of a trivially
// copyable value.
template<typename T, typename enable_if<(is_trivially_copyable<T>::value), int>::type = 0>
Serializer& operator<<(Serializer &s, const T& v) {
  const char *bytes = (const char *)&v;
  const size_t nbytes = sizeof(v);
  s.write(bytes, nbytes);
  return s;
}
// Reads sizeof(v) raw bytes straight into a trivially copyable value.
template<typename T, typename enable_if<(is_trivially_copyable<T>::value), int>::type = 0>
Deserializer& operator>>(Deserializer &s, T& v) {
  char *bytes = (char *)&v;
  const size_t nbytes = sizeof(v);
  s.read(bytes, nbytes);
  return s;
}
// Writes the element count as a size_t, then each element in order using the
// element type's own operator<<.
template<typename T>
Serializer& operator<<(Serializer &s, const vector<T>& v) {
  const size_t count = v.size();
  s << count;
  const T *elems = v.data();
  for(size_t idx = 0; idx != count; ++idx) {
    s << elems[idx];
  }
  return s;
}
// Reads a size_t element count, then that many elements, appending each one
// to v. Elements already in v are kept.
template<typename T>
Deserializer& operator>>(Deserializer &d, vector<T>& v) {
  for(size_t left = d.read<size_t>(); left != 0; --left) {
    T item;
    d >> item;
    v.push_back(item);
  }
  return d;
}
// Writes the string as a size_t byte count followed by the characters; both
// the count and the data include the terminating NUL.
// Declared inline because this is a non-template function defined at
// namespace scope: without it, including this file from more than one
// translation unit produces duplicate definitions at link time.
inline Serializer& operator<<(Serializer &s, const string& str) {
  s << (str.size() + 1);
  s.write(str.c_str(), str.size() + 1);
  return s;
}
// Reads a string stored as a counted run of chars whose last byte is the
// terminating NUL (the layout produced by the string operator<<).
// Declared inline so the definition may appear in several translation units.
inline Deserializer& operator>>(Deserializer &d, string& str) {
  vector<char> v;
  d >> v;
  if (v.empty()) {
    // A zero count leaves no buffer to read from; treat it as an empty string.
    str.clear();
  } else {
    // Use the stored length rather than scanning for a NUL: this keeps
    // embedded NUL characters and never reads past the buffer when the
    // terminator is missing.
    str.assign(v.data(), v.size() - 1);
  }
  return d;
}
// Writes the entry count as a size_t, then each key followed by its value in
// the map's iteration order.
template<typename KT, typename VT>
Serializer& operator<<(Serializer &s, const unordered_map<KT, VT>& m) {
  s << m.size();
  // Iterate by const reference so that keys and values are not copied.
  for(const auto &a : m) s << a.first << a.second;
  return s;
}
// Reads a size_t entry count, then that many key/value pairs, storing each
// with m[key] = value so a repeated key overwrites the earlier entry.
template<typename KT, typename VT>
Deserializer& operator>>(Deserializer &d, unordered_map<KT, VT>& m) {
  const size_t entries = d.read<size_t>();
  size_t done = 0;
  while(done < entries) {
    KT k;
    d >> k;
    VT val;
    d >> val;
    m[k] = val;
    ++done;
  }
  return d;
}
// Writes tuple elements idx, idx+1, ... in order by compile-time recursion;
// the recursion stops once idx reaches the tuple size.
template<class tupletype, uint32_t idx=0>
static void serialize_tuple(Serializer &s, const tupletype& t) {
  constexpr auto arity = tuple_size_v<tupletype>;
  if constexpr (idx < arity) {
    const auto &element = get<idx>(t);
    s << element;
    serialize_tuple<tupletype, idx + 1>(s, t);
  }
}
// Writes every element of the tuple, first to last.
template<typename ...Ts>
Serializer& operator<<(Serializer &s, const tuple<Ts...>& t) {
  serialize_tuple<tuple<Ts...>>(s, t);
  return s;
}
// Reads tuple elements idx, idx+1, ... in order by compile-time recursion;
// the recursion stops once idx reaches the tuple size.
template<class tupletype, uint32_t idx=0>
static void deserialize_tuple(Deserializer &d, tupletype& t) {
  constexpr auto arity = tuple_size_v<tupletype>;
  if constexpr (idx < arity) {
    auto &element = get<idx>(t);
    d >> element;
    deserialize_tuple<tupletype, idx + 1>(d, t);
  }
}
// Reads every element of the tuple, first to last.
template<typename ...Ts>
Deserializer& operator>>(Deserializer &d, tuple<Ts...> &t) {
  deserialize_tuple<tuple<Ts...>>(d, t);
  return d;
}

View File

@ -1,8 +1,42 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
static const real ctbl[] = {
0.7071067811865475243818940365159164684883L, -0.7071067811865475243818940365159164684883L,
0.9238795325112867561014214079495587839119L, -0.382683432365089771723257530688933059082L,
0.382683432365089771723257530688933059082L, -0.9238795325112867561014214079495587839119L,
0.9807852804032304491190993878113602022495L, -0.1950903220161282678433729148581576851029L,
0.5555702330196022247573058028269343822103L, -0.8314696123025452370808655033762590846891L,
0.8314696123025452370808655033762590846891L, -0.5555702330196022247573058028269343822103L,
0.1950903220161282678433729148581576851029L, -0.9807852804032304491190993878113602022495L,
0.9951847266721968862310254699821143731242L, -0.09801714032956060199569840382660679267701L,
0.6343932841636454982026105398063009488396L, -0.7730104533627369607965383602188325085081L,
0.881921264348355029715105513066220055407L, -0.4713967368259976485449225247492677226546L,
0.2902846772544623676448431737195932100803L, -0.9569403357322088649310892760624369657307L,
0.9569403357322088649310892760624369657307L, -0.2902846772544623676448431737195932100803L,
0.4713967368259976485449225247492677226546L, -0.881921264348355029715105513066220055407L,
0.7730104533627369607965383602188325085081L, -0.6343932841636454982026105398063009488396L,
0.09801714032956060199569840382660679267701L, -0.9951847266721968862310254699821143731242L,
0.9987954562051723927007702841240899260811L, -0.04906767432741801425355085940205324135377L,
0.6715589548470184006194634573905233310143L, -0.7409511253549590911932944126139233276263L,
0.9039892931234433315823215138173907234886L, -0.427555093430282094315230886905077056781L,
0.336889853392220050702686798271834334173L, -0.9415440651830207783906830087961026265475L,
0.9700312531945439926159106824865574481009L, -0.2429801799032638899447731489766866275204L,
0.5141027441932217266072797923204262815489L, -0.8577286100002720698929313536407192941624L,
0.8032075314806449097991200569701675249235L, -0.5956993044924333434615715265891822127742L,
0.1467304744553617516588479505190711904561L, -0.9891765099647809734561415551112872890371L,
0.9891765099647809734561415551112872890371L, -0.1467304744553617516588479505190711904561L,
0.5956993044924333434615715265891822127742L, -0.8032075314806449097991200569701675249235L,
0.8577286100002720698929313536407192941624L, -0.5141027441932217266072797923204262815489L,
0.2429801799032638899447731489766866275204L, -0.9700312531945439926159106824865574481009L,
0.9415440651830207783906830087961026265475L, -0.336889853392220050702686798271834334173L,
0.427555093430282094315230886905077056781L, -0.9039892931234433315823215138173907234886L,
0.7409511253549590911932944126139233276263L, -0.6715589548470184006194634573905233310143L,
0.04906767432741801425355085940205324135377L, -0.9987954562051723927007702841240899260811L,
};
ALIGNED(8192) void dft2f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
const int k = 1 << (shift - LOG2VECWIDTH);
int i=0;
@ -241,7 +275,7 @@ ALIGNED(8192) void tbut4b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const
}
}
#if MAXBUTWIDTH >= 3
#if MAXBUTWIDTH%TYPEID% >= 3
ALIGNED(8192) void dft8f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
const int k = 1 << (shift - LOG2VECWIDTH);
int i=0;
@ -551,7 +585,7 @@ ALIGNED(8192) void tbut8b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const
}
#endif
#if MAXBUTWIDTH >= 4
#if MAXBUTWIDTH%TYPEID% >= 4
ALIGNED(8192) void dft16f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
const int k = 1 << (shift - LOG2VECWIDTH);
int i=0;
@ -1217,7 +1251,7 @@ ALIGNED(8192) void tbut16b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, cons
}
#endif
#if MAXBUTWIDTH >= 5
#if MAXBUTWIDTH%TYPEID% >= 5
ALIGNED(8192) void dft32f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
const int k = 1 << (shift - LOG2VECWIDTH);
int i=0;
@ -2727,7 +2761,7 @@ ALIGNED(8192) void tbut32b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, cons
}
#endif
#if MAXBUTWIDTH >= 6
#if MAXBUTWIDTH%TYPEID% >= 6
ALIGNED(8192) void dft64f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
const int k = 1 << (shift - LOG2VECWIDTH);
int i=0;
@ -6191,7 +6225,7 @@ ALIGNED(8192) void tbut64b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, cons
//
#if MAXBUTWIDTH >= 7
#if MAXBUTWIDTH%TYPEID% >= 7
ALIGNED(8192) void dft128f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
const int k = 1 << (shift - LOG2VECWIDTH);
int i=0;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -6,6 +6,10 @@
#ifndef __VECTORTYPE_H__
#define __VECTORTYPE_H__
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#include <math.h>
#include "sleef.h"
@ -57,10 +61,6 @@
#include "helpers390x_128.h"
#endif
#ifdef ENABLE_VECEXT
#include "helpervecext.h"
#endif
#ifdef ENABLE_PUREC
#include "helperpurec.h"
#endif

View File

@ -194,13 +194,12 @@ int main(int argc, char **argv)
mpfr_zinit(result[i]);
}
mpfr_t fra, frb, frc, frd, fre;
mpfr_t fra, frb, frc, frd;
mpfr_zinit(fra);
mpfr_zinit(frb);
mpfr_zinit(frc);
mpfr_zinit(frd);
mpfr_zinit(fre);
mpfr_init(fra);
mpfr_init(frb);
mpfr_init(frc);
mpfr_init(frd);
for(i=0;i<n;i++) {
double b = 1.0 - pow((double)i / (n-1), p);

View File

@ -1,4 +1,4 @@
// The original code for simplex algorithm is taken from Haruhiko Okumura's book.
// The original code for simplex algorithm is taken from Haruhiko Okumura's book.
// https://oku.edu.mie-u.ac.jp/~okumura/algo/
// The code is distributed under the Creative Commons Attribution 4.0 International License.
// https://creativecommons.org/licenses/by/4.0/
@ -156,7 +156,7 @@ static void tableau(mpfr_t ret, int i, int j) {
if (j <= n) {
mpfr_t s;
mpfr_zinit(s);
mpfr_init(s);
mpfr_set_d(s, 0, GMP_RNDN);
mpfr_t *tab = malloc(sizeof(mpfr_t) * (m + 1));
@ -190,9 +190,7 @@ static void tableau(mpfr_t ret, int i, int j) {
static void pivot(int ipivot, int jpivot) {
int i, j;
mpfr_t u;
mpfr_zinit(u);
mpfr_init(u);
mpfr_set(u, pivotcolumn[ipivot], GMP_RNDN);
for (j = 1; j <= m; j++) {
@ -254,7 +252,8 @@ static int minimize() {
static int phase1() {
int i, j;
mpfr_t u;
mpfr_zinit(u);
mpfr_init(u);
mpfr_set_d(u, 0, GMP_RNDN);
jmax = n3;
for (i = 0; i <= m; i++) {
@ -309,7 +308,8 @@ int solve_fr(mpfr_t *result, int n0, int m0, mpfr_t **a0, int *ineq0, mpfr_t *c0
init(n, m);
mpfr_t csum;
mpfr_zinit(csum);
mpfr_init(csum);
mpfr_set_d(csum, 0, GMP_RNDN);
for(j=0;j<n0+1;j++) {
mpfr_set(c[j], c0[j], GMP_RNDN);
@ -370,7 +370,8 @@ int solve_fr(mpfr_t *result, int n0, int m0, mpfr_t **a0, int *ineq0, mpfr_t *c0
}
mpfr_t cs;
mpfr_zinit(cs);
mpfr_init(cs);
mpfr_set_d(cs, 0, GMP_RNDN);
if (row[n] != 0) tableau(cs, row[n], 0);
for (j = 1; j < n; j++) {

View File

@ -1,153 +0,0 @@
# Path of the Intel compiler, or empty when icc is not on PATH. Only used to
# decide whether the SVML hints are printed.
ICCAVAILABLE := $(shell command -v icc 2> /dev/null)
# Machine hardware name, compared against aarch64 / s390x / ppc64le below.
# `uname -m` is used because `uname -p` is non-portable and prints "unknown"
# on many Linux systems, which would always select the x86 rules.
ARCH := $(shell uname -m)
# Default goal: prints usage instructions only; it builds nothing.
.PHONY: all
all :
ifndef BUILDDIR
	@echo
	@echo Please set the build directory to BUILDDIR environment variable and run make once again.
	@echo e.g. export BUILDDIR='`pwd`'/../../build
	@echo
else
	@echo
	@echo You can start measurement by "'"make measure"'".
ifdef ICCAVAILABLE
	@echo You can start measurement with SVML by "'"make measureSVML"'".
endif
	@echo Then, you can plot the results of measurement by "'"make plot"'".
	@echo
	@echo You have to install java and gnuplot to do plotting.
	@echo Stop all tasks on the computer before starting measurement.
	@echo
endif
# SVML benchmark objects and executables, built with icc at SVMLULP=1 (_10)
# and SVMLULP=4 (_40). Each recipe is prefixed with `-` and guarded by
# `command -v icc`, so a missing icc does not fail the build.
benchsvml128_10.o : benchsvml128.c bench.h
	-command -v icc >/dev/null 2>&1 && icc $< -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o $@
benchsvml128_40.o : benchsvml128.c bench.h
	-command -v icc >/dev/null 2>&1 && icc $< -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o $@
benchsvml256_10.o : benchsvml256.c bench.h
	-command -v icc >/dev/null 2>&1 && icc $< -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o $@
benchsvml256_40.o : benchsvml256.c bench.h
	-command -v icc >/dev/null 2>&1 && icc $< -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o $@
benchsvml512_10.o : benchsvml512.c bench.h
	-command -v icc >/dev/null 2>&1 && icc $< -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o $@
benchsvml512_40.o : benchsvml512.c bench.h
	-command -v icc >/dev/null 2>&1 && icc $< -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o $@
# bench.h is a prerequisite only; it is filtered out of the link command.
benchsvml_10 : benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o bench.h
	-command -v icc >/dev/null 2>&1 && icc $(filter-out %.h,$^) -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o $@
benchsvml_40 : benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o bench.h
	-command -v icc >/dev/null 2>&1 && icc $(filter-out %.h,$^) -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o $@
#
# SLEEF benchmark executable. aarch64, s390x and ppc64le build only the
# 128-bit object; every other ARCH value also builds the 256- and 512-bit
# objects. bench.h is a prerequisite only and is filtered out of link commands.
ifeq ($(ARCH),aarch64)
benchsleef : benchsleef.c benchsleef128.o bench.h
	$(CC) $(filter-out %.h,$^) -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o $@
benchsleef128.o : benchsleef128.c bench.h
	$(CC) $< -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c -o $@
else ifeq ($(ARCH),s390x)
benchsleef : benchsleef.c benchsleef128.o bench.h
	$(CC) $(filter-out %.h,$^) -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o $@
benchsleef128.o : benchsleef128.c bench.h
	$(CC) $< -Wall -mzvector -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c -o $@
else ifeq ($(ARCH),ppc64le)
benchsleef : benchsleef.c benchsleef128.o bench.h
	$(CC) $(filter-out %.h,$^) -Wall -mcpu=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o $@
benchsleef128.o : benchsleef128.c bench.h
	$(CC) $< -Wall -mcpu=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c -o $@
else
benchsleef : benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o bench.h
	$(CC) $(filter-out %.h,$^) -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o $@
benchsleef128.o : benchsleef128.c bench.h
	$(CC) $< -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c -o $@
benchsleef256.o : benchsleef256.c bench.h
	$(CC) $< -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c -o $@
benchsleef512.o : benchsleef512.c bench.h
	$(CC) $< -Wall -mavx512f -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c -o $@
endif
#
# Compiles the result post-processor used by the plot target.
ProcessData.class : ProcessData.java
	javac $<
#
# Runs measure.sh against the SLEEF benchmark. Without BUILDDIR the target
# only explains how to set it.
.PHONY: measure
ifndef BUILDDIR
measure :
	@echo
	@echo Please set the build directory to BUILDDIR environment variable and run make once again.
	@echo e.g. export BUILDDIR='`pwd`'/../../build
	@echo
else
measure : benchsleef
	chmod +x ./measure.sh
	LD_LIBRARY_PATH=$(BUILDDIR)/lib ./measure.sh ./benchsleef
	@echo
	@echo Now, you can plot the results of measurement by "'"make plot"'".
	@echo You can do another measurement by "'"make measure"'".
ifdef ICCAVAILABLE
	@echo You can start another measurement with SVML by "'"make measureSVML"'".
endif
	@echo You can start over by "'"make restart"'".
	@echo
endif
# Runs measure.sh against the two SVML benchmark executables.
.PHONY: measureSVML
measureSVML : all benchsvml_10 benchsvml_40
	chmod +x ./measure.sh
	./measure.sh ./benchsvml_10 ./benchsvml_40
	@echo
	@echo Now, you can plot the results of measurement by "'"make plot"'".
	@echo You can do another measurement by "'"make measure"'".
ifdef ICCAVAILABLE
	@echo You can start another measurement with SVML by "'"make measureSVML"'".
endif
	@echo You can start over by "'"make restart"'".
	@echo
# Turns the *.out result files into four PNG charts. Each group runs
# ProcessData on one family of result files, plots the generated script.out
# with gnuplot, and renames the resulting output.png.
.PHONY: plot
plot : ProcessData.class counter.txt
	java ProcessData *dptrig*.out
	gnuplot script.out
	mv output.png trigdp.png
	java ProcessData *dpnontrig*.out
	gnuplot script.out
	mv output.png nontrigdp.png
	java ProcessData *sptrig*.out
	gnuplot script.out
	mv output.png trigsp.png
	java ProcessData *spnontrig*.out
	gnuplot script.out
	mv output.png nontrigsp.png
	@echo
	@echo Plotted results are in trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png.
	@echo
# Removes build products, plots and measurement results from this directory.
# Declared phony so that a file named "clean" cannot disable the target.
.PHONY: clean
clean :
	rm -f *~ a.out *.so *.so.* *.a *.s *.o
	rm -rf *.dSYM *.dylib
	rm -f *.obj *.lib *.dll *.exp *.exe *.stackdump
	rm -f *.class *.png benchsleef benchsvml_10 benchsvml_40 *.out counter.txt
# Discards measurement results so that a new series can be started.
# Declared phony so that a file named "restart" cannot disable the target.
.PHONY: restart
restart :
	rm -f *.out counter.txt

View File

@ -1,193 +0,0 @@
import java.util.*;
import java.io.*;
public class ProcessData {
static final int DP = 64, SP = 32;
static LinkedHashMap<String, Integer> funcNameOrder = new LinkedHashMap<String, Integer>();
static class Key {
final String funcName;
final int prec, bits;
final ArrayList<Double> range = new ArrayList<Double>();
final double ulps;
Key(String s) {
String[] a = s.split(",");
funcName = a[0].trim();
if (funcNameOrder.get(funcName) == null) {
funcNameOrder.put(funcName, funcNameOrder.size());
}
prec =
a[1].trim().equals("DP") ? DP :
a[1].trim().equals("SP") ? SP :
0;
bits = Integer.parseInt(a[2].trim());
int c;
for(c = 3;;c++) {
if (a[c].trim().endsWith("ulps")) break;
range.add(Double.parseDouble(a[c]));
}
ulps = Double.parseDouble(a[c].trim().replace("ulps", ""));
}
public int hashCode() {
int h = funcName.hashCode();
h ^= prec ^ bits;
return h;
}
public boolean equals(Object o) {
if (this == o) return true;
Key k = (Key) o;
if (funcName.compareTo(k.funcName) != 0) return false;
if (prec != k.prec) return false;
if (bits != k.bits) return false;
if (range.size() != k.range.size()) return false;
for(int i=0;i<range.size();i++) {
if ((double)range.get(i) != (double)k.range.get(i)) return false;
}
if (ulps != k.ulps) return false;
return true;
}
public String toString() {
String s = funcName + " ";
s += prec == DP ? "DP " : "SP ";
s += bits + "bit ";
s += String.format(" %.0fulp ", ulps);
for(int i=0;i<range.size();i+=2) {
s += "[" + String.format("%.3g", range.get(i)) + ", " + String.format("%.3g", range.get(i+1)) + "]";
if (i + 2 < range.size()) s += " ";
}
return s;
}
}
static class KeyComparator implements Comparator<Key> {
public int compare(Key d0, Key d1) {
if (d0 == d1) return 0;
if (d0.prec < d1.prec) return 1;
if (d0.prec > d1.prec) return -1;
if (d0.ulps > d1.ulps) return 1;
if (d0.ulps < d1.ulps) return -1;
int fc = (int)funcNameOrder.get(d0.funcName) - (int)funcNameOrder.get(d1.funcName);
if (fc != 0) return fc;
if (d0.bits > d1.bits) return 1;
if (d0.bits < d1.bits) return -1;
if (d0.range.size() > d1.range.size()) return 1;
if (d0.range.size() < d1.range.size()) return -1;
for(int i=0;i<d0.range.size();i++) {
if (d0.range.get(i) > d1.range.get(i)) return 1;
if (d0.range.get(i) < d1.range.get(i)) return -1;
}
return 0;
}
}
public static void main(String[] args) throws Exception {
LinkedHashMap<Key, LinkedHashMap<String, Double>> allData = new LinkedHashMap<Key, LinkedHashMap<String, Double>>();
TreeSet<Key> allKeys = new TreeSet<Key>(new KeyComparator());
LinkedHashSet<String> allColumnTitles = new LinkedHashSet<String>();
double maximum = 0;
for(int i=0;i<args.length;i++) {
LineNumberReader lnr = new LineNumberReader(new FileReader(args[i]));
String columnTitle = lnr.readLine();
allColumnTitles.add(columnTitle);
for(;;) {
String s = lnr.readLine();
if (s == null) break;
Key key = new Key(s);
allKeys.add(key);
LinkedHashMap<String, Double> v = allData.get(key);
if (v == null) {
v = new LinkedHashMap<String, Double>();
allData.put(key, v);
}
String[] a = s.split(",");
double time = Double.parseDouble(a[a.length-1]);
v.put(columnTitle, time);
maximum = Math.max(maximum, time);
}
lnr.close();
}
PrintStream ps = new PrintStream("data.out");
for(Key k : allKeys) {
ps.print("\"" + k + "\" ");
LinkedHashMap<String, Double> v = allData.get(k);
for(String s : allColumnTitles) {
Double d = v.get(s);
if (d != null) ps.print(d);
if (d == null) ps.print("0");
ps.print("\t");
}
ps.println();
}
ps.close();
ps = new PrintStream("script.out");
ps.println("set terminal pngcairo size 1280, 800 font \",10\"");
ps.println("set output \"output.png\"");
ps.println("color00 = \"#FF5050\";"); // red
ps.println("color01 = \"#0066FF\";"); // blue
ps.println("color02 = \"#00FF00\";"); // green
ps.println("color03 = \"#FF9900\";"); // orange
ps.println("color04 = \"#CC00CC\";"); // purple
ps.println("color05 = \"#880000\";"); // brown
ps.println("color06 = \"#003300\";"); // dark green
ps.println("color07 = \"#000066\";"); // dark blue
ps.println("set style data histogram");
ps.println("set style histogram cluster gap 1");
ps.println("set style fill solid 1.00");
ps.println("set boxwidth 0.9");
ps.println("set xtics format \"\"");
ps.println("set xtics rotate by -90");
ps.println("set grid ytics");
ps.println("set ylabel \"Execution time in micro sec.\"");
ps.println("set yrange [0:*]");
ps.println("set bmargin 24");
ps.println("set title \"Single execution time in micro sec.\"");
ps.print("plot");
int i = 0;
for(String s : allColumnTitles) {
ps.print("\"data.out\" using " + (i+2) + ":xtic(1) title \"" + s +
"\" linecolor rgb color" + String.format("%02d", i));
if (i != allColumnTitles.size()-1) ps.print(", ");
i++;
}
ps.println();
ps.close();
}
}

View File

@ -1,58 +0,0 @@
// Iteration counts used by the callFunc* timing macros below:
// NITER1 is the inner-loop count (calls per pass over the input buffer),
// NITER2 is the outer-loop count (number of passes), and NITER is their
// product, used as the divisor that turns elapsed time into time per call.
#define NITER1 100000
#define NITER2 10000
#define NITER (NITER1 * NITER2)
// Times a one-argument function. Prints the function name to stdout, then
// makes NITER2 passes, each calling funcName on NITER1 consecutive elements
// of `type` starting at `arg`; the results are discarded. Finally writes one
// record to the FILE* named `fp` (which must be visible where the macro is
// expanded): `name`, the range xmin..xmax, `ulp`, and the elapsed
// microseconds divided by NITER. `name` must be a string literal because it
// is concatenated with the format string.
#define callFuncSLEEF1_1(funcName, name, xmin, xmax, ulp, arg, type) ({ \
printf("%s\n", #funcName); \
uint64_t t = Sleef_currentTimeMicros(); \
for(int j=0;j<NITER2;j++) { \
type *p = (type *)(arg); \
for(int i=0;i<NITER1;i++) funcName(*p++); \
} \
fprintf(fp, name ", %.3g, %.3g, %gulps, %g\n", \
(double)xmin, (double)xmax, ulp, (double)(Sleef_currentTimeMicros() - t) / NITER); \
})
// Two-argument variant of the timing macro: funcName is called with
// consecutive elements taken from `arg1` and `arg2` in lockstep, and the
// record written to `fp` carries both input ranges (xmin..xmax and
// ymin..ymax) before `ulp` and the elapsed microseconds divided by NITER.
#define callFuncSLEEF1_2(funcName, name, xmin, xmax, ymin, ymax, ulp, arg1, arg2, type) ({ \
printf("%s\n", #funcName); \
uint64_t t = Sleef_currentTimeMicros(); \
for(int j=0;j<NITER2;j++) { \
type *p1 = (type *)(arg1), *p2 = (type *)(arg2); \
for(int i=0;i<NITER1;i++) funcName(*p1++, *p2++); \
} \
fprintf(fp, name ", %.3g, %.3g, %.3g, %.3g, %gulps, %g\n", \
(double)xmin, (double)xmax, (double)ymin, (double)ymax, ulp, (double)(Sleef_currentTimeMicros() - t) / NITER); \
})
// Times a one-argument function with the same loop structure as
// callFuncSLEEF1_1, but takes no ulp parameter: the ulp column of the record
// written to `fp` is the compile-time constant SVMLULP.
#define callFuncSVML1_1(funcName, name, xmin, xmax, arg, type) ({ \
printf("%s\n", #funcName); \
uint64_t t = Sleef_currentTimeMicros(); \
for(int j=0;j<NITER2;j++) { \
type *p = (type *)(arg); \
for(int i=0;i<NITER1;i++) funcName(*p++); \
} \
fprintf(fp, name ", %.3g, %.3g, %gulps, %g\n", \
(double)xmin, (double)xmax, (double)SVMLULP, (double)(Sleef_currentTimeMicros() - t) / NITER); \
})
// Times a function that takes a pointer to a `type` as its first argument
// and the input value as its second: each call passes the address of a local
// scratch variable `c` together with the next element of `arg`. The record
// written to `fp` uses SVMLULP for the ulp column.
#define callFuncSVML2_1(funcName, name, xmin, xmax, arg, type) ({ \
printf("%s\n", #funcName); \
uint64_t t = Sleef_currentTimeMicros(); \
for(int j=0;j<NITER2;j++) { \
type *p = (type *)(arg), c; \
for(int i=0;i<NITER1;i++) funcName(&c, *p++); \
} \
fprintf(fp, name ", %.3g, %.3g, %gulps, %g\n", \
(double)xmin, (double)xmax, (double)SVMLULP, (double)(Sleef_currentTimeMicros() - t) / NITER); \
})
// Two-argument variant of callFuncSVML1_1: funcName is called with
// consecutive elements of `arg1` and `arg2` in lockstep, and the record
// written to `fp` carries both input ranges, with SVMLULP in the ulp column.
#define callFuncSVML1_2(funcName, name, xmin, xmax, ymin, ymax, arg1, arg2, type) ({ \
printf("%s\n", #funcName); \
uint64_t t = Sleef_currentTimeMicros(); \
for(int j=0;j<NITER2;j++) { \
type *p1 = (type *)(arg1), *p2 = (type *)(arg2); \
for(int i=0;i<NITER1;i++) funcName(*p1++, *p2++); \
} \
fprintf(fp, name ", %.3g, %.3g, %.3g, %.3g, %gulps, %g\n", \
(double)xmin, (double)xmax, (double)ymin, (double)ymax, (double)SVMLULP, (double)(Sleef_currentTimeMicros() - t) / NITER); \
})

View File

@ -1,144 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <sleef.h>
#include "bench.h"
int veclen = 16;
double *abufdp, *bbufdp;
float *abufsp, *bbufsp;
FILE *fp;
#if defined(__i386__) || defined(__x86_64__)
// Executes the CPUID instruction with the given EAX (leaf) and ECX (subleaf)
// inputs and stores the resulting EAX, EBX, ECX and EDX values in
// out[0]..out[3].
void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) {
  uint32_t regA, regB, regC, regD;
  __asm__ __volatile__ ("cpuid" : "=a" (regA), "=b" (regB), "=c" (regC), "=d" (regD) : "a" (eax), "c"(ecx));
  const uint32_t regs[4] = { regA, regB, regC, regD };
  for(int i = 0; i < 4; i++) out[i] = regs[i];
}
// Returns 1 when bit 28 of ECX from CPUID leaf 1 is set, 0 otherwise.
int cpuSupportsAVX() {
  int32_t regs[4];
  x86CpuID(regs, 1, 0);
  const int32_t ecxValue = regs[2];
  return (ecxValue & (1 << 28)) ? 1 : 0;
}
// Returns 1 when bit 16 of EBX from CPUID leaf 7, subleaf 0 is set, 0 otherwise.
int cpuSupportsAVX512F() {
  int32_t regs[4];
  x86CpuID(regs, 7, 0);
  const int32_t ebxValue = regs[1];
  return (ebxValue & (1 << 16)) ? 1 : 0;
}
#endif
// Fills the first NITER1*veclen doubles of buf with pseudo-random values
// scaled from the unit interval onto [min, max]. Two random() results are
// combined per value.
void fillDP(double *buf, double min, double max) {
  int idx = 0;
  while(idx < NITER1*veclen) {
    double unit = ((double)random() + RAND_MAX * (double)random()) / (RAND_MAX * (double)RAND_MAX);
    buf[idx] = unit * (max - min) + min;
    idx++;
  }
}
// Fills the first NITER1*veclen floats of buf with pseudo-random values
// scaled from the unit interval onto [min, max]. The value is computed in
// double precision and narrowed to float on assignment.
void fillSP(float *buf, double min, double max) {
  int idx = 0;
  while(idx < NITER1*veclen) {
    double unit = ((double)random() + RAND_MAX * (double)random()) / (RAND_MAX * (double)RAND_MAX);
    buf[idx] = unit * (max - min) + min;
    idx++;
  }
}
void benchSleef128_DPTrig();
void benchSleef256_DPTrig();
void benchSleef512_DPTrig();
void benchSleef128_DPNontrig();
void benchSleef256_DPNontrig();
void benchSleef512_DPNontrig();
void benchSleef128_SPTrig();
void benchSleef256_SPTrig();
void benchSleef512_SPTrig();
void benchSleef128_SPNontrig();
void benchSleef256_SPNontrig();
void benchSleef512_SPNontrig();
//
int main(int argc, char **argv) {
char *columnTitle = "SLEEF", *fnBase = "sleef";
char fn[1024];
if (argc != 1) columnTitle = argv[1];
if (argc >= 3) fnBase = argv[2];
srandom(time(NULL));
#if defined(__i386__) || defined(__x86_64__)
int do128bit = 1;
int do256bit = cpuSupportsAVX();
int do512bit = cpuSupportsAVX512F();
#elif defined(__ARM_NEON) || defined(__VSX__) || defined(__VX__)
int do128bit = 1;
#else
#error Unsupported architecture
#endif
posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double));
posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double));
abufsp = (float *)abufdp;
bbufsp = (float *)bbufdp;
sprintf(fn, "%sdptrig.out", fnBase);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do128bit) benchSleef128_DPTrig();
#if defined(__i386__) || defined(__x86_64__)
if (do256bit) benchSleef256_DPTrig();
if (do512bit) benchSleef512_DPTrig();
#endif
fclose(fp);
sprintf(fn, "%sdpnontrig.out", fnBase);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do128bit) benchSleef128_DPNontrig();
#if defined(__i386__) || defined(__x86_64__)
if (do256bit) benchSleef256_DPNontrig();
if (do512bit) benchSleef512_DPNontrig();
#endif
fclose(fp);
sprintf(fn, "%ssptrig.out", fnBase);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do128bit) benchSleef128_SPTrig();
#if defined(__i386__) || defined(__x86_64__)
if (do256bit) benchSleef256_SPTrig();
if (do512bit) benchSleef512_SPTrig();
#endif
fclose(fp);
sprintf(fn, "%sspnontrig.out", fnBase);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do128bit) benchSleef128_SPNontrig();
#if defined(__i386__) || defined(__x86_64__)
if (do256bit) benchSleef256_SPNontrig();
if (do512bit) benchSleef512_SPNontrig();
#endif
fclose(fp);
exit(0);
}

View File

@ -1,195 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <sleef.h>
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);
extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;
#include "bench.h"
// Selects the vector types used by the benchmark functions below, based on
// compiler-predefined macros, and defines ENABLED when a supported
// instruction set was found. When none matches, ENABLED stays undefined.
#ifdef __SSE2__
// x86 with SSE2: the intrinsics header differs between MSVC and other compilers.
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
typedef __m128d vdouble;
typedef __m128 vfloat;
#define ENABLED
#elif defined(__ARM_NEON)
// Arm NEON.
#include <arm_neon.h>
typedef float64x2_t vdouble;
typedef float32x4_t vfloat;
#define ENABLED
#elif defined(__VSX__)
// Selected when the compiler defines __VSX__.
#include <altivec.h>
typedef __vector double vdouble;
typedef __vector float vfloat;
#define ENABLED
#elif defined(__VX__)
// Selected when the compiler defines __VX__.
#include <vecintrin.h>
typedef __vector double vdouble;
typedef __vector float vfloat;
#define ENABLED
#endif
#ifdef ENABLED
// Benchmarks the 128-bit double-precision SLEEF trigonometric entry points
// (sin, cos, tan, sincos; both the _u10 and _u35 variants) over three input ranges.
// Each section refills abufdp with fillDP and then hands the same range bounds to
// callFuncSLEEF1_1, which is not defined in this file (presumably a macro from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix
// (likely an ULP bound) - confirm against bench.h.
void benchSleef128_DPTrig() {
// Inputs in [0, 6.28]
fillDP(abufdp, 0, 6.28);
callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble);
// Inputs in [0, 1e+6]
fillDP(abufdp, 0, 1e+6);
callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble);
// Inputs in [0, 1e+100]
fillDP(abufdp, 0, 1e+100);
callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble);
}
// Benchmarks the 128-bit double-precision SLEEF non-trigonometric entry points:
// log/log10/log1p, exp/exp2/exp10, pow, asin/acos and atan/atan2.
// One-argument functions read abufdp through callFuncSLEEF1_1; two-argument functions
// (pow, atan2) read abufdp and bbufdp through callFuncSLEEF1_2. Neither macro/function
// is defined in this file (presumably both come from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef128_DPNontrig() {
// Logarithms, inputs in [0, 1e+300]
fillDP(abufdp, 0, 1e+300);
callFuncSLEEF1_1(Sleef_logd2_u10 , "log, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_log10d2_u10, "log10, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_log1pd2_u10, "log1p, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_logd2_u35 , "log, DP, 128", 0, 1e+300, 4.0, abufdp, vdouble);
// Exponentials, inputs in [-700, 700]
fillDP(abufdp, -700, 700);
callFuncSLEEF1_1(Sleef_expd2_u10 , "exp, DP, 128", -700, 700, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_exp2d2_u10 , "exp2, DP, 128", -700, 700, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_exp10d2_u10, "exp10, DP, 128", -700, 700, 1.0, abufdp, vdouble);
// pow, both operands in [-30, 30]
fillDP(abufdp, -30, 30);
fillDP(bbufdp, -30, 30);
callFuncSLEEF1_2(Sleef_powd2_u10, "pow, DP, 128", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble);
// asin / acos, inputs in [-1, 1]
fillDP(abufdp, -1.0, 1.0);
callFuncSLEEF1_1(Sleef_asind2_u10, "asin, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_acosd2_u10, "acos, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_asind2_u35, "asin, DP, 128", -1.0, 1.0, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_acosd2_u35, "acos, DP, 128", -1.0, 1.0, 4.0, abufdp, vdouble);
// atan / atan2, operands in [-10, 10]
fillDP(abufdp, -10, 10);
fillDP(bbufdp, -10, 10);
callFuncSLEEF1_1(Sleef_atand2_u10, "atan, DP, 128", -10, 10, 1.0, abufdp, vdouble);
callFuncSLEEF1_2(Sleef_atan2d2_u10, "atan2, DP, 128", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble);
callFuncSLEEF1_1(Sleef_atand2_u35, "atan, DP, 128", -10, 10, 4.0, abufdp, vdouble);
callFuncSLEEF1_2(Sleef_atan2d2_u35, "atan2, DP, 128", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble);
}
// Benchmarks the 128-bit single-precision SLEEF trigonometric entry points
// (sin, cos, tan, sincos; both the _u10 and _u35 variants) over two input ranges.
// Each section refills abufsp with fillSP and then hands the same range bounds to
// callFuncSLEEF1_1, which is not defined in this file (presumably a macro from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef128_SPTrig() {
// Inputs in [0, 6.28]
fillSP(abufsp, 0, 6.28);
callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat);
// Inputs in [0, 1e+20]
fillSP(abufsp, 0, 1e+20);
callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat);
}
// Benchmarks the 128-bit single-precision SLEEF non-trigonometric entry points:
// log/log10, exp/exp2/exp10, pow, asin/acos and atan/atan2.
// One-argument functions read abufsp through callFuncSLEEF1_1; two-argument functions
// (pow, atan2) read abufsp and bbufsp through callFuncSLEEF1_2. Neither is defined in
// this file (presumably both come from bench.h).
// The log1p calls and the _u35 log10 call are present only as commented-out lines;
// the reason is not stated in this file.
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef128_SPNontrig() {
// Logarithms, inputs in [0, 1e+38]
fillSP(abufsp, 0, 1e+38);
callFuncSLEEF1_1(Sleef_logf4_u10 , "log, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_log10f4_u10, "log10, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log1pf4_u10, "log1p, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_logf4_u35 , "log, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log10f4_u35, "log10, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log1pf4_u35, "log1p, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat);
// Exponentials, inputs in [-100, 100]
fillSP(abufsp, -100, 100);
callFuncSLEEF1_1(Sleef_expf4_u10 , "exp, SP, 128", -100, 100, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_exp2f4_u10 , "exp2, SP, 128", -100, 100, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_exp10f4_u10, "exp10, SP, 128", -100, 100, 1.0, abufsp, vfloat);
// pow, both operands in [-30, 30]
fillSP(abufsp, -30, 30);
fillSP(bbufsp, -30, 30);
callFuncSLEEF1_2(Sleef_powf4_u10, "pow, SP, 128", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat);
// asin / acos, inputs in [-1, 1]
fillSP(abufsp, -1.0, 1.0);
callFuncSLEEF1_1(Sleef_asinf4_u10, "asin, SP, 128", -1.0, 1, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_acosf4_u10, "acos, SP, 128", -1.0, 1, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_asinf4_u35, "asin, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_acosf4_u35, "acos, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat);
// atan / atan2, operands in [-10, 10]
fillSP(abufsp, -10, 10);
fillSP(bbufsp, -10, 10);
callFuncSLEEF1_1(Sleef_atanf4_u10, "atan, SP, 128", -10, 10, 1.0, abufsp, vfloat);
callFuncSLEEF1_2(Sleef_atan2f4_u10, "atan2, SP, 128", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat);
callFuncSLEEF1_1(Sleef_atanf4_u35, "atan, SP, 128", -10, 10, 4.0, abufsp, vfloat);
callFuncSLEEF1_2(Sleef_atan2f4_u35, "atan2, SP, 128", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// No-op fallbacks for the 128-bit SLEEF benchmarks, compiled when ENABLED is not
// defined; they keep the four entry points defined so callers still link.
void benchSleef128_DPTrig() {
}

void benchSleef128_DPNontrig() {
}

void benchSleef128_SPTrig() {
}

void benchSleef128_SPNontrig() {
}
#endif // #ifdef ENABLED

View File

@ -1,181 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <sleef.h>
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);
extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;
#include "bench.h"
#ifdef __AVX__
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
typedef __m256d vdouble;
typedef __m256 vfloat;
#define ENABLED
#endif
#ifdef ENABLED
// Benchmarks the 256-bit double-precision SLEEF trigonometric entry points
// (sin, cos, tan, sincos; both the _u10 and _u35 variants) over three input ranges.
// Each section refills abufdp with fillDP and then hands the same range bounds to
// callFuncSLEEF1_1, which is not defined in this file (presumably a macro from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix
// (likely an ULP bound) - confirm against bench.h.
void benchSleef256_DPTrig() {
// Inputs in [0, 6.28]
fillDP(abufdp, 0, 6.28);
callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble);
// Inputs in [0, 1e+6]
fillDP(abufdp, 0, 1e+6);
callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble);
// Inputs in [0, 1e+100]
fillDP(abufdp, 0, 1e+100);
callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble);
}
// Benchmarks the 256-bit double-precision SLEEF non-trigonometric entry points:
// log/log10/log1p, exp/exp2/exp10, pow, asin/acos and atan/atan2.
// One-argument functions read abufdp through callFuncSLEEF1_1; two-argument functions
// (pow, atan2) read abufdp and bbufdp through callFuncSLEEF1_2. Neither is defined in
// this file (presumably both come from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef256_DPNontrig() {
// Logarithms, inputs in [0, 1e+300]
fillDP(abufdp, 0, 1e+300);
callFuncSLEEF1_1(Sleef_logd4_u10 , "log, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_log10d4_u10, "log10, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_log1pd4_u10, "log1p, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_logd4_u35 , "log, DP, 256", 0, 1e+300, 4.0, abufdp, vdouble);
// Exponentials, inputs in [-700, 700]
fillDP(abufdp, -700, 700);
callFuncSLEEF1_1(Sleef_expd4_u10 , "exp, DP, 256", -700, 700, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_exp2d4_u10 , "exp2, DP, 256", -700, 700, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_exp10d4_u10, "exp10, DP, 256", -700, 700, 1.0, abufdp, vdouble);
// pow, both operands in [-30, 30]
fillDP(abufdp, -30, 30);
fillDP(bbufdp, -30, 30);
callFuncSLEEF1_2(Sleef_powd4_u10, "pow, DP, 256", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble);
// asin / acos, inputs in [-1, 1]
fillDP(abufdp, -1.0, 1.0);
callFuncSLEEF1_1(Sleef_asind4_u10, "asin, DP, 256", -1.0, 1.0, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_acosd4_u10, "acos, DP, 256", -1.0, 1.0, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_asind4_u35, "asin, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_acosd4_u35, "acos, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble);
// atan / atan2, operands in [-10, 10]
fillDP(abufdp, -10, 10);
fillDP(bbufdp, -10, 10);
callFuncSLEEF1_1(Sleef_atand4_u10, "atan, DP, 256", -10, 10, 1.0, abufdp, vdouble);
callFuncSLEEF1_2(Sleef_atan2d4_u10, "atan2, DP, 256", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble);
callFuncSLEEF1_1(Sleef_atand4_u35, "atan, DP, 256", -10, 10, 4.0, abufdp, vdouble);
callFuncSLEEF1_2(Sleef_atan2d4_u35, "atan2, DP, 256", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble);
}
// Benchmarks the 256-bit single-precision SLEEF trigonometric entry points
// (sin, cos, tan, sincos; both the _u10 and _u35 variants) over two input ranges.
// Each section refills abufsp with fillSP and then hands the same range bounds to
// callFuncSLEEF1_1, which is not defined in this file (presumably a macro from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef256_SPTrig() {
// Inputs in [0, 6.28]
fillSP(abufsp, 0, 6.28);
callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat);
// Inputs in [0, 1e+20]
fillSP(abufsp, 0, 1e+20);
callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat);
}
// Benchmarks the 256-bit single-precision SLEEF non-trigonometric entry points:
// log/log10, exp/exp2/exp10, pow, asin/acos and atan/atan2.
// One-argument functions read abufsp through callFuncSLEEF1_1; two-argument functions
// (pow, atan2) read abufsp and bbufsp through callFuncSLEEF1_2. Neither is defined in
// this file (presumably both come from bench.h).
// The log1p calls and the _u35 log10 call are present only as commented-out lines;
// the reason is not stated in this file.
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef256_SPNontrig() {
// Logarithms, inputs in [0, 1e+38]
fillSP(abufsp, 0, 1e+38);
callFuncSLEEF1_1(Sleef_logf8_u10 , "log, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_log10f8_u10, "log10, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log1pf8_u10, "log1p, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_logf8_u35 , "log, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log10f8_u35, "log10, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log1pf8_u35, "log1p, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat);
// Exponentials, inputs in [-100, 100]
fillSP(abufsp, -100, 100);
callFuncSLEEF1_1(Sleef_expf8_u10 , "exp, SP, 256", -100, 100, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_exp2f8_u10 , "exp2, SP, 256", -100, 100, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_exp10f8_u10, "exp10, SP, 256", -100, 100, 1.0, abufsp, vfloat);
// pow, both operands in [-30, 30]
fillSP(abufsp, -30, 30);
fillSP(bbufsp, -30, 30);
callFuncSLEEF1_2(Sleef_powf8_u10, "pow, SP, 256", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat);
// asin / acos, inputs in [-1, 1]
fillSP(abufsp, -1.0, 1.0);
callFuncSLEEF1_1(Sleef_asinf8_u10, "asin, SP, 256", -1.0, 1, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_acosf8_u10, "acos, SP, 256", -1.0, 1, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_asinf8_u35, "asin, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_acosf8_u35, "acos, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat);
// atan / atan2, operands in [-10, 10]
fillSP(abufsp, -10, 10);
fillSP(bbufsp, -10, 10);
callFuncSLEEF1_1(Sleef_atanf8_u10, "atan, SP, 256", -10, 10, 1.0, abufsp, vfloat);
callFuncSLEEF1_2(Sleef_atan2f8_u10, "atan2, SP, 256", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat);
callFuncSLEEF1_1(Sleef_atanf8_u35, "atan, SP, 256", -10, 10, 4.0, abufsp, vfloat);
callFuncSLEEF1_2(Sleef_atan2f8_u35, "atan2, SP, 256", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// No-op fallbacks for the 256-bit SLEEF benchmarks (and zeroupper256), compiled when
// ENABLED is not defined; they keep the entry points defined so callers still link.
void zeroupper256() {
}

void benchSleef256_DPTrig() {
}

void benchSleef256_DPNontrig() {
}

void benchSleef256_SPTrig() {
}

void benchSleef256_SPNontrig() {
}
#endif // #ifdef ENABLED

View File

@ -1,180 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <sleef.h>
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);
extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;
#include "bench.h"
#ifdef __AVX512F__
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
typedef __m512d vdouble;
typedef __m512 vfloat;
#define ENABLED
#endif
#ifdef ENABLED
// Benchmarks the 512-bit double-precision SLEEF trigonometric entry points
// (sin, cos, tan, sincos; both the _u10 and _u35 variants) over three input ranges.
// Each section refills abufdp with fillDP and then hands the same range bounds to
// callFuncSLEEF1_1, which is not defined in this file (presumably a macro from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix
// (likely an ULP bound) - confirm against bench.h.
void benchSleef512_DPTrig() {
// Inputs in [0, 6.28]
fillDP(abufdp, 0, 6.28);
callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 6.28, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble);
// Inputs in [0, 1e+6]
fillDP(abufdp, 0, 1e+6);
callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble);
// Inputs in [0, 1e+100]
fillDP(abufdp, 0, 1e+100);
callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble);
}
// Benchmarks the 512-bit double-precision SLEEF non-trigonometric entry points:
// log/log10/log1p, exp/exp2/exp10, pow, asin/acos and atan/atan2.
// One-argument functions read abufdp through callFuncSLEEF1_1; two-argument functions
// (pow, atan2) read abufdp and bbufdp through callFuncSLEEF1_2. Neither is defined in
// this file (presumably both come from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef512_DPNontrig() {
// Logarithms, inputs in [0, 1e+300]
fillDP(abufdp, 0, 1e+300);
callFuncSLEEF1_1(Sleef_logd8_u10 , "log, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_log10d8_u10, "log10, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_log1pd8_u10, "log1p, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_logd8_u35 , "log, DP, 512", 0, 1e+300, 4.0, abufdp, vdouble);
// Exponentials, inputs in [-700, 700]
fillDP(abufdp, -700, 700);
callFuncSLEEF1_1(Sleef_expd8_u10 , "exp, DP, 512", -700, 700, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_exp2d8_u10 , "exp2, DP, 512", -700, 700, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_exp10d8_u10, "exp10, DP, 512", -700, 700, 1.0, abufdp, vdouble);
// pow, both operands in [-30, 30]
fillDP(abufdp, -30, 30);
fillDP(bbufdp, -30, 30);
callFuncSLEEF1_2(Sleef_powd8_u10, "pow, DP, 512", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble);
// asin / acos, inputs in [-1, 1]
fillDP(abufdp, -1.0, 1.0);
callFuncSLEEF1_1(Sleef_asind8_u10, "asin, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_acosd8_u10, "acos, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_asind8_u35, "asin, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble);
callFuncSLEEF1_1(Sleef_acosd8_u35, "acos, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble);
// atan / atan2, operands in [-10, 10]
fillDP(abufdp, -10, 10);
fillDP(bbufdp, -10, 10);
callFuncSLEEF1_1(Sleef_atand8_u10, "atan, DP, 512", -10, 10, 1.0, abufdp, vdouble);
callFuncSLEEF1_2(Sleef_atan2d8_u10, "atan2, DP, 512", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble);
callFuncSLEEF1_1(Sleef_atand8_u35, "atan, DP, 512", -10, 10, 4.0, abufdp, vdouble);
callFuncSLEEF1_2(Sleef_atan2d8_u35, "atan2, DP, 512", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble);
}
// Benchmarks the 512-bit single-precision SLEEF trigonometric entry points
// (sin, cos, tan, sincos; both the _u10 and _u35 variants) over two input ranges.
// Each section refills abufsp with fillSP and then hands the same range bounds to
// callFuncSLEEF1_1, which is not defined in this file (presumably a macro from bench.h).
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef512_SPTrig() {
// Inputs in [0, 6.28]
fillSP(abufsp, 0, 6.28);
callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 0, 6.28, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat);
// Inputs in [0, 1e+20]
fillSP(abufsp, 0, 1e+20);
callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat);
}
// Benchmarks the 512-bit single-precision SLEEF non-trigonometric entry points:
// log/log10, exp/exp2/exp10, pow, asin/acos and atan/atan2.
// One-argument functions read abufsp through callFuncSLEEF1_1; two-argument functions
// (pow, atan2) read abufsp and bbufsp through callFuncSLEEF1_2. Neither is defined in
// this file (presumably both come from bench.h).
// The log1p calls and the _u35 log10 call are present only as commented-out lines;
// the reason is not stated in this file.
// NOTE(review): the 1.0 / 4.0 argument appears to track the _u10 / _u35 suffix - confirm.
void benchSleef512_SPNontrig() {
// Logarithms, inputs in [0, 1e+38]
fillSP(abufsp, 0, 1e+38);
callFuncSLEEF1_1(Sleef_logf16_u10 , "log, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_log10f16_u10, "log10, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log1pf16_u10, "log1p, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_logf16_u35 , "log, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log10f16_u35, "log10, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat);
//callFuncSLEEF1_1(Sleef_log1pf16_u35, "log1p, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat);
// Exponentials, inputs in [-100, 100]
fillSP(abufsp, -100, 100);
callFuncSLEEF1_1(Sleef_expf16_u10 , "exp, SP, 512", -100, 100, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_exp2f16_u10 , "exp2, SP, 512", -100, 100, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_exp10f16_u10, "exp10, SP, 512", -100, 100, 1.0, abufsp, vfloat);
// pow, both operands in [-30, 30]
fillSP(abufsp, -30, 30);
fillSP(bbufsp, -30, 30);
callFuncSLEEF1_2(Sleef_powf16_u10, "pow, SP, 512", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat);
// asin / acos, inputs in [-1, 1]
fillSP(abufsp, -1.0, 1.0);
callFuncSLEEF1_1(Sleef_asinf16_u10, "asin, SP, 512", -1.0, 1, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_acosf16_u10, "acos, SP, 512", -1.0, 1, 1.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_asinf16_u35, "asin, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat);
callFuncSLEEF1_1(Sleef_acosf16_u35, "acos, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat);
// atan / atan2, operands in [-10, 10]
fillSP(abufsp, -10, 10);
fillSP(bbufsp, -10, 10);
callFuncSLEEF1_1(Sleef_atanf16_u10, "atan, SP, 512", -10, 10, 1.0, abufsp, vfloat);
callFuncSLEEF1_2(Sleef_atan2f16_u10, "atan2, SP, 512", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat);
callFuncSLEEF1_1(Sleef_atanf16_u35, "atan, SP, 512", -10, 10, 4.0, abufsp, vfloat);
callFuncSLEEF1_2(Sleef_atan2f16_u35, "atan2, SP, 512", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// No-op fallbacks for the 512-bit SLEEF benchmarks, compiled when ENABLED is not
// defined; they keep the four entry points defined so callers still link.
void benchSleef512_DPTrig() {
}

void benchSleef512_DPNontrig() {
}

void benchSleef512_SPTrig() {
}

void benchSleef512_SPNontrig() {
}
#endif // #ifdef ENABLED

View File

@ -1,153 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <x86intrin.h>
#include "bench.h"
int veclen = 16;
int enableLogExp;
double *abufdp, *bbufdp;
float *abufsp, *bbufsp;
FILE *fp;
#if defined(__i386__) || defined(__x86_64__)
// Executes the CPUID instruction with the given EAX (leaf) and ECX (sub-leaf) inputs
// and stores the resulting EAX, EBX, ECX and EDX values in out[0..3], in that order.
void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) {
  uint32_t regA, regB, regC, regD;
  __asm__ __volatile__ (
    "cpuid"
    : "=a" (regA), "=b" (regB), "=c" (regC), "=d" (regD)
    : "a" (eax), "c"(ecx));
  out[0] = regA;
  out[1] = regB;
  out[2] = regC;
  out[3] = regD;
}
// Returns 1 when AVX code can actually be executed, 0 otherwise.
// The CPUID AVX bit alone only says the processor implements AVX; the operating
// system must also have enabled saving of the XMM/YMM state, otherwise AVX
// instructions fault. Following the detection sequence in the Intel SDM, this checks:
//   1. CPUID.1:ECX bit 27 (OSXSAVE) - XGETBV is usable,
//   2. CPUID.1:ECX bit 28 (AVX)     - the processor implements AVX,
//   3. XCR0 bits 1 and 2            - the OS has enabled SSE and AVX state.
int cpuSupportsAVX() {
  int32_t reg[4];
  x86CpuID(reg, 1, 0);

  const uint32_t featureEcx = (uint32_t)reg[2];
  const uint32_t required = (1u << 27) | (1u << 28);  // OSXSAVE | AVX
  if ((featureEcx & required) != required) return 0;

  // XGETBV may only be executed once OSXSAVE has been confirmed above.
  uint32_t xcr0Low, xcr0High;
  __asm__ __volatile__ ("xgetbv" : "=a" (xcr0Low), "=d" (xcr0High) : "c" (0));
  (void)xcr0High;

  return (xcr0Low & 0x6u) == 0x6u;
}
// Returns 1 when AVX-512 Foundation code can actually be executed, 0 otherwise.
// Following the detection sequence in the Intel SDM, this checks:
//   1. CPUID leaf 0 reports that leaf 7 exists (querying an unsupported leaf
//      does not return meaningful feature bits),
//   2. CPUID.1:ECX bit 27 (OSXSAVE) - XGETBV is usable,
//   3. XCR0 bits 1, 2, 5, 6, 7 - the OS has enabled SSE, AVX, opmask,
//      ZMM_Hi256 and Hi16_ZMM state,
//   4. CPUID.(EAX=7,ECX=0):EBX bit 16 (AVX512F).
int cpuSupportsAVX512F() {
  int32_t reg[4];

  x86CpuID(reg, 0, 0);
  if ((uint32_t)reg[0] < 7) return 0;

  x86CpuID(reg, 1, 0);
  if (((uint32_t)reg[2] & (1u << 27)) == 0) return 0;  // OSXSAVE not set

  // XGETBV may only be executed once OSXSAVE has been confirmed above.
  uint32_t xcr0Low, xcr0High;
  __asm__ __volatile__ ("xgetbv" : "=a" (xcr0Low), "=d" (xcr0High) : "c" (0));
  (void)xcr0High;
  if ((xcr0Low & 0xe6u) != 0xe6u) return 0;

  x86CpuID(reg, 7, 0);
  return (reg[1] & (1 << 16)) != 0;
}
#endif
// Returns the current CLOCK_MONOTONIC reading converted to microseconds
// (whole seconds scaled by 1,000,000 plus the nanosecond part divided by 1,000).
uint64_t Sleef_currentTimeMicros() {
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);

  const uint64_t wholeSeconds = (uint64_t)now.tv_sec;
  const uint64_t microsWithinSecond = (uint64_t)now.tv_nsec / 1000;
  return wholeSeconds * 1000000LL + microsWithinSecond;
}
// Fills the first NITER1*veclen elements of buf with pseudo-random doubles
// spread over [min, max].
//
// Two random() results are combined into one fraction r in [0, 1]. POSIX specifies
// that random() returns values in [0, 2^31 - 1], so the combination is scaled by
// 2^31 rather than by RAND_MAX: the previous divisor RAND_MAX * RAND_MAX allowed r
// to reach 1 + 1/RAND_MAX, which could produce values above max (for example
// arguments greater than 1 for asin/acos), and RAND_MAX is the range of rand(),
// not of random().
void fillDP(double *buf, double min, double max) {
  const double randomSpan = 2147483648.0;  // 2^31, one more than random()'s maximum
  const int count = NITER1*veclen;
  for(int i=0;i<count;i++) {
    double r = ((double)random() + randomSpan * (double)random()) / (randomSpan * randomSpan);
    buf[i] = r * (max - min) + min;
  }
}
// Fills the first NITER1*veclen elements of buf with pseudo-random floats
// spread over [min, max]; the value is computed in double and converted on store.
//
// Two random() results are combined into one fraction r in [0, 1]. POSIX specifies
// that random() returns values in [0, 2^31 - 1], so the combination is scaled by
// 2^31 rather than by RAND_MAX: the previous divisor RAND_MAX * RAND_MAX allowed r
// to reach 1 + 1/RAND_MAX, which could produce values above max (for example
// arguments greater than 1 for asin/acos), and RAND_MAX is the range of rand(),
// not of random().
void fillSP(float *buf, double min, double max) {
  const double randomSpan = 2147483648.0;  // 2^31, one more than random()'s maximum
  const int count = NITER1*veclen;
  for(int i=0;i<count;i++) {
    double r = ((double)random() + randomSpan * (double)random()) / (randomSpan * randomSpan);
    buf[i] = r * (max - min) + min;
  }
}
void zeroupper256();
void benchSVML128_DPTrig();
void benchSVML256_DPTrig();
void benchSVML512_DPTrig();
void benchSVML128_DPNontrig();
void benchSVML256_DPNontrig();
void benchSVML512_DPNontrig();
void benchSVML128_SPTrig();
void benchSVML256_SPTrig();
void benchSVML512_SPTrig();
void benchSVML128_SPNontrig();
void benchSVML256_SPNontrig();
void benchSVML512_SPNontrig();
//
int main(int argc, char **argv) {
char *columnTitle = "SVML", *fnBase = "svml";
char fn[1024];
if (argc != 1) columnTitle = argv[1];
if (argc >= 3) fnBase = argv[2];
srandom(time(NULL));
#if defined(__i386__) || defined(__x86_64__)
int do128bit = 1;
int do256bit = cpuSupportsAVX();
int do512bit = cpuSupportsAVX512F();
#elif defined(__ARM_NEON)
int do128bit = 1;
int do256bit = 0;
int do512bit = 0;
#else
#error Unsupported architecture
#endif
posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double));
posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double));
abufsp = (float *)abufdp;
bbufsp = (float *)bbufdp;
enableLogExp = SVMLULP < 2;
sprintf(fn, "%sdptrig%gulp.out", fnBase, (double)SVMLULP);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do256bit) zeroupper256();
if (do128bit) benchSVML128_DPTrig();
if (do256bit) benchSVML256_DPTrig();
if (do512bit) benchSVML512_DPTrig();
fclose(fp);
sprintf(fn, "%sdpnontrig%gulp.out", fnBase, (double)SVMLULP);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do256bit) zeroupper256();
if (do128bit) benchSVML128_DPNontrig();
if (do256bit) benchSVML256_DPNontrig();
if (do512bit) benchSVML512_DPNontrig();
fclose(fp);
sprintf(fn, "%ssptrig%gulp.out", fnBase, (double)SVMLULP);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do256bit) zeroupper256();
if (do128bit) benchSVML128_SPTrig();
if (do256bit) benchSVML256_SPTrig();
if (do512bit) benchSVML512_SPTrig();
fclose(fp);
sprintf(fn, "%sspnontrig%gulp.out", fnBase, (double)SVMLULP);
fp = fopen(fn, "w");
fprintf(fp, "%s\n", columnTitle);
if (do256bit) zeroupper256();
if (do128bit) benchSVML128_SPNontrig();
if (do256bit) benchSVML256_SPNontrig();
if (do512bit) benchSVML512_SPNontrig();
fclose(fp);
exit(0);
}

View File

@ -1,144 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <x86intrin.h>
uint64_t Sleef_currentTimeMicros();
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);
extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern int enableLogExp;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;
#include "bench.h"
#ifdef __SSE2__
typedef __m128d vdouble;
typedef __m128 vfloat;
#define ENABLED
#endif
#ifdef ENABLED
// Benchmarks the 128-bit double-precision SVML trigonometric intrinsics
// (_mm_sin_pd, _mm_cos_pd, _mm_tan_pd, _mm_sincos_pd) over three input ranges.
// Each section refills abufdp with fillDP and then hands the same range bounds to
// callFuncSVML1_1 (sincos goes through callFuncSVML2_1 instead). Neither is defined in
// this file (presumably both are macros from bench.h).
void benchSVML128_DPTrig() {
// Inputs in [0, 6.28]
fillDP(abufdp, 0, 6.28);
callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 6.28, abufdp, vdouble);
callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 6.28, abufdp, vdouble);
callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 6.28, abufdp, vdouble);
callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 6.28, abufdp, vdouble);
// Inputs in [0, 1e+6]
fillDP(abufdp, 0, 1e+6);
callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+6, abufdp, vdouble);
callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+6, abufdp, vdouble);
callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+6, abufdp, vdouble);
callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+6, abufdp, vdouble);
// Inputs in [0, 1e+100]
fillDP(abufdp, 0, 1e+100);
callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+100, abufdp, vdouble);
callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+100, abufdp, vdouble);
callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+100, abufdp, vdouble);
callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+100, abufdp, vdouble);
}
// Benchmarks the 128-bit double-precision SVML non-trigonometric intrinsics.
// log, asin, acos, atan and atan2 always run; log10, log1p, exp, exp2, exp10 and pow
// run only when the extern flag enableLogExp is non-zero.
// One-argument functions read abufdp through callFuncSVML1_1; two-argument functions
// (pow, atan2) read abufdp and bbufdp through callFuncSVML1_2. Neither is defined in
// this file (presumably both come from bench.h).
void benchSVML128_DPNontrig() {
// log, inputs in [0, 1e+300]
fillDP(abufdp, 0, 1e+300);
callFuncSVML1_1(_mm_log_pd , "log, DP, 128", 0, 1e+300, abufdp, vdouble);
if (enableLogExp) {
// log10 / log1p reuse the [0, 1e+300] inputs filled above
callFuncSVML1_1(_mm_log10_pd, "log10, DP, 128", 0, 1e+300, abufdp, vdouble);
callFuncSVML1_1(_mm_log1p_pd, "log1p, DP, 128", 0, 1e+300, abufdp, vdouble);
// Exponentials, inputs in [-700, 700]
fillDP(abufdp, -700, 700);
callFuncSVML1_1(_mm_exp_pd , "exp, DP, 128", -700, 700, abufdp, vdouble);
callFuncSVML1_1(_mm_exp2_pd , "exp2, DP, 128", -700, 700, abufdp, vdouble);
callFuncSVML1_1(_mm_exp10_pd, "exp10, DP, 128", -700, 700, abufdp, vdouble);
// pow, both operands in [-30, 30]
fillDP(abufdp, -30, 30);
fillDP(bbufdp, -30, 30);
callFuncSVML1_2(_mm_pow_pd, "pow, DP, 128", -30, 30, -30, 30, abufdp, bbufdp, vdouble);
}
// asin / acos, inputs in [-1, 1]
fillDP(abufdp, -1.0, 1.0);
callFuncSVML1_1(_mm_asin_pd, "asin, DP, 128", -1.0, 1.0, abufdp, vdouble);
callFuncSVML1_1(_mm_acos_pd, "acos, DP, 128", -1.0, 1.0, abufdp, vdouble);
// atan / atan2, operands in [-10, 10]
fillDP(abufdp, -10, 10);
fillDP(bbufdp, -10, 10);
callFuncSVML1_1(_mm_atan_pd, "atan, DP, 128", -10, 10, abufdp, vdouble);
callFuncSVML1_2(_mm_atan2_pd, "atan2, DP, 128", -10, 10, -10, 10, abufdp, bbufdp, vdouble);
}
// Benchmarks the 128-bit single-precision SVML trigonometric intrinsics
// (_mm_sin_ps, _mm_cos_ps, _mm_tan_ps, _mm_sincos_ps) over two input ranges.
// Each section refills abufsp with fillSP and then hands the same range bounds to
// callFuncSVML1_1 (sincos goes through callFuncSVML2_1 instead). Neither is defined in
// this file (presumably both are macros from bench.h).
void benchSVML128_SPTrig() {
// Inputs in [0, 6.28]
fillSP(abufsp, 0, 6.28);
callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 6.28, abufsp, vfloat);
callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 6.28, abufsp, vfloat);
callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 6.28, abufsp, vfloat);
callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 6.28, abufsp, vfloat);
// Inputs in [0, 1e+20]
fillSP(abufsp, 0, 1e+20);
callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 1e+20, abufsp, vfloat);
callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 1e+20, abufsp, vfloat);
callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 1e+20, abufsp, vfloat);
callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 1e+20, abufsp, vfloat);
}
// Benchmarks the 128-bit single-precision SVML non-trigonometric intrinsics.
// log, asin, acos, atan and atan2 always run; log10, exp, exp2, exp10 and pow run only
// when the extern flag enableLogExp is non-zero. The log1p call is present only as a
// commented-out line; the reason is not stated in this file.
// One-argument functions read abufsp through callFuncSVML1_1; two-argument functions
// (pow, atan2) read abufsp and bbufsp through callFuncSVML1_2. Neither is defined in
// this file (presumably both come from bench.h).
void benchSVML128_SPNontrig() {
// log, inputs in [0, 1e+38]
fillSP(abufsp, 0, 1e+38);
callFuncSVML1_1(_mm_log_ps , "log, SP, 128", 0, 1e+38, abufsp, vfloat);
if (enableLogExp) {
// log10 reuses the [0, 1e+38] inputs filled above
callFuncSVML1_1(_mm_log10_ps, "log10, SP, 128", 0, 1e+38, abufsp, vfloat);
//callFuncSVML1_1(_mm_log1p_ps, "log1p, SP, 128", 0, 1e+38, abufsp, vfloat);
// Exponentials, inputs in [-100, 100]
fillSP(abufsp, -100, 100);
callFuncSVML1_1(_mm_exp_ps , "exp, SP, 128", -100, 100, abufsp, vfloat);
callFuncSVML1_1(_mm_exp2_ps , "exp2, SP, 128", -100, 100, abufsp, vfloat);
callFuncSVML1_1(_mm_exp10_ps, "exp10, SP, 128", -100, 100, abufsp, vfloat);
// pow, both operands in [-30, 30]
fillSP(abufsp, -30, 30);
fillSP(bbufsp, -30, 30);
callFuncSVML1_2(_mm_pow_ps, "pow, SP, 128", -30, 30, -30, 30, abufsp, bbufsp, vfloat);
}
// asin / acos, inputs in [-1, 1]
fillSP(abufsp, -1.0, 1.0);
callFuncSVML1_1(_mm_asin_ps, "asin, SP, 128", -1.0, 1, abufsp, vfloat);
callFuncSVML1_1(_mm_acos_ps, "acos, SP, 128", -1.0, 1, abufsp, vfloat);
// atan / atan2, operands in [-10, 10]
fillSP(abufsp, -10, 10);
fillSP(bbufsp, -10, 10);
callFuncSVML1_1(_mm_atan_ps, "atan, SP, 128", -10, 10, abufsp, vfloat);
callFuncSVML1_2(_mm_atan2_ps, "atan2, SP, 128", -10, 10, -10, 10, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// No-op fallbacks for the 128-bit SVML benchmarks, compiled when ENABLED is not
// defined; they keep the four entry points defined so callers still link.
void benchSVML128_DPTrig() {
}

void benchSVML128_DPNontrig() {
}

void benchSVML128_SPTrig() {
}

void benchSVML128_SPNontrig() {
}
#endif // #ifdef ENABLED

View File

@ -1,147 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <x86intrin.h>
uint64_t Sleef_currentTimeMicros();
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);
extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern int enableLogExp;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;
#include "bench.h"
#ifdef __AVX__
typedef __m256d vdouble;
typedef __m256 vfloat;
#define ENABLED
#endif
#ifdef ENABLED
// Thin wrapper that issues the AVX _mm256_zeroupper() intrinsic. Presumably it
// exists so code outside this AVX-enabled translation unit can trigger it
// (verify against the benchmark driver).
void zeroupper256() { _mm256_zeroupper(); }
// Runs the double-precision, 256-bit SVML measurements for sin, cos, tan and
// sincos over three argument ranges: 0 .. 6.28, 0 .. 1e+6 and 0 .. 1e+100.
//
// callFuncSVML1_1 / callFuncSVML2_1 are macros from bench.h (not visible
// here). Each is handed the intrinsic, a label, the nominal range, the input
// buffer and the vdouble type. fillDP is called with the matching range before
// each group; presumably it populates abufdp with values in that range (TODO
// confirm against fillDP's definition).
void benchSVML256_DPTrig() {
// Range 1: 0 .. 6.28.
fillDP(abufdp, 0, 6.28);
callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 6.28, abufdp, vdouble);
callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 6.28, abufdp, vdouble);
callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 6.28, abufdp, vdouble);
callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 6.28, abufdp, vdouble);
// Range 2: 0 .. 1e+6.
fillDP(abufdp, 0, 1e+6);
callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+6, abufdp, vdouble);
callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+6, abufdp, vdouble);
callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+6, abufdp, vdouble);
callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+6, abufdp, vdouble);
// Range 3: 0 .. 1e+100.
fillDP(abufdp, 0, 1e+100);
callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+100, abufdp, vdouble);
callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+100, abufdp, vdouble);
callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+100, abufdp, vdouble);
callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+100, abufdp, vdouble);
}
// Runs the double-precision, 256-bit SVML measurements for the
// non-trigonometric functions: log, log10, log1p, exp, exp2, exp10, pow, asin,
// acos, atan and atan2.
//
// callFuncSVML1_1 / callFuncSVML1_2 are macros from bench.h (not visible
// here). Every fillDP call is given the same range as the invocations that
// follow it; presumably it populates the buffer with values in that range
// (TODO confirm against fillDP's definition).
void benchSVML256_DPNontrig() {
fillDP(abufdp, 0, 1e+300);
callFuncSVML1_1(_mm256_log_pd , "log, DP, 256", 0, 1e+300, abufdp, vdouble);
// log10, log1p, exp, exp2, exp10 and pow are measured only when enableLogExp is non-zero.
if (enableLogExp) {
// log10 and log1p reuse the buffer filled for log above (range 0 .. 1e+300).
callFuncSVML1_1(_mm256_log10_pd, "log10, DP, 256", 0, 1e+300, abufdp, vdouble);
callFuncSVML1_1(_mm256_log1p_pd, "log1p, DP, 256", 0, 1e+300, abufdp, vdouble);
fillDP(abufdp, -700, 700);
callFuncSVML1_1(_mm256_exp_pd , "exp, DP, 256", -700, 700, abufdp, vdouble);
callFuncSVML1_1(_mm256_exp2_pd , "exp2, DP, 256", -700, 700, abufdp, vdouble);
callFuncSVML1_1(_mm256_exp10_pd, "exp10, DP, 256", -700, 700, abufdp, vdouble);
// pow takes two inputs, so both buffers are refilled before the call.
fillDP(abufdp, -30, 30);
fillDP(bbufdp, -30, 30);
callFuncSVML1_2(_mm256_pow_pd, "pow, DP, 256", -30, 30, -30, 30, abufdp, bbufdp, vdouble);
}
fillDP(abufdp, -1.0, 1.0);
callFuncSVML1_1(_mm256_asin_pd, "asin, DP, 256", -1.0, 1.0, abufdp, vdouble);
callFuncSVML1_1(_mm256_acos_pd, "acos, DP, 256", -1.0, 1.0, abufdp, vdouble);
// atan reads only abufdp; bbufdp is refilled for the two-input atan2 call.
fillDP(abufdp, -10, 10);
fillDP(bbufdp, -10, 10);
callFuncSVML1_1(_mm256_atan_pd, "atan, DP, 256", -10, 10, abufdp, vdouble);
callFuncSVML1_2(_mm256_atan2_pd, "atan2, DP, 256", -10, 10, -10, 10, abufdp, bbufdp, vdouble);
}
// Runs the single-precision, 256-bit SVML measurements for sin, cos, tan and
// sincos over two argument ranges: 0 .. 6.28 and 0 .. 1e+20.
//
// callFuncSVML1_1 / callFuncSVML2_1 are macros from bench.h (not visible
// here). fillSP is called with the matching range before each group;
// presumably it populates abufsp with values in that range (TODO confirm
// against fillSP's definition).
void benchSVML256_SPTrig() {
// Range 1: 0 .. 6.28.
fillSP(abufsp, 0, 6.28);
callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 6.28, abufsp, vfloat);
callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 6.28, abufsp, vfloat);
callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 6.28, abufsp, vfloat);
callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 6.28, abufsp, vfloat);
// Range 2: 0 .. 1e+20.
fillSP(abufsp, 0, 1e+20);
callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 1e+20, abufsp, vfloat);
callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 1e+20, abufsp, vfloat);
callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 1e+20, abufsp, vfloat);
callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 1e+20, abufsp, vfloat);
}
// Runs the single-precision, 256-bit SVML measurements for the
// non-trigonometric functions: log, log10, exp, exp2, exp10, pow, asin, acos,
// atan and atan2.
//
// callFuncSVML1_1 / callFuncSVML1_2 are macros from bench.h (not visible
// here). Every fillSP call is given the same range as the invocations that
// follow it; presumably it populates the buffer with values in that range
// (TODO confirm against fillSP's definition).
void benchSVML256_SPNontrig() {
fillSP(abufsp, 0, 1e+38);
callFuncSVML1_1(_mm256_log_ps , "log, SP, 256", 0, 1e+38, abufsp, vfloat);
// log10, exp, exp2, exp10 and pow are measured only when enableLogExp is non-zero.
if (enableLogExp) {
// log10 reuses the buffer filled for log above (range 0 .. 1e+38).
callFuncSVML1_1(_mm256_log10_ps, "log10, SP, 256", 0, 1e+38, abufsp, vfloat);
// The SP log1p measurement below is disabled, unlike its DP counterpart;
// the reason is not recorded here.
//callFuncSVML1_1(_mm256_log1p_ps, "log1p, SP, 256", 0, 1e+38, abufsp, vfloat);
fillSP(abufsp, -100, 100);
callFuncSVML1_1(_mm256_exp_ps , "exp, SP, 256", -100, 100, abufsp, vfloat);
callFuncSVML1_1(_mm256_exp2_ps , "exp2, SP, 256", -100, 100, abufsp, vfloat);
callFuncSVML1_1(_mm256_exp10_ps, "exp10, SP, 256", -100, 100, abufsp, vfloat);
// pow takes two inputs, so both buffers are refilled before the call.
fillSP(abufsp, -30, 30);
fillSP(bbufsp, -30, 30);
callFuncSVML1_2(_mm256_pow_ps, "pow, SP, 256", -30, 30, -30, 30, abufsp, bbufsp, vfloat);
}
fillSP(abufsp, -1.0, 1.0);
callFuncSVML1_1(_mm256_asin_ps, "asin, SP, 256", -1.0, 1, abufsp, vfloat);
callFuncSVML1_1(_mm256_acos_ps, "acos, SP, 256", -1.0, 1, abufsp, vfloat);
// atan reads only abufsp; bbufsp is refilled for the two-input atan2 call.
fillSP(abufsp, -10, 10);
fillSP(bbufsp, -10, 10);
callFuncSVML1_1(_mm256_atan_ps, "atan, SP, 256", -10, 10, abufsp, vfloat);
callFuncSVML1_2(_mm256_atan2_ps, "atan2, SP, 256", -10, 10, -10, 10, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// Empty replacements for zeroupper256 and the four 256-bit benchmark entry
// points, compiled when ENABLED is not defined (i.e. __AVX__ was not defined
// above). Presumably they keep callers linking when AVX is unavailable
// (verify against the benchmark driver).
void zeroupper256() {}
void benchSVML256_DPTrig() {}
void benchSVML256_DPNontrig() {}
void benchSVML256_SPTrig() {}
void benchSVML256_SPNontrig() {}
#endif // #ifdef ENABLED

View File

@ -1,144 +0,0 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <unistd.h>
#include <x86intrin.h>
uint64_t Sleef_currentTimeMicros();
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);
extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern int enableLogExp;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;
#include "bench.h"
#ifdef __AVX512F__
typedef __m512d vdouble;
typedef __m512 vfloat;
#define ENABLED
#endif
#ifdef ENABLED
// Runs the double-precision, 512-bit SVML measurements for sin, cos, tan and
// sincos over three argument ranges: 0 .. 6.28, 0 .. 1e+6 and 0 .. 1e+100.
//
// callFuncSVML1_1 / callFuncSVML2_1 are macros from bench.h (not visible
// here). Each is handed the intrinsic, a label, the nominal range, the input
// buffer and the vdouble type. fillDP is called with the matching range before
// each group; presumably it populates abufdp with values in that range (TODO
// confirm against fillDP's definition).
void benchSVML512_DPTrig() {
// Range 1: 0 .. 6.28.
fillDP(abufdp, 0, 6.28);
callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 6.28, abufdp, vdouble);
callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 6.28, abufdp, vdouble);
callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 6.28, abufdp, vdouble);
callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 6.28, abufdp, vdouble);
// Range 2: 0 .. 1e+6.
fillDP(abufdp, 0, 1e+6);
callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+6, abufdp, vdouble);
callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+6, abufdp, vdouble);
callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+6, abufdp, vdouble);
callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+6, abufdp, vdouble);
// Range 3: 0 .. 1e+100.
fillDP(abufdp, 0, 1e+100);
callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+100, abufdp, vdouble);
callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+100, abufdp, vdouble);
callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+100, abufdp, vdouble);
callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+100, abufdp, vdouble);
}
// Runs the double-precision, 512-bit SVML measurements for the
// non-trigonometric functions: log, log10, log1p, exp, exp2, exp10, pow, asin,
// acos, atan and atan2.
//
// callFuncSVML1_1 / callFuncSVML1_2 are macros from bench.h (not visible
// here). Every fillDP call is given the same range as the invocations that
// follow it; presumably it populates the buffer with values in that range
// (TODO confirm against fillDP's definition).
void benchSVML512_DPNontrig() {
fillDP(abufdp, 0, 1e+300);
callFuncSVML1_1(_mm512_log_pd , "log, DP, 512", 0, 1e+300, abufdp, vdouble);
// log10, log1p, exp, exp2, exp10 and pow are measured only when enableLogExp is non-zero.
if (enableLogExp) {
// log10 and log1p reuse the buffer filled for log above (range 0 .. 1e+300).
callFuncSVML1_1(_mm512_log10_pd, "log10, DP, 512", 0, 1e+300, abufdp, vdouble);
callFuncSVML1_1(_mm512_log1p_pd, "log1p, DP, 512", 0, 1e+300, abufdp, vdouble);
fillDP(abufdp, -700, 700);
callFuncSVML1_1(_mm512_exp_pd , "exp, DP, 512", -700, 700, abufdp, vdouble);
callFuncSVML1_1(_mm512_exp2_pd , "exp2, DP, 512", -700, 700, abufdp, vdouble);
callFuncSVML1_1(_mm512_exp10_pd, "exp10, DP, 512", -700, 700, abufdp, vdouble);
// pow takes two inputs, so both buffers are refilled before the call.
fillDP(abufdp, -30, 30);
fillDP(bbufdp, -30, 30);
callFuncSVML1_2(_mm512_pow_pd, "pow, DP, 512", -30, 30, -30, 30, abufdp, bbufdp, vdouble);
}
fillDP(abufdp, -1.0, 1.0);
callFuncSVML1_1(_mm512_asin_pd, "asin, DP, 512", -1.0, 1.0, abufdp, vdouble);
callFuncSVML1_1(_mm512_acos_pd, "acos, DP, 512", -1.0, 1.0, abufdp, vdouble);
// atan reads only abufdp; bbufdp is refilled for the two-input atan2 call.
fillDP(abufdp, -10, 10);
fillDP(bbufdp, -10, 10);
callFuncSVML1_1(_mm512_atan_pd, "atan, DP, 512", -10, 10, abufdp, vdouble);
callFuncSVML1_2(_mm512_atan2_pd, "atan2, DP, 512", -10, 10, -10, 10, abufdp, bbufdp, vdouble);
}
// Runs the single-precision, 512-bit SVML measurements for sin, cos, tan and
// sincos over two argument ranges: 0 .. 6.28 and 0 .. 1e+20.
//
// callFuncSVML1_1 / callFuncSVML2_1 are macros from bench.h (not visible
// here). fillSP is called with the matching range before each group;
// presumably it populates abufsp with values in that range (TODO confirm
// against fillSP's definition).
void benchSVML512_SPTrig() {
// Range 1: 0 .. 6.28.
fillSP(abufsp, 0, 6.28);
callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 6.28, abufsp, vfloat);
callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 6.28, abufsp, vfloat);
callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 6.28, abufsp, vfloat);
callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 6.28, abufsp, vfloat);
// Range 2: 0 .. 1e+20.
fillSP(abufsp, 0, 1e+20);
callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 1e+20, abufsp, vfloat);
callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 1e+20, abufsp, vfloat);
callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 1e+20, abufsp, vfloat);
callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 1e+20, abufsp, vfloat);
}
// Runs the single-precision, 512-bit SVML measurements for the
// non-trigonometric functions: log, log10, exp, exp2, exp10, pow, asin, acos,
// atan and atan2.
//
// callFuncSVML1_1 / callFuncSVML1_2 are macros from bench.h (not visible
// here). Every fillSP call is given the same range as the invocations that
// follow it; presumably it populates the buffer with values in that range
// (TODO confirm against fillSP's definition).
void benchSVML512_SPNontrig() {
fillSP(abufsp, 0, 1e+38);
callFuncSVML1_1(_mm512_log_ps , "log, SP, 512", 0, 1e+38, abufsp, vfloat);
// log10, exp, exp2, exp10 and pow are measured only when enableLogExp is non-zero.
if (enableLogExp) {
// log10 reuses the buffer filled for log above (range 0 .. 1e+38).
callFuncSVML1_1(_mm512_log10_ps, "log10, SP, 512", 0, 1e+38, abufsp, vfloat);
// The SP log1p measurement below is disabled, unlike its DP counterpart;
// the reason is not recorded here.
//callFuncSVML1_1(_mm512_log1p_ps, "log1p, SP, 512", 0, 1e+38, abufsp, vfloat);
fillSP(abufsp, -100, 100);
callFuncSVML1_1(_mm512_exp_ps , "exp, SP, 512", -100, 100, abufsp, vfloat);
callFuncSVML1_1(_mm512_exp2_ps , "exp2, SP, 512", -100, 100, abufsp, vfloat);
callFuncSVML1_1(_mm512_exp10_ps, "exp10, SP, 512", -100, 100, abufsp, vfloat);
// pow takes two inputs, so both buffers are refilled before the call.
fillSP(abufsp, -30, 30);
fillSP(bbufsp, -30, 30);
callFuncSVML1_2(_mm512_pow_ps, "pow, SP, 512", -30, 30, -30, 30, abufsp, bbufsp, vfloat);
}
fillSP(abufsp, -1.0, 1.0);
callFuncSVML1_1(_mm512_asin_ps, "asin, SP, 512", -1.0, 1, abufsp, vfloat);
callFuncSVML1_1(_mm512_acos_ps, "acos, SP, 512", -1.0, 1, abufsp, vfloat);
// atan reads only abufsp; bbufsp is refilled for the two-input atan2 call.
fillSP(abufsp, -10, 10);
fillSP(bbufsp, -10, 10);
callFuncSVML1_1(_mm512_atan_ps, "atan, SP, 512", -10, 10, abufsp, vfloat);
callFuncSVML1_2(_mm512_atan2_ps, "atan2, SP, 512", -10, 10, -10, 10, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// Empty replacements for the four 512-bit benchmark entry points, compiled
// when ENABLED is not defined (i.e. __AVX512F__ was not defined above).
// Presumably they keep callers linking when AVX-512 is unavailable (verify
// against the benchmark driver).
void benchSVML512_DPTrig() {}
void benchSVML512_DPNontrig() {}
void benchSVML512_SPTrig() {}
void benchSVML512_SPNontrig() {}
#endif // #ifdef ENABLED

View File

@ -1,17 +0,0 @@
#!/bin/sh
# Run every benchmark program named on the command line, passing each one a
# user-supplied label and a run counter that persists in ./counter.txt.
#
# Arguments: one or more benchmark executables. Each is invoked as
#   PROGRAM "<label>" <counter>
# After all programs have run, the counter is incremented and written back to
# counter.txt in the current directory.

echo

# `read -p` is not specified by POSIX and is rejected by /bin/sh
# implementations such as dash, so the prompt is printed separately (to
# stderr, where bash's `read -p` would have put it). -r keeps backslashes in
# the label intact.
printf '%s' "Enter label of measurement(e.g. My desktop PC) : " >&2
read -r label

# Resume numbering from the previous run when a counter file exists.
if [ -f counter.txt ]
then
    counter=$(cat counter.txt)
else
    counter=0
fi

echo Measurement in progress. This may take several minutes.

# Iterate over "$@" rather than unquoted $* so that a program path containing
# whitespace or glob characters is kept as a single word.
for i in "$@"; do
    "$i" "$label" "$counter"
done

counter=$((counter+1))
echo "$counter" > counter.txt

View File

@ -65,20 +65,33 @@ include_directories(${sleef_BINARY_DIR}/include) # sleef.h
include_directories(${sleef_SOURCE_DIR}/src/libm) # rename.h
include_directories(${sleef_BINARY_DIR}/src/libm/include) # rename headers
if(NOT LIB_MPFR)
if (SLEEF_ENFORCE_TESTER AND NOT SLEEF_ENABLE_TESTER)
message(FATAL_ERROR "SLEEF_ENFORCE_TESTER is specified but SLEEF_ENABLE_TESTER is false")
endif(SLEEF_ENFORCE_TESTER AND NOT SLEEF_ENABLE_TESTER)
if(SLEEF_ENABLE_TESTER AND NOT LIB_MPFR)
find_program(TESTER_COMMAND tester)
endif(NOT LIB_MPFR)
endif(SLEEF_ENABLE_TESTER AND NOT LIB_MPFR)
if (SLEEF_ENFORCE_TESTER AND NOT LIB_MPFR AND NOT TESTER_COMMAND)
message(FATAL_ERROR "SLEEF_ENFORCE_TESTER is specified and tester is not available")
endif(SLEEF_ENFORCE_TESTER AND NOT LIB_MPFR AND NOT TESTER_COMMAND)
if (SLEEF_ENFORCE_TESTER4 AND NOT SLEEF_ENABLE_TESTER4)
message(FATAL_ERROR "SLEEF_ENFORCE_TESTER4 is specified but SLEEF_ENABLE_TESTER4 is false")
endif()
if (SLEEF_ENFORCE_TESTER4 AND NOT TLFLOAT_LIBRARIES)
message(FATAL_ERROR "SLEEF_ENFORCE_TESTER4 is specified but TLFloat is not available")
endif()
find_library(LIBRT rt)
if (NOT LIBRT)
set(LIBRT "")
endif()
set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS} ${FLAGS_NOSTRICTALIASING}")
set(CMAKE_CXX_FLAGS "${ORG_CMAKE_CXX_FLAGS} ${SLEEF_C_FLAGS} ${FLAGS_NOSTRICTALIASING}")
set(COMMON_TARGET_PROPERTIES
C_STANDARD 99 # -std=gnu99
@ -90,6 +103,17 @@ endif()
#
# Register ${CMD} as a CTest test and set its COST property to ${C}.
#
#   C   - value stored in the test's COST property
#   CMD - used both as the test name and as the program to run
#
# How the program is launched depends on which wrapper variable is set:
#   * SDE_COMMAND: run ${SDE_COMMAND} "--" followed by the binary's path under
#     CMAKE_RUNTIME_OUTPUT_DIRECTORY.
#   * EMULATOR (checked only if SDE_COMMAND is unset): run ${EMULATOR}
#     followed by that same path.
#   * neither: pass ${CMD} itself as the test command.
function(add_test_with_emu C CMD)
if (SDE_COMMAND)
add_test(NAME ${CMD} COMMAND ${SDE_COMMAND} "--" ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${CMD})
elseif(EMULATOR)
add_test(NAME ${CMD} COMMAND ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${CMD})
else()
add_test(NAME ${CMD} COMMAND ${CMD})
endif()
set_tests_properties(${CMD} PROPERTIES COST ${C})
endfunction()
function(add_test_iut IUT C)
if (LIB_MPFR)
set(TESTER ${TARGET_TESTER})
@ -126,14 +150,19 @@ function(add_test_iut IUT C)
endif()
endfunction()
# Compile executable 'iut'
add_executable(${TARGET_IUT} iut.c testerutil.c)
target_compile_definitions(${TARGET_IUT} PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_IUT} ${TARGET_LIBSLEEF}
${LIBM} ${LIBRT})
set_target_properties(${TARGET_IUT} PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test_iut(${TARGET_IUT} 1.0)
set(IUT_LIST ${TARGET_IUT})
if (SLEEF_ENABLE_TESTER)
# Compile executable 'iut'
add_executable(${TARGET_IUT} iut.c)
target_compile_definitions(${TARGET_IUT} PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_IUT} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ}
${LIBM} ${LIBRT})
set_target_properties(${TARGET_IUT} PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test_iut(${TARGET_IUT} 1.0)
set(IUT_LIST ${TARGET_IUT})
# Tests depend on the library
add_dependencies(${TARGET_IUT} ${TARGET_HEADERS})
endif()
# Compile executable 'iutcuda'
if (SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND AND CMAKE_CUDA_COMPILER)
@ -145,97 +174,179 @@ if (SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND AND CMAKE_CUDA_COMPILER)
list(APPEND IUT_LIST iutcuda)
endif()
set(IUT_SRC iutsimd.c ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c testerutil.c)
set(IUT_SRC iutsimd.c ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c)
# Add vector extension `iut`s
macro(test_extension SIMD)
if(COMPILER_SUPPORTS_${SIMD})
string(TOLOWER ${SIMD} LCSIMD)
string(CONCAT TARGET_IUT${SIMD} "iut" ${LCSIMD})
add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC})
target_compile_options(${TARGET_IUT${SIMD}}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${TARGET_IUT${SIMD}}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF}
${LIBM} ${LIBRT})
if (FORCE_AAVPCS)
target_compile_definitions(${TARGET_IUT${SIMD}} PRIVATE ENABLE_AAVPCS=1)
endif(FORCE_AAVPCS)
if (SLEEF_ENABLE_TESTER)
string(CONCAT TARGET_IUT${SIMD} "iut" ${LCSIMD})
add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_HEADERS})
add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF})
set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${TARGET_IUT${SIMD}} ${COSTOVERRIDE_${SIMD}})
else()
add_test_iut(${TARGET_IUT${SIMD}} 1.0)
endif()
list(APPEND IUT_LIST ${TARGET_IUT${SIMD}})
add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC})
target_compile_options(${TARGET_IUT${SIMD}}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${TARGET_IUT${SIMD}}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ}
${LIBM} ${LIBRT})
if (FORCE_AAVPCS)
target_compile_definitions(${TARGET_IUT${SIMD}} PRIVATE ENABLE_AAVPCS=1)
endif(FORCE_AAVPCS)
# The iut programs whose names begin with "iuty" are the iut for the
# deterministic version of functions. By checking the result of
# testing with iutysse2, for example, it can be checked that the
# corresponding deterministic functions pass the accuracy and
# nonnumber tests.
string(CONCAT IUTYNAME "iuty" ${LCSIMD})
add_executable(${IUTYNAME} ${IUT_SRC})
target_compile_options(${IUTYNAME}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${IUTYNAME}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1)
target_link_libraries(${IUTYNAME} ${TARGET_LIBSLEEF}
${LIBM} ${LIBRT})
add_dependencies(${IUTYNAME} ${TARGET_HEADERS})
add_dependencies(${IUTYNAME} ${TARGET_LIBSLEEF})
set_target_properties(${IUTYNAME} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${IUTYNAME} ${COSTOVERRIDE_${SIMD}})
else()
add_test_iut(${IUTYNAME} 1.0)
endif()
list(APPEND IUT_LIST ${IUTYNAME})
# The iut programs whose names begin with "iuti" are the iut for the
# inline version of functions.
if (SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND)
string(CONCAT IUTINAME "iuti" ${LCSIMD})
add_executable(${IUTINAME} ${IUT_SRC})
target_compile_options(${IUTINAME} PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${IUTINAME}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}
USE_INLINE_HEADER="sleefinline_${LCSIMD}.h"
MACRO_ONLY_HEADER="macroonly${SIMD}.h"
SIMD_SUFFIX=_${LCSIMD}_sleef
)
target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/include)
target_link_libraries(${IUTINAME} ${LIBM} ${LIBRT})
add_dependencies(${IUTINAME} ${TARGET_INLINE_HEADERS})
set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99)
add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_HEADERS})
add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF})
set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${IUTINAME} ${COSTOVERRIDE_${SIMD}})
add_test_iut(${TARGET_IUT${SIMD}} ${COSTOVERRIDE_${SIMD}})
else()
add_test_iut(${IUTINAME} 1.0)
add_test_iut(${TARGET_IUT${SIMD}} 1.0)
endif()
list(APPEND IUT_LIST ${IUTINAME})
endif(SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND)
list(APPEND IUT_LIST ${TARGET_IUT${SIMD}})
# The iut programs whose names begin with "iuty" are the iut for the
# deterministic version of functions. By checking the result of
# testing with iutysse2, for example, it can be checked that the
# corresponding deterministic functions pass the accuracy and
# nonnumber tests.
string(CONCAT IUTYNAME "iuty" ${LCSIMD})
add_executable(${IUTYNAME} ${IUT_SRC})
target_compile_options(${IUTYNAME}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${IUTYNAME}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1)
target_link_libraries(${IUTYNAME} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ}
${LIBM} ${LIBRT})
add_dependencies(${IUTYNAME} ${TARGET_HEADERS})
add_dependencies(${IUTYNAME} ${TARGET_LIBSLEEF})
set_target_properties(${IUTYNAME} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${IUTYNAME} ${COSTOVERRIDE_${SIMD}})
else()
add_test_iut(${IUTYNAME} 1.0)
endif()
list(APPEND IUT_LIST ${IUTYNAME})
# The iut programs whose names begin with "iuti" are the iut for the
# inline version of functions.
if (SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND)
string(CONCAT IUTINAME "iuti" ${LCSIMD})
add_executable(${IUTINAME} ${IUT_SRC})
target_compile_options(${IUTINAME} PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${IUTINAME}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}
USE_INLINE_HEADER="sleefinline_${LCSIMD}.h"
MACRO_ONLY_HEADER="macroonly${SIMD}.h"
SIMD_SUFFIX=_${LCSIMD}_sleef
)
target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/include)
target_link_libraries(${IUTINAME} ${TARGET_TESTERUTIL_OBJ} ${LIBM} ${LIBRT})
add_dependencies(${IUTINAME} ${TARGET_INLINE_HEADERS})
set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99)
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${IUTINAME} ${COSTOVERRIDE_${SIMD}})
else()
add_test_iut(${IUTINAME} 1.0)
endif()
list(APPEND IUT_LIST ${IUTINAME})
endif(SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND)
endif(SLEEF_ENABLE_TESTER)
#
if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
set(TESTER4_SRC tester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c)
string(CONCAT TARGET_TESTER4_${SIMD} "tester4" ${LCSIMD})
add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC})
target_compile_options(${TARGET_TESTER4_${SIMD}}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${TARGET_TESTER4_${SIMD}}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_TESTER4_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
if (FORCE_AAVPCS)
target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_AAVPCS=1)
endif(FORCE_AAVPCS)
add_dependencies(${TARGET_TESTER4_${SIMD}} ${TARGET_HEADERS})
add_dependencies(${TARGET_TESTER4_${SIMD}} ${TARGET_LIBSLEEF})
add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat)
set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}})
else()
add_test_with_emu(1.0 ${TARGET_TESTER4_${SIMD}})
endif()
#
string(CONCAT TARGET_TESTER4Y_${SIMD} "tester4y" ${LCSIMD})
add_executable(${TARGET_TESTER4Y_${SIMD}} ${TESTER4_SRC})
target_compile_options(${TARGET_TESTER4Y_${SIMD}}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${TARGET_TESTER4Y_${SIMD}}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1)
target_link_libraries(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
add_dependencies(${TARGET_TESTER4Y_${SIMD}} ${TARGET_HEADERS})
add_dependencies(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF})
add_dependencies(${TARGET_TESTER4Y_${SIMD}} ext_tlfloat)
set_target_properties(${TARGET_TESTER4Y_${SIMD}} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4Y_${SIMD}})
else()
add_test_with_emu(1.0 ${TARGET_TESTER4Y_${SIMD}})
endif()
#
if (SLEEF_BUILD_INLINE_HEADERS)
string(CONCAT TARGET_TESTER4I_${SIMD} "tester4i" ${LCSIMD})
add_executable(${TARGET_TESTER4I_${SIMD}} ${TESTER4_SRC})
target_compile_options(${TARGET_TESTER4I_${SIMD}}
PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_link_libraries(${TARGET_TESTER4I_${SIMD}} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
target_compile_options(${TARGET_TESTER4I_${SIMD}} PRIVATE "-Wno-unknown-pragmas")
endif()
target_compile_definitions(${TARGET_TESTER4I_${SIMD}}
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}
USE_INLINE_HEADER="sleefinline_${LCSIMD}.h"
MACRO_ONLY_HEADER="macroonly${SIMD}.h"
SIMD_SUFFIX=_${LCSIMD}_sleef
)
target_include_directories(${TARGET_TESTER4I_${SIMD}} PRIVATE ${PROJECT_BINARY_DIR}/include)
add_dependencies(${TARGET_TESTER4I_${SIMD}} ${TARGET_INLINE_HEADERS})
add_dependencies(${TARGET_TESTER4I_${SIMD}} ext_tlfloat)
set_target_properties(${TARGET_TESTER4I_${SIMD}} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4I_${SIMD}})
else()
add_test_with_emu(1.0 ${TARGET_TESTER4I_${SIMD}})
endif()
endif(SLEEF_BUILD_INLINE_HEADERS)
endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
#
if(LIB_MPFR AND NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4 AND NOT MINGW)
# Build tester2 SIMD
string(TOLOWER ${SIMD} SCSIMD)
foreach(P dp sp)
set(T "tester2${SCSIMD}${P}")
add_executable(${T} tester2simd${P}.c testerutil.c)
add_executable(${T} tester2simd${P}.c)
if(FORCE_AAVPCS)
target_compile_definitions(${T} PRIVATE ENABLE_AAVPCS=1)
endif(FORCE_AAVPCS)
target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${T} PRIVATE ENABLE_${SIMD}=1 USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ} ${LIB_MPFR} ${LIBGMP} ${LIBM})
add_dependencies(${T} ${TARGET_HEADERS})
add_dependencies(${T} ${TARGET_LIBSLEEF})
if (MPFR_INCLUDE_DIR)
@ -246,11 +357,11 @@ macro(test_extension SIMD)
# testing program for the deterministic version of functions.
set(T "tester2y${SCSIMD}${P}")
add_executable(${T} tester2simd${P}.c testerutil.c)
add_executable(${T} tester2simd${P}.c)
target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${T} PRIVATE ENABLE_${SIMD}=1 USEMPFR=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1)
set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ} ${LIB_MPFR} ${LIBGMP} ${LIBM})
add_dependencies(${T} ${TARGET_HEADERS})
add_dependencies(${T} ${TARGET_LIBSLEEF})
if (MPFR_INCLUDE_DIR)
@ -259,13 +370,16 @@ macro(test_extension SIMD)
endforeach()
endif()
if(NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4 AND SLEEF_OPENSSL_FOUND)
if(NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4)
# Build tester3
string(TOLOWER ${SIMD} SCSIMD)
set(T "tester3${SCSIMD}")
add_executable(${T} tester3.c ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c testerutil.c)
add_executable(${T} tester3.c ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c)
target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
target_compile_definitions(${T} PRIVATE ${COMMON_TARGET_DEFINITIONS} ${TESTER3_DEFINITIONS_${SIMD}})
if (NOT SLEEF_OPENSSL_FOUND)
target_compile_definitions(${T} PRIVATE SLEEF_USE_INTERNAL_SHA256=1)
endif()
set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
# Enable Vector PCS for Advanced SIMD (if supported)
@ -273,8 +387,18 @@ macro(test_extension SIMD)
host_target_AAVPCS_definitions(${T})
endif()
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIBM} ${SLEEF_OPENSSL_LIBRARIES})
target_include_directories(${T} PRIVATE ${SLEEF_OPENSSL_INCLUDE_DIR})
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ} ${LIBM})
if(LIB_MPFR)
target_link_libraries(${T} ${LIB_MPFR} ${LIBGMP})
endif()
if (SLEEF_OPENSSL_FOUND)
target_link_libraries(${T} ${SLEEF_OPENSSL_LIBRARIES})
target_include_directories(${T} PRIVATE ${SLEEF_OPENSSL_INCLUDE_DIR})
else()
target_link_libraries(${T} ${TARGET_PSHA_OBJ})
target_include_directories(${T} PRIVATE ${sleef_SOURCE_DIR}/src/common)
endif()
add_dependencies(${T} ${TARGET_HEADERS})
add_dependencies(${T} ${TARGET_LIBSLEEF})
@ -371,53 +495,99 @@ endif(ENABLE_GNUABI)
#
if (SLEEF_ARCH_X86)
# iutdsp128
add_executable(iutdsp128 ${IUT_SRC})
target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_SSE2})
target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp128 1.0)
list(APPEND IUT_LIST iutdsp128)
if (SLEEF_ENABLE_TESTER)
# iutdsp128
add_executable(iutdsp128 ${IUT_SRC})
target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_SSE2})
target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_TESTERUTIL_OBJ} ${LIBM})
add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp128 1.0)
list(APPEND IUT_LIST iutdsp128)
# iutdsp256
add_executable(iutdsp256 ${IUT_SRC})
target_compile_definitions(iutdsp256 PRIVATE ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp256 PRIVATE ${FLAGS_ENABLE_AVX})
target_link_libraries(iutdsp256 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
add_dependencies(iutdsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp256 1.0)
list(APPEND IUT_LIST iutdsp256)
# iutdsp256
add_executable(iutdsp256 ${IUT_SRC})
target_compile_definitions(iutdsp256 PRIVATE ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp256 PRIVATE ${FLAGS_ENABLE_AVX})
target_link_libraries(iutdsp256 ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_TESTERUTIL_OBJ} ${LIBM})
add_dependencies(iutdsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp256 1.0)
list(APPEND IUT_LIST iutdsp256)
endif(SLEEF_ENABLE_TESTER)
if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
# tester4dsp128
add_executable(tester4dsp128 ${TESTER4_SRC})
target_compile_definitions(tester4dsp128 PRIVATE
ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_SSE2})
target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat)
add_test_with_emu(1.0 tester4dsp128)
# tester4dsp256
add_executable(tester4dsp256 ${TESTER4_SRC})
target_compile_definitions(tester4dsp256 PRIVATE
ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(tester4dsp256 PRIVATE ${FLAGS_ENABLE_AVX})
target_link_libraries(tester4dsp256 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
add_dependencies(tester4dsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat)
add_test_with_emu(1.0 tester4dsp256)
endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
endif(SLEEF_ARCH_X86)
if (SLEEF_ARCH_PPC64)
add_executable(iutdsp128 ${IUT_SRC})
target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSPPOWER_128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_VSX})
target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp128 1.0)
list(APPEND IUT_LIST iutdsp128)
if (SLEEF_ENABLE_TESTER)
add_executable(iutdsp128 ${IUT_SRC})
target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSPPOWER_128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_VSX})
target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_TESTERUTIL_OBJ} ${LIBM})
add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp128 1.0)
list(APPEND IUT_LIST iutdsp128)
endif(SLEEF_ENABLE_TESTER)
if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
add_executable(tester4dsp128 ${TESTER4_SRC})
target_compile_definitions(tester4dsp128 PRIVATE ENABLE_DSPPOWER_128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_VSX})
target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat)
add_test_with_emu(1.0 tester4dsp128)
endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
endif(SLEEF_ARCH_PPC64)
if (SLEEF_ARCH_S390X)
add_executable(iutdsp128 ${IUT_SRC})
target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSPS390X_128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_VXE})
target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp128 1.0)
list(APPEND IUT_LIST iutdsp128)
if (SLEEF_ENABLE_TESTER)
add_executable(iutdsp128 ${IUT_SRC})
target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSPS390X_128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_VXE})
target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_TESTERUTIL_OBJ} ${LIBM})
add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
add_test_iut(iutdsp128 1.0)
list(APPEND IUT_LIST iutdsp128)
endif(SLEEF_ENABLE_TESTER)
if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
add_executable(tester4dsp128 ${TESTER4_SRC})
target_compile_definitions(tester4dsp128 PRIVATE ENABLE_DSPS390X_128=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_VXE})
target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat)
add_test_with_emu(1.0 tester4dsp128)
endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)
endif(SLEEF_ARCH_S390X)
if(SLEEF_BUILD_SCALAR_LIB)
# Compile executable 'iutscalar'
add_executable(iutscalar iut.c testerutil.c)
target_compile_definitions(iutscalar PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(iutscalar sleefscalar ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
set_target_properties(iutscalar PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test_iut(iutscalar 1.0)
list(APPEND IUT_LIST iutscalar)
if (SLEEF_ENABLE_TESTER)
# Compile executable 'iutscalar'
add_executable(iutscalar iut.c)
target_compile_definitions(iutscalar PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(iutscalar sleefscalar ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_TESTERUTIL_OBJ} ${LIBM})
set_target_properties(iutscalar PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test_iut(iutscalar 1.0)
list(APPEND IUT_LIST iutscalar)
endif(SLEEF_ENABLE_TESTER)
endif()
if(LIB_MPFR AND NOT MINGW)
@ -433,7 +603,7 @@ if(LIB_MPFR AND NOT MINGW)
endif()
foreach(P ${PRECISIONS})
set(T "tester2${P}")
add_executable(${T} tester2${P}.c testerutil.c)
add_executable(${T} tester2${P}.c)
target_compile_definitions(${T} PRIVATE USEMPFR=1 ${ENABLEFLOAT128} ${COMMON_TARGET_DEFINITIONS})
set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if (FORCE_AAVPCS)
@ -442,15 +612,15 @@ if(LIB_MPFR AND NOT MINGW)
if (MPFR_INCLUDE_DIR)
target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
endif()
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIBQUADMATH} ${LIB_MPFR} ${LIBM} ${LIBGMP})
target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIBQUADMATH} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIB_MPFR} ${LIBGMP} ${LIBM})
add_dependencies(${T} ${TARGET_HEADERS})
add_dependencies(${T} ${TARGET_LIBSLEEF})
endforeach()
# Compile executable 'tester'
add_host_executable(${TARGET_TESTER} tester.c testerutil.c)
add_host_executable(${TARGET_TESTER} tester.c)
if (NOT CMAKE_CROSSCOMPILING)
target_link_libraries(${TARGET_TESTER} ${LIB_MPFR} ${TARGET_LIBSLEEF} ${LIBM} ${LIBGMP})
target_link_libraries(${TARGET_TESTER} ${TARGET_LIBSLEEF} ${TARGET_TESTERUTIL_OBJ} ${LIB_MPFR} ${LIBGMP} ${LIBM})
target_compile_definitions(${TARGET_TESTER}
PRIVATE USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
target_compile_options(${TARGET_TESTER} PRIVATE -Wno-unused-result)
@ -512,6 +682,3 @@ if (FILECHECK_COMMAND AND COMPILER_SUPPORTS_OPENMP AND SLEEF_ARCH_X86 AND CMAKE_
add_test(NAME testervecabi-sse2 COMMAND sh -c "${CMAKE_C_COMPILER} -Wno-attributes -fopenmp -msse2 -O3 ${CMAKE_CURRENT_SOURCE_DIR}/testervecabi.c -I${sleef_BINARY_DIR}/include -S -o- | ${FILECHECK_COMMAND} ${CMAKE_CURRENT_SOURCE_DIR}/testervecabi.c -check-prefix=CHECK-SSE2")
add_test(NAME testervecabi-avx2 COMMAND sh -c "${CMAKE_C_COMPILER} -Wno-attributes -fopenmp -mavx2 -O3 ${CMAKE_CURRENT_SOURCE_DIR}/testervecabi.c -I${sleef_BINARY_DIR}/include -S -o- | ${FILECHECK_COMMAND} ${CMAKE_CURRENT_SOURCE_DIR}/testervecabi.c -check-prefix=CHECK-AVX2")
endif()
# Tests depends on the library
add_dependencies(${TARGET_IUT} ${TARGET_HEADERS})

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -118,148 +118,148 @@ typedef svint32_t vint2;
#define __MAKE_FN_NAME(name, t, vl, p) _ZGV##t##N##vl##p##_##name
#define __DECLARE_vd_vd(name, t, vl, p) \
#define __DECLARE_vd_vd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble)
#define __CALL_vd_vd(name, t, vl, p) \
#define __CALL_vd_vd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1); } while(0)
#define __DECLARE_vi_vd(name, t, vl, p) \
#define __DECLARE_vi_vd(name, t, vl, p) \
extern vint VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble)
#define __CALL_vi_vd(name, t, vl, p) \
#define __CALL_vi_vd(name, t, vl, p) \
do { vi0 = __MAKE_FN_NAME(name, t, vl, p)(vd1); } while(0)
#define __DECLARE_vd_vd_vi(name, t, vl, p) \
#define __DECLARE_vd_vd_vi(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vint)
#define __CALL_vd_vd_vi(name, t, vl, p) \
#define __CALL_vd_vd_vi(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, vi2); } while(0)
#define __DECLARE_vd_vd_vd(name, t, vl, p) \
#define __DECLARE_vd_vd_vd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble)
#define __CALL_vd_vd_vd(name, t, vl, p) \
#define __CALL_vd_vd_vd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, vd2); } while(0)
#define __DECLARE_vd_vd_vd_vd(name, t, vl, p) \
#define __DECLARE_vd_vd_vd_vd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble, vdouble)
#define __CALL_vd_vd_vd_vd(name, t, vl, p) \
#define __CALL_vd_vd_vd_vd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, vd2, vd3); } while(0)
#define __DECLARE_vd_vd_pvd(name, t, vl, p) \
#define __DECLARE_vd_vd_pvd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble *)
#define __CALL_vd_vd_pvd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, &vd2); } while(0)
#define __DECLARE_v_vd_pvd_pvd(name, t, vl, p) \
#define __DECLARE_v_vd_pvd_pvd(name, t, vl, p) \
extern void VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble *, vdouble *)
#define __CALL_v_vd_pvd_pvd(name, t, vl, p) \
#define __CALL_v_vd_pvd_pvd(name, t, vl, p) \
do { __MAKE_FN_NAME(name, t, vl, p)(vd0, &vd1, &vd2); } while(0)
#define __DECLARE_vf_vf(name, t, vl, p) \
#define __DECLARE_vf_vf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat)
#define __CALL_vf_vf(name, t, vl, p) \
#define __CALL_vf_vf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1); } while(0)
#define __DECLARE_vf_vf_vf(name, t, vl, p) \
#define __DECLARE_vf_vf_vf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat)
#define __CALL_vf_vf_vf(name, t, vl, p) \
#define __CALL_vf_vf_vf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, vf2); } while(0)
#define __DECLARE_vf_vf_vf_vf(name, t, vl, p) \
#define __DECLARE_vf_vf_vf_vf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat, vfloat)
#define __CALL_vf_vf_vf_vf(name, t, vl, p) \
#define __CALL_vf_vf_vf_vf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, vf2, vf3); } while(0)
#define __DECLARE_vf_vf_pvf(name, t, vl, p) \
#define __DECLARE_vf_vf_pvf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat *)
#define __CALL_vf_vf_pvf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, &vf2); } while(0)
#define __DECLARE_vi_vf(name, t, vl, p) \
#define __DECLARE_vi_vf(name, t, vl, p) \
extern vint2 VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat)
#define __CALL_vi_vf(name, t, vl, p) \
#define __CALL_vi_vf(name, t, vl, p) \
do { vi20 = __MAKE_FN_NAME(name, t, vl, p)(vf1); } while(0)
#define __DECLARE_vf_vf_vi(name, t, vl, p) \
#define __DECLARE_vf_vf_vi(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vint2)
#define __CALL_vf_vf_vi(name, t, vl, p) \
#define __CALL_vf_vf_vi(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, vi22); } while(0)
#define __DECLARE_v_vf_pvf_pvf(name, t, vl, p) \
#define __DECLARE_v_vf_pvf_pvf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat *, vfloat*)
#define __CALL_v_vf_pvf_pvf(name, t, vl, p) \
#define __CALL_v_vf_pvf_pvf(name, t, vl, p) \
do { __MAKE_FN_NAME(name, t, vl, p)(vf0, &vf1, &vf2); } while(0)
#else /******************** MASKED_GNUABI *****************************/
#define __MAKE_FN_NAME(name, t, vl, p) _ZGV##t##M##vl##p##_##name
#define __DECLARE_vd_vd(name, t, vl, p) \
#define __DECLARE_vd_vd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vopmask)
#define __CALL_vd_vd(name, t, vl, p) \
#define __CALL_vd_vd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, mask); } while(0)
#define __DECLARE_vi_vd(name, t, vl, p) \
#define __DECLARE_vi_vd(name, t, vl, p) \
extern vint VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vopmask)
#define __CALL_vi_vd(name, t, vl, p) \
#define __CALL_vi_vd(name, t, vl, p) \
do { vi0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, mask); } while(0)
#define __DECLARE_vd_vd_vi(name, t, vl, p) \
#define __DECLARE_vd_vd_vi(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vint, vopmask)
#define __CALL_vd_vd_vi(name, t, vl, p) \
#define __CALL_vd_vd_vi(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, vi2, mask); } while(0)
#define __DECLARE_vd_vd_vd(name, t, vl, p) \
#define __DECLARE_vd_vd_vd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble, vopmask)
#define __CALL_vd_vd_vd(name, t, vl, p) \
#define __CALL_vd_vd_vd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, vd2, mask); } while(0)
#define __DECLARE_vd_vd_vd_vd(name, t, vl, p) \
#define __DECLARE_vd_vd_vd_vd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble, vdouble, vopmask)
#define __CALL_vd_vd_vd_vd(name, t, vl, p) \
#define __CALL_vd_vd_vd_vd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, vd2, vd3, mask); } while(0)
#define __DECLARE_vd_vd_pvd(name, t, vl, p) \
#define __DECLARE_vd_vd_pvd(name, t, vl, p) \
extern vdouble VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble *, vopmask)
#define __CALL_vd_vd_pvd(name, t, vl, p) \
do { vd0 = __MAKE_FN_NAME(name, t, vl, p)(vd1, &vd2, mask); } while(0)
#define __DECLARE_v_vd_pvd_pvd(name, t, vl, p) \
#define __DECLARE_v_vd_pvd_pvd(name, t, vl, p) \
extern void VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vdouble, vdouble *, vdouble *, vopmask)
#define __CALL_v_vd_pvd_pvd(name, t, vl, p) \
#define __CALL_v_vd_pvd_pvd(name, t, vl, p) \
do { __MAKE_FN_NAME(name, t, vl, p)(vd0, &vd1, &vd2, mask); } while(0)
#define __DECLARE_vf_vf(name, t, vl, p) \
#define __DECLARE_vf_vf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vopmask)
#define __CALL_vf_vf(name, t, vl, p) \
#define __CALL_vf_vf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, mask); } while(0)
#define __DECLARE_vf_vf_vf(name, t, vl, p) \
#define __DECLARE_vf_vf_vf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat, vopmask)
#define __CALL_vf_vf_vf(name, t, vl, p) \
#define __CALL_vf_vf_vf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, vf2, mask); } while(0)
#define __DECLARE_vf_vf_vf_vf(name, t, vl, p) \
#define __DECLARE_vf_vf_vf_vf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat, vfloat, vopmask)
#define __CALL_vf_vf_vf_vf(name, t, vl, p) \
#define __CALL_vf_vf_vf_vf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, vf2, vf3, mask); } while(0)
#define __DECLARE_vf_vf_pvf(name, t, vl, p) \
#define __DECLARE_vf_vf_pvf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat *, vopmask)
#define __CALL_vf_vf_pvf(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, &vf2, mask); } while(0)
#define __DECLARE_vi_vf(name, t, vl, p) \
#define __DECLARE_vi_vf(name, t, vl, p) \
extern vint2 VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vopmask)
#define __CALL_vi_vf(name, t, vl, p) \
#define __CALL_vi_vf(name, t, vl, p) \
do { vi20 = __MAKE_FN_NAME(name, t, vl, p)(vf1, mask); } while(0)
#define __DECLARE_vf_vf_vi(name, t, vl, p) \
#define __DECLARE_vf_vf_vi(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vint2, vopmask)
#define __CALL_vf_vf_vi(name, t, vl, p) \
#define __CALL_vf_vf_vi(name, t, vl, p) \
do { vf0 = __MAKE_FN_NAME(name, t, vl, p)(vf1, vi22, mask); } while(0)
#define __DECLARE_v_vf_pvf_pvf(name, t, vl, p) \
#define __DECLARE_v_vf_pvf_pvf(name, t, vl, p) \
extern vfloat VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(vfloat, vfloat *, vfloat*, vopmask)
#define __CALL_v_vf_pvf_pvf(name, t, vl, p) \
#define __CALL_v_vf_pvf_pvf(name, t, vl, p) \
do { __MAKE_FN_NAME(name, t, vl, p)(vf0, &vf1, &vf2, mask); } while(0)
#endif /* MASKED_GNUABI */

View File

@ -1,129 +1,129 @@
sin u35 bc50dfbcbd8ef534541d1babe90860c7
sin u10 dbc2cf81f292ef50fa0119e222c6c9f9
cos u35 506e34a809b80ad3603ed46ba2a574b0
cos u10 a0f69df5937152b8f8f0e671f3676289
tan u35 970b5cd7f0e05defa22ebb155ab61a40
tan u10 5fd08e0552e3ab853439bf5fd2bd344d
sincos u10 7c164edcaa45988f6165b653fc76c495
sincos u35 38fe7e261e184ed8dbf432ce6bedc5c4
sincospi u05 0c6fc00c7aaf0b6e67d66542d1be833d
sincospi u35 c428b0fc3e6c5be4d2c03dcd8bb27a7c
log u10 4855b27222d900bea47a27cadba71727
log u35 c95484de57c167da3d8d6d1baadf9ffa
log2 u10 2662df9af919680ca62e1752fb1b7539
log2 u35 1cd6d7f194a5e8364191497adc5c5cec
log10 u10 36645e8031d873d66fd0ec2c5959f273
log1p u10 1383924fb56cf2e7eda27de21320c591
exp u10 13692a48edf2cf7a3e047b16ddfb7b81
exp2 u10 436146f8d6dcaa4a754837108a9aa3e1
exp2 u35 8881d075d9101a1dfa3f6a10b9ee8373
exp10 u10 9d704b310f683872a6446cfc97726a4d
exp10 u35 bc07745ebc22a7ee97679154c24b23cc
expm1 u10 cd3f0b8e86943d52c278394b60e2d22e
pow u10 a0ea63b27d33262346a35c9439741075
cbrt u10 5d8bf28ac74624594fd1be9217817690
cbrt u10 3c896e03746bcf1b3f70182dfec3d93b
cbrt u35 73daa306764e208aab1627ac110b10d7
cbrt u35 c29b7bf200215425b4ba948c8cc94c42
hypot u05 cc2f18e409e19a02cadf7b91fd869120
hypot u35 5194e0a554174a6145511ce3df9c1f46
asin u10 86c061caec3fa2e1bc71bda4dad29f4c
asin u35 31303b88bdc00206265002d6cc5e89e4
acos u10 0a1a403590f2ac8364f132b334920945
acos u35 493f960c1cce57931d95a5a22a0587a3
atan u10 c97624a24ec034cc0c8985acb61d13cd
atan u10 0be0f550406923016cfeb5ef62c25b15
atan u35 9d6d83e066b5a4851d44771418c9948c
atan u35 f32c1aa4caa08c6945afd1125ba8b113
atan2 u10 6b1d9d25fcd96053acc19d1633fab36a
atan2 u35 afb07894347062a96dab705b34eb1763
sinh u10 61d459b1f368087f6f23ebf8e9f0ea01
cosh u10 f77eb95f79e274c12b4e92dc0389259b
tanh u10 2bb9dd54ed0fa22bb5f3b6d557eb58a3
asinh u10 01136e54e2a434839530dda54f33cfdb
acosh u10 2f3c28c9ee2eb2b3d5659c6cb2a58e3e
atanh u10 601a77ba8c1d5175f2808b48a41260c1
lgamma u10 90cdc41063f4198c6ad592c0cdd0f5da
tgamma u10 6f864c3a1f17fbdf914cac7ffcd82cb7
erf u10 f4ae148b59bb7501d8f5746300850376
erfc u15 5e116a4316dafa742769f71e18f6f9fe
fabs bef2f2ac8a4789357e580b4da4f9b9fe
copysign 3219022f267464e3704f90558e8df3bc
fmax 4e4f5220ccfef191864c316df0d18fc0
fmin c0f8effb6c611e2b3b91b820ad943f62
fdim e876d103931f18ceede5bfd7e3df7ab0
fmod 618aa751e13012afdb41ec80dd35e6ba
remainder 8d692dbb44bbc9be5af0c0657d3008b8
modf f03ce73cd4f9ea7f69c017f6e53355d5
nextafter 9eba4e30d12d74dc4e8003fcff0f1582
trunc 1bc7e909eba121dcef7f0e4046937ae5
floor 2cff66b499dc8a30cec9467de659b774
ceil b080e632dcb8f8134d8715752be12917
round 8907e21687ca9c2a539297536e754950
rint e49f837096bc661fe1c742801dd99a30
sinf u35 833d845950b9cbb025629fe4c040f8f6
sinf u10 9c21afa4d7d6af3fc666309c3cd647fe
cosf u35 74d7f871a6553cd0019087895e2052ad
cosf u10 35349e94c323c1614f22093959288010
tanf u35 bbb7c092d017e96d2454a38a20687735
tanf u10 227423bc04f42d76a8f68082ba696126
sincosf u10 83ecc4e3d5295056e9d8c52bc196b666
sincosf u35 533319caa49a961e4909bd6dcab40721
sincospif u05 8b3762b67a661957c1414c351ec49034
sincospif u35 cec15ed76a358091632634166fa77b66
logf u10 c5a90119943acc4199e1cc7030b5def8
logf u35 af2fbe4bfa2caaf59c734e3749dd15be
log2f u10 ba8acae369bbb7b6404cccbc633fe25b
log2f u35 ba32ebaa8c470899ebd433d190c00f03
log10f u10 7e235a82d960e4434575dd39648d8bb7
log1pf u10 350fc4f13502b36bb1107e1b1122acb1
expf u10 ee4adaabefa3fac6c0f1925b2a948eea
exp2f u10 b0d283dbae0f36f1b3c7eed9871f0d0d
exp2f u35 522cc30f722f77fceb07015830b351a3
exp10f u10 b0564be151965600f5744ff2e4992bc9
exp10f u35 d142f1fb40e44f0c9e042718f27ee3e0
expm1f u10 ebfd6498cb40f61b609882de8a7f3c74
powf u10 a7cba3239c87969662e8b41a4dd8b4ab
cbrtf u10 01c5cac23fe21638be1c3eab6e368fd6
cbrtf u10 2a245b03f83e9114644d03b40dac707b
cbrtf u35 3ce62350fd585f0524a12c974fbe6cf5
cbrtf u35 2aca0404626a28f7af7f60105ad6e217
hypotf u05 bc5971cbeebee27b4c0d91fbe3f6bf30
hypotf u35 a6f0f774b346a6bba08889ff9ba3f193
asinf u10 7f77f7453b961512c89e87e49c549cfe
asinf u35 22ed8760aa328e1f714031eec592a4d8
acosf u10 15617dd0429b90e59d2923415934c2a6
acosf u35 af0b132d9e263721f9296187dbf9b9bf
atanf u10 26b77fb423104b45633cf24500237d6e
atanf u10 4313d0bc2708de53f74d804aac6564d4
atanf u35 97a1797897955643c722c7d291987331
atanf u35 7d3f47169415058e8578f11d899bfd10
atan2f u10 098a33f730fe95ce4774a991db4cee14
atan2f u35 56fc6bd8349979f0d0b1dcdb57f68363
sinhf u10 0780a2f57df3a831718195d1ee5c19ef
coshf u10 cfbb6aed408e43a7b7f053474100ff2d
tanhf u10 d19f254d41e8726c748df87b95bc9acd
asinhf u10 260d129221468a86bbfd609c27bfea6a
acoshf u10 24ced7e5631c78b20a5716faeedbaa92
atanhf u10 164fd77b8372b8c131baaacab1c9e650
lgammaf u10 3bf6d824175c4f4d86f3073064e41e84
tgammaf u10 f3a8d25c852068622bdfcae4cb813583
erff u10 f34af3814153de040b93e573ca7d21d8
erfcf u15 915ab9830de89a5a504b3ce7cd2fecda
fabsf a3c72220bc0ade68fe22e0a15eb730d4
copysignf 6b35517b8e1da78d9c9b52915d9a9b19
fmaxf 9833a60a2080e8fd9ae8de32c758966f
fminf 2dcfa19e1f1ab4973a7dec9f2cc09fa0
fdimf c5c0fe7b095eb8ccbb19fbf934a36b24
fmodf 77aa84a9703e202a56e5f4609bd2482b
remainderf 5a453b1217c173e4dc0b0211066750be
modff 5fa4f044f20478216aa085a01b189697
nextafterf 517c1c8f072e9024518d3d9ead98b85b
truncf 6937050850be63c44d4b7dbd666febe6
floorf 9341be69ee345c8554bf3ab4e9316133
ceilf c70874771cbe9741f1f05fedd4b629e9
roundf 0cf52f6b8015099771e9a7dfa6b090bc
rintf bed68e788e2b11543c09c9d52198abf8
fastsinf u3500 8eb51f86fb40414dd21284f020f24b6c
fastcosf u3500 69cbc3703f1d2c68695b00b1b09287b2
fastpowf u3500 e02e6a692cfa22a6b7149168c67ea1d2
sin u35 7ddf50bfc76c34f8640e1d48368a4807046ed09a7cd9f4e092364c0ece567420
sin u10 2dec8ff3f5d3f0601ee7d5d8cda65777b3b31d86f522b1306cf50d0a7820bdba
cos u35 26a6889b13864c87e41500246afd02ec626529b122a1622ab5b4d915342fd981
cos u10 094594b432e3f6f7695f21a9eac5f48adfc2b52729a0b7f6dcc73d56572896d4
tan u35 9e4884d3079d52edb120d080ae609bc94dea6de36b91f9c41f7a69fb424cb7bf
tan u10 ae386240aec3b3ce4b7d5a13b1f69759f54fc57378439b9801c65de4e7c8f5c6
sincos u10 dccd728b97586cd65da3998eb225c3b59634b360acb56ea74d1d45d61fea4f4e
sincos u35 2c16ec6ba4050808419fd5b9c995606412a0fd41f2a7e109c1a8cab5adf0b11b
sincospi u05 9fffb591dd38190f8dd61d0f9dcaf7843606d4c3f6717bfac9835471178600a4
sincospi u35 b362c2f22c2475715d0933caa5ee1400ae1639da9e60c83eeca676e3b2be12d7
log u10 a25704431659d3f451536556bd81a2b9c2abc82203e23539df2ecd899436a9e2
log u35 83476779543cb9f3a038e478e8fee0d6ee0060227a2433363d221d71ddc72ac7
log2 u10 bf2467410af2c29e30ebf509bc066759c17b31fc409120382898a6979fbbad2e
log2 u35 2d416462682e561a2bab83d5b11ea235cfb991675e3777fa50da75d755b08774
log10 u10 1aa2fb18c8ae9a19f8f9be331f72cb3f842188b705d73e86bde47ecf661297cd
log1p u10 e21e7518e09b85f0adaf1d0d3cff362364e925fd07aa3163d77b818cb644d942
exp u10 c21df57b84d8c9010aae562e21daf7b1c3f7df277db9cff2999d74bfb517e60d
exp2 u10 451209f52083f022f30793abcf7761eae138642bf8d5a252ca8c83489088bff3
exp2 u35 0661d1afebb47f2755e97337d6b065cf925219aba48e192b9fbb56f696f17d84
exp10 u10 9881cd7b6c7c2eeb7b8b5d297277d1d0f4276ea74835672a94fbcade8e604d34
exp10 u35 5a8d99078d3ca904dad9fc3ac4ec7c90d2bcd216417022dcb38df30293e1cdf5
expm1 u10 609ae579ed99b4c8ff7ccaead9c3a2216bfbc1d156dc05a6b401de066b0a079c
pow u10 a0034cc77ecd21a809265f76e67528217357f2ef3d2883ff017512f92bbf9360
cbrt u10 e128b321cd05dca403a7b0633424cad82600ceb5b61966f70ff3cf425bd6b3f9
cbrt u10 b722d767ae6dd66d3d1dfa9d5d2aedaed3c652020dab5fcfdd729b3f2c803e98
cbrt u35 5ecd857b96a17ecf71808a53416e0f40d0935f236e307dd5e43587b12db375cb
cbrt u35 c46da13b1a71174922de04a844b1b303ac5fd2d0da98a6352b234292cf7e42e9
hypot u05 9f4275e06e1ce269722162c4bc521f159906a448ee05f9619037706cd3e54b72
hypot u35 de0c1ae1ea4c9eda164e0dca28c293cc72caf3b12b2d15f757bbb4bb347f257b
asin u10 c51e0211bc0a1a422982df89d38f48ef0b0af1d90588a1715fd4ce966c701b66
asin u35 405410e624265daa84c0837c55ccf2d45d8c4f6086b6f6a744c4c6e133cbcc1d
acos u10 8e8c6e984110c0decc1ce21bf71505195f029a935064bc3692997b400cb15edc
acos u35 bc99071767af3d4bf23c3d828284a6950ae205898a6b3773a5aca0b59d6d6a0d
atan u10 c96690351d5df7745fed2004b1c72dc7aceaa32c4d400f296c32efc9ecddab0e
atan u10 9f64e9a576084542e1fa4a4064055af79b4ae20ced35ca617c4327a30a4a70e4
atan u35 a0852efacaa91625350cf104f8fe0dcbb5936d2b9ebbd3cf8cd6234ccaf8a0d3
atan u35 e61f1f4917e474cbc7ca5ada17c31bdece04c6a86210a472c53cf5e8faeac882
atan2 u10 9b6c9b875a9c841259fca8d718778a1895a5b434ab4b95d284c4345249c2f853
atan2 u35 895dfae0dbce6c2aff81b986ebc732fb0323b267f57c7b1e0d5c8ec522da6af4
sinh u10 d3859e3dc1ca924f11dc7b464cb0bb535d4ad71d1ec6f416a82db6e0e2390367
cosh u10 e6fd1172e97fa9341028299dd8a00379f1313170b8444a6a3c291230e4f178b7
tanh u10 5e2c1ce9d160d1a5dcc5ef8fd74f860751764f5dc14124075f848074ee386618
asinh u10 37d0df9811cc871b1dde4d762cc0eb53ec6c71c7bcf13100b9b5302ba1a85b99
acosh u10 158fb84af679aea2ab411fb84cd0b12ff876d897722ff84c54fa567c35705033
atanh u10 32253ae4f643e56a3d25a6d96d316ed94cd3a9e5ea16ad7180ff96e68571dc34
lgamma u10 4663f72dcb58a53bedefe071de51f0fccb9b73db12f5b53d5acea347d4de06cd
tgamma u10 87e21460a2a991b677416b39a85d391051e4327a39baa7bfb93f2e27965567af
erf u10 56488fa7013635a233d05787e9a681c1c8775b6d9aace07f0d1dd16fc34c5875
erfc u15 0e5e1126a0eb4cce30f6cb164b33330ac4d792c21b8bfbe33cc9a828b4f9f047
fabs ff336faed535e34a082752839c9e957ba069ffdf0b046215bd415ce9120f29a3
copysign 67a7a162bfc2f15b76ded0470f938ab000edf8f8566d5a19fa99d4ea4d29fff3
fmax 57f39d5440fadb2a7387a47c00b067d5fc57ceabd7e5d64943b033acb5212063
fmin 87e131762ec9c46badd6105ab66f09d99d65776e2719f6af9befd8d6d3f59b6b
fdim 3331d6a17f289f54d429bdda9374d7d2574e0cd173e930a57436e8e484f271e9
fmod 89d26af516be177c55ba9fcec972416c35e229456b053271548021e9b070c193
remainder 2db01bb12776ec14d4a15469c31b49e759d74a3c8ed30d14fe88af3b27b5c398
modf 7780d1e6448f21bec6504e398a4e826f304da10aaec3c4e210bed86abdaecedf
nextafter 60a6c07477f6d07cd938ba6361d020175193a934a2714132615dae0bcedf785a
trunc ee43b2f9d897428885cb039f85259ea5ffe4efbfe4bf0dba16ee19829d198ac6
floor 29f8be9b8ad5795e65ed4f34878a85f5f8a1be707489345c4ad04b36d4da54bf
ceil bf267441867b261f8dcfca61b55fdc7ac0ff7a017b150da1b532776894962208
round 5d7d57a50d9860a7d145d428884df0341564dec7f14c24d5c319c8bce5565f9d
rint 834f8e41e3a28f43b26bc9a5836882cbc0fceeaec5774202cb6df473d995f5a1
sinf u35 0b91688d57e650a50dff113cae51be6088e067e877baf0fc50675528432d1539
sinf u10 d6ccd197ac5534b74a04340e62e38fc5ec9fb1cbffef80fb1782e659a1832260
cosf u35 c5d48802983d4673bf3961453a3b02f13b894b83144f067d93b1d804de722aa2
cosf u10 420ba2e57ee0bae63e995ffb85aac07a5f1758d76f824d24193f75af349fca8c
tanf u35 ec5bcbe8a93d2a5f59365656ba15a10af2f24375bf265663f762730674a656b9
tanf u10 2d4c53018daf572ce2e20fc7bbe1435b04746db6b0cee9c33304cef94f14dcde
sincosf u10 b0390e1d3554fd469d53d5e45146e9e1f440d46fc0a9b8f9ea334071af369f55
sincosf u35 c4967d888e7713ff231c3fa3372a0d89c5df220585054156256bc3d4f0917f3a
sincospif u05 66ccd831fa4c215b71cc791f3d0cb31babeadd34539867df8029cddf45539ded
sincospif u35 9fadd97cd2996c6601079869248a59772bbd5b23b625177ef0351120f0759fc2
logf u10 a43f52f3ce728ebd9ec9e2e84c901f6012fe0d6b83029c8380036404f59cd3ea
logf u35 fd05264b52e29af9f0907b98af57f0cc0737b506a6290c259d3eff92123add86
log2f u10 c732f1b5c7f5147d1576d4d858db46952d42ec229117dffce8b82e798799d2b6
log2f u35 d2e637436e49d04e7747258946075b715033e925ca589696b4577a4f96632a9b
log10f u10 c616f9465c071c42532255e9a49ba4305e0a588fc8d87ac31fceb30d2c59391e
log1pf u10 fbfce7374fd3e030b5678fa31e99bba2aa4e68e60e8eeb15a10e41fb34ed1cda
expf u10 d75ce19c93fb038cfdd8059f816a7912481b26f7d90cbd554545f21a0b873861
exp2f u10 4a579f3f572362629acd563e55d765a7d83cbc625584f26e0a36163e80bffe87
exp2f u35 90c3bb433051b828f081de99c3d3e1d731a718de306d0c9937478f2b57e981ce
exp10f u10 57856cab0911b80ebeeded0c30b9e978ca6d17314ca2e7522c02ff6b6e904f57
exp10f u35 e14dfb56cd4798e675b751c6cd4ddc073e9a5e8f59a97638bc8a9b766f564a96
expm1f u10 c0066ace0274e83dfce6b6f806ad89ef4c8b0919011477934d43c88dc42e0db2
powf u10 b380319c0b9bad2cf717f8c31a09361b869d49c1e58ee5e1f0b987f96e3acffa
cbrtf u10 3589ce3bce26b796ddc4c6ec177cdb0ed05ece414530f4c22c77452b37432050
cbrtf u10 cfe7b512f728e60f99e14f597d34c94279b96e6cc897fc5ad1377365afb164c8
cbrtf u35 57902935bad6d5f45565d447e82ac2fd673442b8fb01fa178079376ff1220b27
cbrtf u35 172785fb38220b147078c16b7b203edf4e879f853e335522074ae0103cddc472
hypotf u05 efc46c07c1bff7caf4f1d52fbe0db4ab70100601c114acea3f4ecf7b2aeaf826
hypotf u35 f09d3b29f563e599ea2d5e6434ff84de3e72ae277fce5055ee2bbf9ce6aa4214
asinf u10 82e645be1e4e8216be262cf67eac586a8d8a0e962ae5d34cb14c55ad177883d7
asinf u35 1010918bc615b794d532b8643b60a315f2bc8e2248020b4a6024ffbd593c54b1
acosf u10 886eb790a1d46f29fe04d470a1e71ee565951d22383cfd67eca92d3f3437db6a
acosf u35 75ebefc2d532049af4234e3247b311782aa60a776c53d669956f578e5b2e76cd
atanf u10 540a69391b28afe8d067cc99ac86abbffe08bb3c24f8962be4b7aef0677562de
atanf u10 2c12f291846249ca41d6a9c4108bd93a6b30246ef776bc282ad8cbb9e6c05890
atanf u35 d8d7c1156fd61d138ccb88d435097be739c7bf4806ff605c0d39216380b55e96
atanf u35 6985b58dddf827aa610029c51aaa204952589175efb607e2b135a1dc666b3fca
atan2f u10 7756cae9e0b7ebe7e5180f9714e49c6403ead4182ebacbb89dc0cb3cc386e998
atan2f u35 a645f681b04876451d8f0de0dd28958303b2b7f3b51957883b09588776111ddf
sinhf u10 d8094aaed987d20b0c4e8eccb63ed5cc00f4ad8bf46c67888f5ab87c21b15681
coshf u10 26d59cb9ec0a6f5965dfe66df3f89fd2bb348ce75f811ee580426df42f1ebdc3
tanhf u10 3b715185ce7c39d70ff17dceb539380b8ac9c80303c9796e41d1ebda6f2b0ece
asinhf u10 281dfc8d6f3a9cad40276392b21e48d14ae05986d9a97ce21cf122adf5d14ce0
acoshf u10 9a5809171d6a8c4a3e39fd32a71d5dd83d7a55ae8c2c352dc453e59b01c4a42a
atanhf u10 66540cd17454f09a95ef5adbeef6e9413ca31cb0446edc879447838f7b8c079c
lgammaf u10 b26a90f8b782f2a91132d5c12dcd56d749e301bf51e275085df2c4579639fa44
tgammaf u10 e3ba7f95b002555d655e07e8906d29e0f867c28c3abe6513d32c20468cdce05c
erff u10 81041541f31e72a7745d6fca4b208d4e332af8fc2366df5372b6cb38755369c9
erfcf u15 88205a29a679f22867bf078202e68f2a8f5557780f0b8366db2f0f20c1e23151
fabsf 560d13e463bce4448d733798c5818b13e5634f893211047bc2fad9f4c613797d
copysignf 74f7af06376f4c79d7af9ce4e50bde2fb8f22b56d741bdc67624ef7d1989e76b
fmaxf 7474be750857fe400beb2bc14fd1b1113a2e365ae7b45b0acc508436b4c32a94
fminf 4e22c453645f3c108c27e2c2fea65cd6a6b535f8236fd7382ff1082db3b31b5a
fdimf 16c17ff31778c7d63ec7f65c3b2a8561b79be62b1bc1b399ac0ffc43285b6cd0
fmodf 9fa4ece68b16803e6c47fa5cb280f8c246a2ef5731b0609bec71e1db27906f0a
remainderf f32761a428b1336051ee773e470b74ada93a611cafbc08f6a9aff36957c84f64
modff e976f223f2f4d380e9955392cb010920d5439665ae8eaf0fd6abbc889700a4f7
nextafterf ac05b3fd824c3ce73eb3946c7e3dec94ce4b1ec4efd0237bcfb4578d3d422cc6
truncf a014cd8206fb15c21b1cc773b951cf7f673e8be9e6e697ca0cf7293becb3d55c
floorf f17658ab95f73a1b4cfec0417e82b1b071cb97a3aac0110e289ba6449b2aeb55
ceilf e173e35cc97a85629ceb5025bd5b1abad52e4e153166cbf9672396b4ca23b59a
roundf 3e67b087b019c806d87593850bf0cd106718cf34f50684784296ef040f301fc0
rintf 827acf1e7d253c4fe9fdd4c5c9e53f35c80af5550ff6cbbb7aaac67577630c70
fastsinf u3500 6c68502acd4bde521daad91a0947faea0bd4b15c8e1d8adf4614351eca60f7dd
fastcosf u3500 64cb4ab04eca2de35df084ac4c3c7285553301474357783f96ee6467e21f9144
fastpowf u3500 a908509f84693183aabb532aef9c26f42e340bd0a0253d1e40cab44358c6b76a

View File

@ -1,129 +1,129 @@
sin u35 c163e4a7e9ccebb2181dcc8653367d8c
sin u10 0d6bf6f2c935db82588222da95659019
cos u35 52f902bd939d751b5b544ac70181fcff
cos u10 afcdba92a75a76d56b8cf2f22d4bec9e
tan u35 906cc42b6755fe514c5e185fcb4d2f55
tan u10 c98f29a62067fa63646d9bcc29a310c6
sincos u10 3fe37f4eb805505152f2b14a22a9f94e
sincos u35 95a7b7f48c71febf10ec6eff796dd391
sincospi u05 0c6fc00c7aaf0b6e67d66542d1be833d
sincospi u35 c428b0fc3e6c5be4d2c03dcd8bb27a7c
log u10 4855b27222d900bea47a27cadba71727
log u35 015f8ae899c9b921d48919dd12ef19a9
log2 u10 2662df9af919680ca62e1752fb1b7539
log2 u35 908b1949db34ea855944f00089b21e23
log10 u10 36645e8031d873d66fd0ec2c5959f273
log1p u10 1383924fb56cf2e7eda27de21320c591
exp u10 084e5be89c2ad03e356078ea4f287bab
exp2 u10 6e36db9ae2cf9eca82e3d9157c622351
exp2 u35 6e36db9ae2cf9eca82e3d9157c622351
exp10 u10 0cc08bc6a3d08d6e61450b5370c6161e
exp10 u35 6904d5509ca794747aa249c13886f90f
expm1 u10 cd3f0b8e86943d52c278394b60e2d22e
pow u10 7e19796027d7c1d1999be948f90e6181
cbrt u10 5d8bf28ac74624594fd1be9217817690
cbrt u10 3c896e03746bcf1b3f70182dfec3d93b
cbrt u35 fc7ee3e3e6c54365d708b752c242a947
cbrt u35 2408714a56d74f8c82389ca6772cdbc1
hypot u05 cc2f18e409e19a02cadf7b91fd869120
hypot u35 be7bbd41dffd746b70261ee773cbd4b2
asin u10 8a21b7c28cdaffc9d3e53f415367932e
asin u35 9c9e8107782898e9faed6924ad1b3cb1
acos u10 28261e4eb8331865660c814676d5c6bc
acos u35 310911130bfc45b10dabe3a072939331
atan u10 f931de72f2f6a7928f307a8a382ae255
atan u10 453f9ef62f58f9829320baf482a1d457
atan u35 6161b6189609f105b017d8768d0a41f1
atan u35 6face71d8d93c69448d49ed6140e361d
atan2 u10 469babaeee9bd30e17af2f473b3ea500
atan2 u35 6a3e764125aab2a0a13e7a0d9ec02f7f
sinh u10 61d459b1f368087f6f23ebf8e9f0ea01
cosh u10 f77eb95f79e274c12b4e92dc0389259b
tanh u10 2bb9dd54ed0fa22bb5f3b6d557eb58a3
asinh u10 01136e54e2a434839530dda54f33cfdb
acosh u10 2f3c28c9ee2eb2b3d5659c6cb2a58e3e
atanh u10 601a77ba8c1d5175f2808b48a41260c1
lgamma u10 90cdc41063f4198c6ad592c0cdd0f5da
tgamma u10 cb9a93844ad1713d2ab92ff5b6398150
erf u10 8a0bc2146a5c67b6bebc58f4b0076568
erfc u15 3e247a54183eeddedc33e99c50118995
fabs bef2f2ac8a4789357e580b4da4f9b9fe
copysign 3219022f267464e3704f90558e8df3bc
fmax 4e4f5220ccfef191864c316df0d18fc0
fmin c0f8effb6c611e2b3b91b820ad943f62
fdim e876d103931f18ceede5bfd7e3df7ab0
fmod 618aa751e13012afdb41ec80dd35e6ba
remainder 8d692dbb44bbc9be5af0c0657d3008b8
modf f03ce73cd4f9ea7f69c017f6e53355d5
nextafter 9eba4e30d12d74dc4e8003fcff0f1582
trunc 1bc7e909eba121dcef7f0e4046937ae5
floor 2cff66b499dc8a30cec9467de659b774
ceil b080e632dcb8f8134d8715752be12917
round 8907e21687ca9c2a539297536e754950
rint e49f837096bc661fe1c742801dd99a30
sinf u35 f8f804eae1d9443103e81fec96293477
sinf u10 3f12a7381f1cbb1830d92b4ec72d21fe
cosf u35 f2f3d1c9f090cde9c02439608dc7066e
cosf u10 dc35f27fae65f63f0aa6ad241f8b387b
tanf u35 68d42ad1fb412e6b8be3853461e61213
tanf u10 97df301d4f59e67d5318b5356b703f06
sincosf u10 a97124d810ec461c135dc4fb0c059b6f
sincosf u35 0cc521e52ae1227d311012c2919c1ff2
sincospif u05 8b3762b67a661957c1414c351ec49034
sincospif u35 8720757f221c00cc8de24b7dc4949144
logf u10 c5a90119943acc4199e1cc7030b5def8
logf u35 b6234302d534d6ccd48155dd6b9a4293
log2f u10 ba8acae369bbb7b6404cccbc633fe25b
log2f u35 74174c90717c86642b71284452a8aef6
log10f u10 7e235a82d960e4434575dd39648d8bb7
log1pf u10 e53dbfa80bcc1a7bcfd21000e6950475
expf u10 9597388315e4b3e89c4c97ce46374dcf
exp2f u10 42d66e5e4cb88feb29c5b36c632159a5
exp2f u35 42d66e5e4cb88feb29c5b36c632159a5
exp10f u10 954f0824b6d949d0da03b49950dc6642
exp10f u35 6fb0e9a829e12a06679d379d05b53ede
expm1f u10 ebfd6498cb40f61b609882de8a7f3c74
powf u10 2ed84af40d03e307a620365f172d010d
cbrtf u10 01c5cac23fe21638be1c3eab6e368fd6
cbrtf u10 2a245b03f83e9114644d03b40dac707b
cbrtf u35 6c22a6dc132c5212250970f22f42256d
cbrtf u35 5ab696ae11f9637413d30e6496d5324b
hypotf u05 bc5971cbeebee27b4c0d91fbe3f6bf30
hypotf u35 2a7cd97768287084b7fffc7e9fb39072
asinf u10 e2e571a01984c4ffb3f6e38e0328d90e
asinf u35 70df2dfc3a3569868cce60c38e7b1962
acosf u10 5180fde4b02a0ca4cd75f0a786a1bfeb
acosf u35 72b0e2f9791f90f1c43570b9e9ba893f
atanf u10 fa672e387a204055f735b7af98dd8a35
atanf u10 d017670c13bc221b68bc9ee5f41c4b5e
atanf u35 f592e46eaa5d29583f86d3e336f20b6b
atanf u35 e7087fe40de46921826b373d10c40954
atan2f u10 275b2fa8ee554c45551bb142db9f8197
atan2f u35 44b187851195d24bab2561eb8f4ff5d0
sinhf u10 45bc228a14c3e39eeb35e9764394a23e
coshf u10 838d441e85d415ef4fb1e5c5ea966a71
tanhf u10 d19f254d41e8726c748df87b95bc9acd
asinhf u10 927eeb621a3e2d5039f1a07fcf150901
acoshf u10 932520013273174fcabe2be4a55f919f
atanhf u10 164fd77b8372b8c131baaacab1c9e650
lgammaf u10 3bf6d824175c4f4d86f3073064e41e84
tgammaf u10 c3059747811d98846f74a63d3747ac3d
erff u10 f34af3814153de040b93e573ca7d21d8
erfcf u15 687a9c577512d349ddbc0643013d2c56
fabsf a3c72220bc0ade68fe22e0a15eb730d4
copysignf 6b35517b8e1da78d9c9b52915d9a9b19
fmaxf 9833a60a2080e8fd9ae8de32c758966f
fminf 2dcfa19e1f1ab4973a7dec9f2cc09fa0
fdimf c5c0fe7b095eb8ccbb19fbf934a36b24
fmodf 77aa84a9703e202a56e5f4609bd2482b
remainderf 5a453b1217c173e4dc0b0211066750be
modff 5fa4f044f20478216aa085a01b189697
nextafterf 517c1c8f072e9024518d3d9ead98b85b
truncf 6937050850be63c44d4b7dbd666febe6
floorf 9341be69ee345c8554bf3ab4e9316133
ceilf c70874771cbe9741f1f05fedd4b629e9
roundf 0cf52f6b8015099771e9a7dfa6b090bc
rintf bed68e788e2b11543c09c9d52198abf8
fastsinf u3500 5c48081c74cd0316379b580b047dbfc2
fastcosf u3500 6f73d116f109283e5632c31f5988f55b
fastpowf u3500 6dbb3110412df4fed5a71f50d40def89
sin u35 c0c8e53bd8762032e30a6e843131ee80bcb7c6acd3fb299e937be6add5a8d5aa
sin u10 6692fc59b029f7b11a511c21ff2a5e7c01c8b76bcfce80357878b0ac8dc42b29
cos u35 5096992132d8ea8ffdf32f0193b6c6dfa5700bbb64a278ec2e7e5ddf4d0ccd51
cos u10 bb8942ccdf1c86289f2ab560033d38f39b37bcb87d0a2f646f71a9521456e905
tan u35 334507c35c29da824184f60c8318d3d0cab6ec91291768794936a0fd1caa08f3
tan u10 48006a954a296162fe7232ffeb33e602ac54bbf38e2764ab65ea2717f53b7906
sincos u10 042262aeafa5774345a43d75e0aca41d4e8e591ba86a35fb113e9f41c1b1b198
sincos u35 628ebb6a27b6eacff75deddf301f06ec517dde8ba4566f84d765775d4d2cd8d1
sincospi u05 9fffb591dd38190f8dd61d0f9dcaf7843606d4c3f6717bfac9835471178600a4
sincospi u35 b362c2f22c2475715d0933caa5ee1400ae1639da9e60c83eeca676e3b2be12d7
log u10 a25704431659d3f451536556bd81a2b9c2abc82203e23539df2ecd899436a9e2
log u35 b47e57b1afc82b14211b9f3338f41208771b7d971774cf535e9e9bcdb6327db5
log2 u10 bf2467410af2c29e30ebf509bc066759c17b31fc409120382898a6979fbbad2e
log2 u35 61cdc83d0e7de8d132764065fc7ba47bc18dadac441938d7bb0550c18b27956b
log10 u10 1aa2fb18c8ae9a19f8f9be331f72cb3f842188b705d73e86bde47ecf661297cd
log1p u10 e21e7518e09b85f0adaf1d0d3cff362364e925fd07aa3163d77b818cb644d942
exp u10 c7997af9618cab09736d7736614dfe6541c6417b75894474c02849e25c5eb6a4
exp2 u10 43ca5b299c5ef8d38c7ea3594e8925f00ff7dda62788f0ed003ffac026f4aaa4
exp2 u35 43ca5b299c5ef8d38c7ea3594e8925f00ff7dda62788f0ed003ffac026f4aaa4
exp10 u10 b9d8ea0a1bffa2097c84ea57752a00e71e12b0454ced6ce40a56c0d62a05c2f0
exp10 u35 9dd4096b0f0907112a7051e4cd0f8b93f4e56403224f5cb5e0e1a3601b55fc14
expm1 u10 609ae579ed99b4c8ff7ccaead9c3a2216bfbc1d156dc05a6b401de066b0a079c
pow u10 74772c3583d5579f1b28fd322048a40c286595057df623ec65028a9647f7bf46
cbrt u10 e128b321cd05dca403a7b0633424cad82600ceb5b61966f70ff3cf425bd6b3f9
cbrt u10 b722d767ae6dd66d3d1dfa9d5d2aedaed3c652020dab5fcfdd729b3f2c803e98
cbrt u35 96d1ef3aa862044af5cb0ee7fe62e161b61fbb9ab50549925b5f4bc8c1450106
cbrt u35 3d648e8f0e56d75a4765d3fe4ba58578dde6576199dce8a920d4fc74f3fd2077
hypot u05 9f4275e06e1ce269722162c4bc521f159906a448ee05f9619037706cd3e54b72
hypot u35 0473b61c7dd7a4e6a8394bbafdc613f4e1d8eac704830dbc6257ee8f85601149
asin u10 7c466883cd3b6055bff9f8f13e2a8eff00de053f428f88b169fcb18b85f5859e
asin u35 cf291432912ad68a37dccb92882199e11d382b402794d72bf78d467a40ba6911
acos u10 31f80b277ac9dbedb9f4397fa058b11e3e2497adb5ad8dca3055b18bd071b2d4
acos u35 6025e6a4a64608b06709ba1eda3da1a3a697344c27dc1be50aeecb722aed5837
atan u10 561fe325ecfbe2ed5b3761da5f43886ba4081566e12b793f02fb105f57d74cd7
atan u10 6f8ded4d8fba9461e3df9faf8924499424d5910b4e3d7829573efc4b088316e1
atan u35 9408d2aa734a6b0c0bc1c80f4ad34e2b3dacb5eae623366deaa2cc2b9454499f
atan u35 c03ad6398c6992d946f89ff389fcd548be3bd9cb4fd0a1613f686a5a1ea1f0dc
atan2 u10 a3bcea5507555b07f1128585312e7772532dd414dd21588a95405188e4af6af6
atan2 u35 4cdbd13d36484ca540eb04d8854674103107aada4deb662d49dfdae9aa3eb7ca
sinh u10 d3859e3dc1ca924f11dc7b464cb0bb535d4ad71d1ec6f416a82db6e0e2390367
cosh u10 e6fd1172e97fa9341028299dd8a00379f1313170b8444a6a3c291230e4f178b7
tanh u10 5e2c1ce9d160d1a5dcc5ef8fd74f860751764f5dc14124075f848074ee386618
asinh u10 37d0df9811cc871b1dde4d762cc0eb53ec6c71c7bcf13100b9b5302ba1a85b99
acosh u10 158fb84af679aea2ab411fb84cd0b12ff876d897722ff84c54fa567c35705033
atanh u10 32253ae4f643e56a3d25a6d96d316ed94cd3a9e5ea16ad7180ff96e68571dc34
lgamma u10 4663f72dcb58a53bedefe071de51f0fccb9b73db12f5b53d5acea347d4de06cd
tgamma u10 ae094d163ce1ccaf94f5146ce3b147f76a886fee2758c8735328304bbb514b42
erf u10 73867031c0df90a5d060040cd160c7fe14fa6fc0c46104959e574ab6efdd67f7
erfc u15 4632ba9c10e73c7bbb32adf163d48d4cd90aa0c3314de4a7878953da08433f4d
fabs ff336faed535e34a082752839c9e957ba069ffdf0b046215bd415ce9120f29a3
copysign 67a7a162bfc2f15b76ded0470f938ab000edf8f8566d5a19fa99d4ea4d29fff3
fmax 57f39d5440fadb2a7387a47c00b067d5fc57ceabd7e5d64943b033acb5212063
fmin 87e131762ec9c46badd6105ab66f09d99d65776e2719f6af9befd8d6d3f59b6b
fdim 3331d6a17f289f54d429bdda9374d7d2574e0cd173e930a57436e8e484f271e9
fmod 89d26af516be177c55ba9fcec972416c35e229456b053271548021e9b070c193
remainder 2db01bb12776ec14d4a15469c31b49e759d74a3c8ed30d14fe88af3b27b5c398
modf 7780d1e6448f21bec6504e398a4e826f304da10aaec3c4e210bed86abdaecedf
nextafter 60a6c07477f6d07cd938ba6361d020175193a934a2714132615dae0bcedf785a
trunc ee43b2f9d897428885cb039f85259ea5ffe4efbfe4bf0dba16ee19829d198ac6
floor 29f8be9b8ad5795e65ed4f34878a85f5f8a1be707489345c4ad04b36d4da54bf
ceil bf267441867b261f8dcfca61b55fdc7ac0ff7a017b150da1b532776894962208
round 5d7d57a50d9860a7d145d428884df0341564dec7f14c24d5c319c8bce5565f9d
rint 834f8e41e3a28f43b26bc9a5836882cbc0fceeaec5774202cb6df473d995f5a1
sinf u35 5667c75091aaa7f6cad0b8e1ff80c5470cb5bfcbeb37ca089597a42bb89d21f9
sinf u10 4749c75d58eb24a83df44f86cfc204cd49b00a84472a592adfa5b0dc6ee5920e
cosf u35 c9aa15477ba53c5d4816a63ebca00123ebe9798374b7f93001478baf01f42393
cosf u10 8a8cc7609d7afacff4ff1a075784ad32d891567eb6dcc6ab115b0421c3985359
tanf u35 f7c53052860fa55f44e2fe63af8af15eade5e94951637634ebc5d0ee3c56dd6a
tanf u10 4dcccb3f2c42cf20d9cfa5b5602d86d8242d4d080cfa4f00321333e338cfb9ad
sincosf u10 3643081262b2d43ccedd509daca5d16fb66449aa1774a645a5b1343d4682c81b
sincosf u35 e02f3f1d2848c047d30ad1d89adeab6a9b0aef211fa0d8cd6613a43170e4e0fe
sincospif u05 66ccd831fa4c215b71cc791f3d0cb31babeadd34539867df8029cddf45539ded
sincospif u35 c2a92e1892c9ca12031896177e0dd898cb22b5b8305b42754b1a834485189c9b
logf u10 a43f52f3ce728ebd9ec9e2e84c901f6012fe0d6b83029c8380036404f59cd3ea
logf u35 68ef65827671b86d1fc77d8cb734c49e4c211bfb35990c84a4bbdec6026d8b4d
log2f u10 c732f1b5c7f5147d1576d4d858db46952d42ec229117dffce8b82e798799d2b6
log2f u35 529ca0ddf923543e938ad3663ad572b9addc586e7f1398c13dcde257b3bd65d1
log10f u10 c616f9465c071c42532255e9a49ba4305e0a588fc8d87ac31fceb30d2c59391e
log1pf u10 384577af7f24c0ff0abf3a574bf21e348bceb60a7a26b3a7006b7f1fa7032049
expf u10 1554f1b37125fdf5cf7e516415a04df7547be47dd89d262d24519c0a092593a7
exp2f u10 374572349c0d64862128a5f7e27555d5f7a2768ec20d52cfc73b2dd608128542
exp2f u35 374572349c0d64862128a5f7e27555d5f7a2768ec20d52cfc73b2dd608128542
exp10f u10 240f4207fcca7934627f058b87b2d935a0d5733123a61efa0cee45ed38af6d7b
exp10f u35 3806645d79d1e6ce3cb56f1d1d95689d835e54061b647c8ca8d8c0cb7eb19c97
expm1f u10 c0066ace0274e83dfce6b6f806ad89ef4c8b0919011477934d43c88dc42e0db2
powf u10 d370c629e456bed37684cff089d3f04dbe110d8ea0ba40e5e4f49abf9d874134
cbrtf u10 3589ce3bce26b796ddc4c6ec177cdb0ed05ece414530f4c22c77452b37432050
cbrtf u10 cfe7b512f728e60f99e14f597d34c94279b96e6cc897fc5ad1377365afb164c8
cbrtf u35 30fa2b571dec71ccd9f31607bc26c591036ced33e0ceaf038042e6a162b1ddba
cbrtf u35 a0ee4a56fbe28cc4c922188397c10456a0dd54bc31c54b0bd2cfffc7c5626dba
hypotf u05 efc46c07c1bff7caf4f1d52fbe0db4ab70100601c114acea3f4ecf7b2aeaf826
hypotf u35 e2e71c42bba52629c44960938d5b9961387aff15d92126799dff5e08f351b1e4
asinf u10 151d448af3ece5f8b2b1775b375cc3260895ac76042814d30bcf156f368d3d45
asinf u35 2daf25858c2c889ec4b3920ac12b00d7a1494f35f2abb36a3c7daabad99b751f
acosf u10 d4ea707c8f340c6580ed68072d92065abd8942272fdc048cc0318b02e6d312a4
acosf u35 a7a7a0a8e081e8ef26610c118afc1b7e60b8c6577ca644f49b0aca06f97beb91
atanf u10 c5e2e79af3d422f9ac9424afda4eab64c17ab80903305b3a281580c997a86055
atanf u10 ccea76f6a4c4a8941a5259c9c50c6899d71d0bc13948421333c14a604718c31b
atanf u35 67f3d2ab58989e4f24d6ac4f7106a58043d6a8d3a749a6308f155237d1c38eee
atanf u35 7fbc39fe8698ebd79040c51fbc31356acd27b1988435b96e4191eec8662b27d6
atan2f u10 fa56d1cfea9cbec5de469b1768bd660c19bb079361ec861f3ac0604a0acaee64
atan2f u35 6ed820eb372024d39c6db25a3242c7cc63c1d416fa3df8e0c68638a979c333f8
sinhf u10 18d9bc4d115cc4fb5061fda0e1a6b3aa90bce4fd68aa3000cea10dc94cc907e1
coshf u10 fcbdbe1ebd51db181bad96b3aa08aec5b81858925dd676e3dfd04d679863aa2e
tanhf u10 3b715185ce7c39d70ff17dceb539380b8ac9c80303c9796e41d1ebda6f2b0ece
asinhf u10 1fb7d432a1af3a637e602c9170d73dea5da7e82b57623bfd3b37bbbce1cc9bb1
acoshf u10 c01055933edfe7bcb45e5dea7377d2b2960ee61551a63270d9e7a28b76f3daad
atanhf u10 66540cd17454f09a95ef5adbeef6e9413ca31cb0446edc879447838f7b8c079c
lgammaf u10 b26a90f8b782f2a91132d5c12dcd56d749e301bf51e275085df2c4579639fa44
tgammaf u10 2790e8800bd1a29f564fe35ef8463f90b8566968739026c6b04097bbfa536f57
erff u10 81041541f31e72a7745d6fca4b208d4e332af8fc2366df5372b6cb38755369c9
erfcf u15 e310f5ed2f0c0b32a84280832bffbefec65cc063483497861f3fb684d72f046d
fabsf 560d13e463bce4448d733798c5818b13e5634f893211047bc2fad9f4c613797d
copysignf 74f7af06376f4c79d7af9ce4e50bde2fb8f22b56d741bdc67624ef7d1989e76b
fmaxf 7474be750857fe400beb2bc14fd1b1113a2e365ae7b45b0acc508436b4c32a94
fminf 4e22c453645f3c108c27e2c2fea65cd6a6b535f8236fd7382ff1082db3b31b5a
fdimf 16c17ff31778c7d63ec7f65c3b2a8561b79be62b1bc1b399ac0ffc43285b6cd0
fmodf 9fa4ece68b16803e6c47fa5cb280f8c246a2ef5731b0609bec71e1db27906f0a
remainderf f32761a428b1336051ee773e470b74ada93a611cafbc08f6a9aff36957c84f64
modff e976f223f2f4d380e9955392cb010920d5439665ae8eaf0fd6abbc889700a4f7
nextafterf ac05b3fd824c3ce73eb3946c7e3dec94ce4b1ec4efd0237bcfb4578d3d422cc6
truncf a014cd8206fb15c21b1cc773b951cf7f673e8be9e6e697ca0cf7293becb3d55c
floorf f17658ab95f73a1b4cfec0417e82b1b071cb97a3aac0110e289ba6449b2aeb55
ceilf e173e35cc97a85629ceb5025bd5b1abad52e4e153166cbf9672396b4ca23b59a
roundf 3e67b087b019c806d87593850bf0cd106718cf34f50684784296ef040f301fc0
rintf 827acf1e7d253c4fe9fdd4c5c9e53f35c80af5550ff6cbbb7aaac67577630c70
fastsinf u3500 dbf93ee799553cfb9abf84aaccc458e26113d7d78c4f634db4469bd0d9dd0e19
fastcosf u3500 55893f9b416b8876d022d7f960281efbb4f9241fdff0cbb059c2695d4c666d5b
fastpowf u3500 30b1aaff8eaad36907f99fd027a34bc06f39ffae218deeae10e399f133e72f8e

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -220,113 +220,113 @@ __global__ void xerfcf_u15(float *r, float *a0) { *r = Sleef_erfcf1_u15cuda(*a0)
//
#define func_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
*a0 = u2d(u); \
#define func_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
*a0 = u2d(u); \
funcName<<<1, 1>>>(r, a0); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 "\n", d2u(*r)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
cudaDeviceSynchronize(); \
printf("%" PRIx64 "\n", d2u(*r)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_d2_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
*a0 = u2d(u); \
funcName<<<1, 1>>>(r2, a0); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 " %" PRIx64 "\n", d2u(r2->x), d2u(r2->y)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
#define func_d2_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
*a0 = u2d(u); \
funcName<<<1, 1>>>(r2, a0); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 " %" PRIx64 "\n", d2u(r2->x), d2u(r2->y)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_d_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
*a0 = u2d(u); \
*a1 = u2d(v); \
funcName<<<1, 1>>>(r, a0, a1); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 "\n", d2u(*r)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
#define func_d_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
*a0 = u2d(u); \
*a1 = u2d(v); \
funcName<<<1, 1>>>(r, a0, a1); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 "\n", d2u(*r)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_d_d_i(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
*a0 = u2d(u); \
#define func_d_d_i(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
*a0 = u2d(u); \
*i0 = (int)u2d(v); \
funcName<<<1, 1>>>(r, a0, i0); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 "\n", d2u(*r)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
funcName<<<1, 1>>>(r, a0, i0); \
cudaDeviceSynchronize(); \
printf("%" PRIx64 "\n", d2u(*r)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_i_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
*a0 = u2d(u); \
funcName<<<1, 1>>>(i0, a0); \
cudaDeviceSynchronize(); \
printf("%d\n", *i0); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
#define func_i_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
*a0 = u2d(u); \
funcName<<<1, 1>>>(i0, a0); \
cudaDeviceSynchronize(); \
printf("%d\n", *i0); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
//
#define func_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
*b0 = u2f(u); \
#define func_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
*b0 = u2f(u); \
funcName<<<1, 1>>>(s, b0); \
cudaDeviceSynchronize(); \
printf("%x\n", f2u(*s)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
cudaDeviceSynchronize(); \
printf("%x\n", f2u(*s)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_f2_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
*b0 = u2f(u); \
funcName<<<1, 1>>>(s2, b0); \
cudaDeviceSynchronize(); \
#define func_f2_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
*b0 = u2f(u); \
funcName<<<1, 1>>>(s2, b0); \
cudaDeviceSynchronize(); \
printf("%x %x\n", f2u(s2->x), f2u(s2->y)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_f_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u, v; \
sscanf(buf, funcStr " %x %x", &u, &v); \
*b0 = u2f(u); \
*b1 = u2f(v); \
funcName<<<1, 1>>>(s, b0, b1); \
cudaDeviceSynchronize(); \
printf("%x\n", f2u(*s)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
#define func_f_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u, v; \
sscanf(buf, funcStr " %x %x", &u, &v); \
*b0 = u2f(u); \
*b1 = u2f(v); \
funcName<<<1, 1>>>(s, b0, b1); \
cudaDeviceSynchronize(); \
printf("%x\n", f2u(*s)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
//

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2023.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -474,172 +474,172 @@ static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
//
#define func_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
double s[VECTLENDP]; \
memrand(s, sizeof(s)); \
#define func_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
double s[VECTLENDP]; \
memrand(s, sizeof(s)); \
int idx = xrand() & (VECTLENDP-1); \
s[idx] = u2d(u); \
vdouble a = vloadu_vd_p(s); \
a = funcName(a); \
vstoreu_v_p_vd(s, a); \
u = d2u(s[idx]); \
printf("%" PRIx64 "\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
s[idx] = u2d(u); \
vdouble a = vloadu_vd_p(s); \
a = funcName(a); \
vstoreu_v_p_vd(s, a); \
u = d2u(s[idx]); \
printf("%" PRIx64 "\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_d2_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
#define func_d2_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
sscanf(buf, funcStr " %" PRIx64, &u); \
double s[VECTLENDP], t[VECTLENDP]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
int idx = xrand() & (VECTLENDP-1); \
s[idx] = u2d(u); \
vdouble2 v; \
vdouble a = vloadu_vd_p(s); \
v = funcName(a); \
vstoreu_v_p_vd(s, vd2getx_vd_vd2(v)); \
vstoreu_v_p_vd(t, vd2gety_vd_vd2(v)); \
Sleef_double2 d2; \
d2.x = s[idx]; \
d2.y = t[idx]; \
s[idx] = u2d(u); \
vdouble2 v; \
vdouble a = vloadu_vd_p(s); \
v = funcName(a); \
vstoreu_v_p_vd(s, vd2getx_vd_vd2(v)); \
vstoreu_v_p_vd(t, vd2gety_vd_vd2(v)); \
Sleef_double2 d2; \
d2.x = s[idx]; \
d2.y = t[idx]; \
printf("%" PRIx64 " %" PRIx64 "\n", d2u(d2.x), d2u(d2.y)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_d_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
#define func_d_d_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
double s[VECTLENDP], t[VECTLENDP]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
int idx = xrand() & (VECTLENDP-1); \
s[idx] = u2d(u); \
t[idx] = u2d(v); \
vdouble a, b; \
a = vloadu_vd_p(s); \
b = vloadu_vd_p(t); \
a = funcName(a, b); \
vstoreu_v_p_vd(s, a); \
u = d2u(s[idx]); \
printf("%" PRIx64 "\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
s[idx] = u2d(u); \
t[idx] = u2d(v); \
vdouble a, b; \
a = vloadu_vd_p(s); \
b = vloadu_vd_p(t); \
a = funcName(a, b); \
vstoreu_v_p_vd(s, a); \
u = d2u(s[idx]); \
printf("%" PRIx64 "\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_d_d_i(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
double s[VECTLENDP]; \
int t[VECTLENDP*2]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
#define func_d_d_i(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u, v; \
sscanf(buf, funcStr " %" PRIx64 " %" PRIx64, &u, &v); \
double s[VECTLENDP]; \
int t[VECTLENDP*2]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
int idx = xrand() & (VECTLENDP-1); \
s[idx] = u2d(u); \
t[idx] = (int)u2d(v); \
vstoreu_v_p_vd(s, funcName(vloadu_vd_p(s), vloadu_vi_p(t))); \
u = d2u(s[idx]); \
printf("%" PRIx64 "\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
s[idx] = u2d(u); \
t[idx] = (int)u2d(v); \
vstoreu_v_p_vd(s, funcName(vloadu_vd_p(s), vloadu_vi_p(t))); \
u = d2u(s[idx]); \
printf("%" PRIx64 "\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_i_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
int i; \
sscanf(buf, funcStr " %" PRIx64, &u); \
double s[VECTLENDP]; \
int t[VECTLENDP*2]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
#define func_i_d(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint64_t u; \
int i; \
sscanf(buf, funcStr " %" PRIx64, &u); \
double s[VECTLENDP]; \
int t[VECTLENDP*2]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
int idx = xrand() & (VECTLENDP-1); \
s[idx] = u2d(u); \
vdouble a = vloadu_vd_p(s); \
vint vi = funcName(a); \
vstoreu_v_p_vi(t, vi); \
i = t[idx]; \
s[idx] = u2d(u); \
vdouble a = vloadu_vd_p(s); \
vint vi = funcName(a); \
vstoreu_v_p_vi(t, vi); \
i = t[idx]; \
printf("%d\n", i); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
//
#define func_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
float s[VECTLENSP]; \
memrand(s, sizeof(s)); \
#define func_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
float s[VECTLENSP]; \
memrand(s, sizeof(s)); \
int idx = xrand() & (VECTLENSP-1); \
s[idx] = u2f(u); \
s[idx] = u2f(u); \
vfloat a = vloadu_vf_p(s); \
a = funcName(a); \
vstoreu_v_p_vf(s, a); \
u = f2u(s[idx]); \
a = funcName(a); \
vstoreu_v_p_vf(s, a); \
u = f2u(s[idx]); \
printf("%x\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_f2_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
float s[VECTLENSP], t[VECTLENSP]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
#define func_f2_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u; \
sscanf(buf, funcStr " %x", &u); \
float s[VECTLENSP], t[VECTLENSP]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
int idx = xrand() & (VECTLENSP-1); \
s[idx] = u2f(u); \
s[idx] = u2f(u); \
vfloat2 v; \
vfloat a = vloadu_vf_p(s); \
v = funcName(a); \
vstoreu_v_p_vf(s, vf2getx_vf_vf2(v)); \
vstoreu_v_p_vf(t, vf2gety_vf_vf2(v)); \
Sleef_float2 d2; \
d2.x = s[idx]; \
d2.y = t[idx]; \
printf("%x %x\n", f2u(d2.x), f2u(d2.y)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
v = funcName(a); \
vstoreu_v_p_vf(s, vf2getx_vf_vf2(v)); \
vstoreu_v_p_vf(t, vf2gety_vf_vf2(v)); \
Sleef_float2 d2; \
d2.x = s[idx]; \
d2.y = t[idx]; \
printf("%x %x\n", f2u(d2.x), f2u(d2.y)); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
#define func_f_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u, v; \
sscanf(buf, funcStr " %x %x", &u, &v); \
float s[VECTLENSP], t[VECTLENSP]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
#define func_f_f_f(funcStr, funcName) { \
while (startsWith(buf, funcStr " ")) { \
uint32_t u, v; \
sscanf(buf, funcStr " %x %x", &u, &v); \
float s[VECTLENSP], t[VECTLENSP]; \
memrand(s, sizeof(s)); \
memrand(t, sizeof(t)); \
int idx = xrand() & (VECTLENSP-1); \
s[idx] = u2f(u); \
t[idx] = u2f(v); \
vfloat a, b; \
a = vloadu_vf_p(s); \
b = vloadu_vf_p(t); \
a = funcName(a, b); \
vstoreu_v_p_vf(s, a); \
u = f2u(s[idx]); \
s[idx] = u2f(u); \
t[idx] = u2f(v); \
vfloat a, b; \
a = vloadu_vf_p(s); \
b = vloadu_vf_p(t); \
a = funcName(a, b); \
vstoreu_v_p_vf(s, a); \
u = f2u(s[idx]); \
printf("%x\n", u); \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
fflush(stdout); \
if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \
} \
}
//

View File

@ -1,4 +1,4 @@
// Copyright Naoki Shibata and contributors 2010 - 2021.
// Copyright Naoki Shibata and contributors 2010 - 2025.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
@ -89,37 +89,37 @@ void startChild(const char *path, char *const argv[]) {
//
#define child_d_d(funcStr, arg) do { \
char str[256]; \
uint64_t u; \
sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \
write(ptoc[1], str, strlen(str)); \
#define child_d_d(funcStr, arg) do { \
char str[256]; \
uint64_t u; \
sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \
write(ptoc[1], str, strlen(str)); \
if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \
sscanf(str, "%" PRIx64, &u); \
return u2d(u); \
return u2d(u); \
} while(0)
#define child_d2_d(funcStr, arg) do { \
char str[256]; \
uint64_t u, v; \
sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \
write(ptoc[1], str, strlen(str)); \
#define child_d2_d(funcStr, arg) do { \
char str[256]; \
uint64_t u, v; \
sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \
write(ptoc[1], str, strlen(str)); \
if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \
sscanf(str, "%" PRIx64 " %" PRIx64, &u, &v); \
Sleef_double2 ret; \
ret.x = u2d(u); \
ret.y = u2d(v); \
return ret; \
Sleef_double2 ret; \
ret.x = u2d(u); \
ret.y = u2d(v); \
return ret; \
} while(0)
#define child_d_d_d(funcStr, arg1, arg2) do { \
char str[256]; \
uint64_t u; \
#define child_d_d_d(funcStr, arg1, arg2) do { \
char str[256]; \
uint64_t u; \
sprintf(str, funcStr " %" PRIx64 " %" PRIx64 "\n", d2u(arg1), d2u(arg2)); \
write(ptoc[1], str, strlen(str)); \
write(ptoc[1], str, strlen(str)); \
if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \
sscanf(str, "%" PRIx64, &u); \
return u2d(u); \
return u2d(u); \
} while(0)
double child_sin(double x) { child_d_d("sin", x); }
@ -224,37 +224,37 @@ int child_ilogb(double x) {
//
#define child_f_f(funcStr, arg) do { \
char str[256]; \
uint32_t u; \
sprintf(str, funcStr " %x\n", f2u(arg)); \
write(ptoc[1], str, strlen(str)); \
#define child_f_f(funcStr, arg) do { \
char str[256]; \
uint32_t u; \
sprintf(str, funcStr " %x\n", f2u(arg)); \
write(ptoc[1], str, strlen(str)); \
if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \
sscanf(str, "%x", &u); \
return u2f(u); \
sscanf(str, "%x", &u); \
return u2f(u); \
} while(0)
#define child_f2_f(funcStr, arg) do { \
char str[256]; \
uint32_t u, v; \
sprintf(str, funcStr " %x\n", f2u(arg)); \
write(ptoc[1], str, strlen(str)); \
#define child_f2_f(funcStr, arg) do { \
char str[256]; \
uint32_t u, v; \
sprintf(str, funcStr " %x\n", f2u(arg)); \
write(ptoc[1], str, strlen(str)); \
if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \
sscanf(str, "%x %x", &u, &v); \
Sleef_float2 ret; \
ret.x = u2f(u); \
ret.y = u2f(v); \
return ret; \
sscanf(str, "%x %x", &u, &v); \
Sleef_float2 ret; \
ret.x = u2f(u); \
ret.y = u2f(v); \
return ret; \
} while(0)
#define child_f_f_f(funcStr, arg1, arg2) do { \
char str[256]; \
uint32_t u; \
sprintf(str, funcStr " %x %x\n", f2u(arg1), f2u(arg2)); \
write(ptoc[1], str, strlen(str)); \
#define child_f_f_f(funcStr, arg1, arg2) do { \
char str[256]; \
uint32_t u; \
sprintf(str, funcStr " %x %x\n", f2u(arg1), f2u(arg2)); \
write(ptoc[1], str, strlen(str)); \
if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \
sscanf(str, "%x", &u); \
return u2f(u); \
sscanf(str, "%x", &u); \
return u2f(u); \
} while(0)
float child_sinf(float x) { child_f_f("sinf", x); }
@ -1142,62 +1142,62 @@ void do_test() {
//
#define cmpDenorm_f(mpfrFunc, childFunc, argx) do { \
#define cmpDenorm_f(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
if (!cmpDenormsp(childFunc((float)flushToZero(argx)), frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
mpfrFunc(frc, frx, GMP_RNDN); \
if (!cmpDenormsp(childFunc((float)flushToZero(argx)), frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
(float)flushToZero(argx), childFunc((float)flushToZero(argx)), flushToZero(mpfr_get_d(frc, GMP_RNDN))); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenormNR_f(mpfrFunc, childFunc, argx) do { \
#define cmpDenormNR_f(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx); \
if (!cmpDenormsp(childFunc((float)flushToZero(argx)), frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
mpfrFunc(frc, frx); \
if (!cmpDenormsp(childFunc((float)flushToZero(argx)), frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
(float)flushToZero(argx), childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenorm_f_f(mpfrFunc, childFunc, argx, argy) do { \
#define cmpDenorm_f_f(mpfrFunc, childFunc, argx, argy) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfr_set_d(fry, (float)flushToZero(argy), GMP_RNDN); \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
if (!cmpDenormsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc)) { \
fprintf(stderr, "arg = %.20g, %.20g, test = %.20g, correct = %.20g\n", \
(float)flushToZero(argx), (float)flushToZero(argy), childFunc((float)flushToZero(argx), (float)flushToZero(argy)), mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenormX_f(mpfrFunc, childFunc, argx) do { \
#define cmpDenormX_f(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
if (!cmpDenormsp(d2.x, frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
if (!cmpDenormsp(d2.x, frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
(float)flushToZero(argx), d2.x, mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenormY_f(mpfrFunc, childFunc, argx) do { \
#define cmpDenormY_f(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
if (!cmpDenormsp(d2.y, frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
if (!cmpDenormsp(d2.y, frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", \
(float)flushToZero(argx), d2.y, mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
//
@ -2157,57 +2157,57 @@ void do_test() {
//
#define cmpDenorm_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
if (!cmpDenormdp(childFunc(argx), frc)) { \
#define cmpDenorm_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
if (!cmpDenormdp(childFunc(argx), frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenormNR_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx); \
if (!cmpDenormdp(childFunc(argx), frc)) { \
#define cmpDenormNR_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx); \
if (!cmpDenormdp(childFunc(argx), frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenorm_d_d(mpfrFunc, childFunc, argx, argy) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfr_set_d(fry, argy, GMP_RNDN); \
#define cmpDenorm_d_d(mpfrFunc, childFunc, argx, argy) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfr_set_d(fry, argy, GMP_RNDN); \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
if (!cmpDenormdp(childFunc(argx, argy), frc)) { \
if (!cmpDenormdp(childFunc(argx, argy), frc)) { \
fprintf(stderr, "arg = %.20g, %.20g, test = %.20g, correct = %.20g\n", argx, argy, childFunc(argx, argy), mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenormX_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
if (!cmpDenormdp(d2.x, frc)) { \
#define cmpDenormX_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
if (!cmpDenormdp(d2.x, frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", argx, d2.x, mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define cmpDenormY_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
if (!cmpDenormdp(d2.y, frc)) { \
#define cmpDenormY_d(mpfrFunc, childFunc, argx) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
if (!cmpDenormdp(d2.y, frc)) { \
fprintf(stderr, "arg = %.20g, test = %.20g, correct = %.20g\n", argx, d2.y, mpfr_get_d(frc, GMP_RNDN)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
//
@ -3435,58 +3435,58 @@ void do_test() {
//
#define checkAccuracy_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
if (countULPdp(childFunc(argx), frc) > bound) { \
#define checkAccuracy_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
if (countULPdp(childFunc(argx), frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx), frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracyNR_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx); \
if (countULPdp(childFunc(argx), frc) > bound) { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx); \
if (countULPdp(childFunc(argx), frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx), frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracy_d_d(mpfrFunc, childFunc, argx, argy, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfr_set_d(fry, argy, GMP_RNDN); \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
if (countULPdp(childFunc(argx, argy), frc) > bound) { \
#define checkAccuracy_d_d(mpfrFunc, childFunc, argx, argy, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfr_set_d(fry, argy, GMP_RNDN); \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
if (countULPdp(childFunc(argx, argy), frc) > bound) { \
fprintf(stderr, "\narg = %.20g, %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \
argx, argy, childFunc(argx, argy), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx, argy), frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracyX_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
#define checkAccuracyX_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
if (countULPdp(d2.x, frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, d2.x, mpfr_get_d(frc, GMP_RNDN), countULPdp(d2.x, frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracyY_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
#define checkAccuracyY_d(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, argx, GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_double2 d2 = childFunc(argx); \
if (countULPdp(d2.y, frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, d2.y, mpfr_get_d(frc, GMP_RNDN), countULPdp(d2.y, frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
//
@ -3903,6 +3903,8 @@ void do_test() {
fprintf(stderr, "exp : ");
for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0);
for(d = -1000;d < 1000 && success;d += 1.1) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0);
// Test for early or late overflow, e.g before or after x = LOG_DBL_MAX
for(d = LOG_DBL_MAX - 0.0001;(d < LOG_DBL_MAX + 0.0001) && success;d += 0.00001) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0);
showResult(success);
//
@ -3914,6 +3916,8 @@ void do_test() {
}
}
for(y = -1000;y < 1000 && success;y += 0.1) checkAccuracy_d_d(mpfr_pow, child_pow, 2.1, y, 1.0);
// Test for early or late overflow (test limited to x = e)
for(d = LOG_DBL_MAX - 0.0001;(d < LOG_DBL_MAX + 0.0001) && success;d += 0.00001) checkAccuracy_d_d(mpfr_pow, child_pow, exp(1.0), d, 1.0);
showResult(success);
//
@ -4141,6 +4145,7 @@ void do_test() {
fprintf(stderr, "log1p : ");
for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0);
for(d = 1.0e+307;d < DBL_MAX && success;d += 1.0e+306) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0);
showResult(success);
//
@ -4222,73 +4227,73 @@ void do_test() {
//
#define checkAccuracy_f(mpfrFunc, childFunc, argx, bound) do { \
#define checkAccuracy_f(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \
mpfrFunc(frc, frx, GMP_RNDN); \
if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \
(float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracyNR_f(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx); \
if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \
mpfrFunc(frc, frx); \
if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \
(float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracy_f_f(mpfrFunc, childFunc, argx, argy, bound) do { \
#define checkAccuracy_f_f(mpfrFunc, childFunc, argx, argy, bound) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfr_set_d(fry, (float)flushToZero(argy), GMP_RNDN); \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
if (countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc) > bound) { \
mpfrFunc(frc, frx, fry, GMP_RNDN); \
if (countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc) > bound) { \
fprintf(stderr, "\narg = %.20g, %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \
(float)flushToZero(argx), (float)flushToZero(argy), childFunc((float)flushToZero(argx), (float)flushToZero(argy)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracyX_f(mpfrFunc, childFunc, argx, bound) do { \
#define checkAccuracyX_f(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
if (countULPsp(d2.x, frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", (float)flushToZero(argx), (double)d2.x, mpfr_get_d(frc, GMP_RNDN), countULPsp(d2.x, frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracyY_f(mpfrFunc, childFunc, argx, bound) do { \
#define checkAccuracyY_f(mpfrFunc, childFunc, argx, bound) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
mpfrFunc(frc, frx, GMP_RNDN); \
Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \
if (countULPsp(d2.y, frc) > bound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", (float)flushToZero(argx), (double)d2.y, mpfr_get_d(frc, GMP_RNDN), countULPsp(d2.y, frc)); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
#define checkAccuracy2_f(mpfrFunc, childFunc, argx, bound, abound) do { \
#define checkAccuracy2_f(mpfrFunc, childFunc, argx, bound, abound) do { \
mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \
mpfrFunc(frc, frx, GMP_RNDN); \
double t = childFunc((float)flushToZero(argx)); \
double ae = fabs(mpfr_get_d(frc, GMP_RNDN) - t); \
if (countULPsp(t, frc) > bound && ae > abound) { \
mpfrFunc(frc, frx, GMP_RNDN); \
double t = childFunc((float)flushToZero(argx)); \
double ae = fabs(mpfr_get_d(frc, GMP_RNDN) - t); \
if (countULPsp(t, frc) > bound && ae > abound) { \
fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf, abserror = %g\n", \
(float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc), ae); \
success = 0; \
break; \
} \
success = 0; \
break; \
} \
} while(0)
//
@ -4825,6 +4830,8 @@ void do_test() {
fprintf(stderr, "atanf : ");
for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5);
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5);
checkAccuracy_f(mpfr_atan, child_atanf, +INFINITY, 3.5);
checkAccuracy_f(mpfr_atan, child_atanf, -INFINITY, 3.5);
showResult(success);
//
@ -5012,6 +5019,7 @@ void do_test() {
fprintf(stderr, "log1pf : ");
for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0);
for(d = 1.0e+38;d < FLT_MAX && success;d += 1.0e+37) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0);
showResult(success);
//

Some files were not shown because too many files have changed in this diff Show More