mirror of
https://github.com/openjdk/jdk.git
synced 2026-07-02 15:20:27 +00:00
8355216: Accelerate P-256 arithmetic on aarch64
Reviewed-by: adinn, aph
This commit is contained in:
parent
05cd2d948c
commit
f1cd7f6ab9
@ -3151,6 +3151,34 @@ public:
|
||||
_pmull(Vd, Ta, Vn, Vm, Tb);
|
||||
}
|
||||
|
||||
//Vector by element variant of UMULL
|
||||
void _umullv(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,
|
||||
SIMD_Arrangement Tb, FloatRegister Vm, SIMD_RegVariant Ts, int lane) {
|
||||
starti;
|
||||
int size = (Ta == T4S) ? 0b01 : 0b10;
|
||||
int q = (Tb == T4H || Tb == T2S) ? 0 : 1;
|
||||
int h = (size == 0b01) ? ((lane >> 2) & 1) : ((lane >> 1) & 1);
|
||||
int l = (size == 0b01) ? ((lane >> 1) & 1) : (lane & 1);
|
||||
assert(Ta == T4S || Ta == T2D, "umull{2}v destination register must have arrangement T4S or T2D");
|
||||
assert(size == 0b10 ? lane < 4 : lane < 8, "umull{2}v assumes lane < 4 when using half-words and lane < 8 otherwise");
|
||||
assert(Ts == H ? Vm->encoding() < 16 : Vm->encoding() < 32, "umull{2}v requires Vm to be in range V0..V15 when Ts is H");
|
||||
f(0, 31), f(q, 30), f(0b101111, 29, 24), f(size, 23, 22), f(l, 21); //f(m, 20);
|
||||
rf(Vm, 16), f(0b1010, 15, 12), f(h, 11), f(0, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
//Vector by element variant of UMULL
|
||||
void umullv(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,
|
||||
SIMD_Arrangement Tb, FloatRegister Vm, SIMD_RegVariant Ts, int lane) {
|
||||
assert(Ta == T4S ? (Tb == T4H && Ts == H) : (Tb == T2S && Ts == S), "umullv register arrangements must adhere to spec");
|
||||
_umullv(Vd, Ta, Vn, Tb, Vm, Ts, lane);
|
||||
}
|
||||
|
||||
// Vector by element variant of UMULL2.
// Checks that the operand arrangements form one of the two accepted
// combinations (T4S <- T8H with an H element, otherwise T4S with an S
// element) and then hands off to the shared encoder _umullv().
void umull2v(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,
             SIMD_Arrangement Tb, FloatRegister Vm, SIMD_RegVariant Ts, int lane) {
  assert((Ta == T4S && Tb == T8H && Ts == H) ||
         (Ta != T4S && Tb == T4S && Ts == S), "umull2v register arrangements must adhere to spec");
  _umullv(Vd, Ta, Vn, Tb, Vm, Ts, lane);
}
|
||||
|
||||
void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
|
||||
starti;
|
||||
int size_b = (int)Tb >> 1;
|
||||
|
||||
@ -535,6 +535,17 @@ VSeq<N/2> vs_odd(const VSeq<N>& v) {
|
||||
return VSeq<N/2>(v.base() + v.delta(), v.delta() * 2);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
FloatRegister vs_head(const VSeq<N>& v) {
|
||||
static_assert(N > 1, "sequence length must be greater than 1");
|
||||
return v.base();
|
||||
}
|
||||
|
||||
template<int N>
|
||||
VSeq<N-1> vs_tail(const VSeq<N>& v) {
|
||||
return VSeq<N-1>(v.base() + v.delta(), v.delta());
|
||||
}
|
||||
|
||||
// convenience method to construct a vector register sequence that
|
||||
// indexes its elements in reverse order to the original
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@
|
||||
do_arch_entry, \
|
||||
do_arch_entry_init, \
|
||||
do_arch_entry_array) \
|
||||
do_arch_blob(compiler, 70000) \
|
||||
do_arch_blob(compiler, 75000) \
|
||||
do_stub(compiler, vector_iota_indices) \
|
||||
do_arch_entry_array(aarch64, compiler, vector_iota_indices, \
|
||||
vector_iota_indices, vector_iota_indices, \
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -454,6 +454,10 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
|
||||
UseIntPolyIntrinsics = true;
|
||||
}
|
||||
|
||||
if (supports_feature(CPU_ASIMD)) {
|
||||
if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
|
||||
UseKyberIntrinsics = true;
|
||||
|
||||
@ -299,6 +299,7 @@ public:
|
||||
do_var(bool, UseSHA256Intrinsics) \
|
||||
do_var(bool, UseSHA3Intrinsics) \
|
||||
do_var(bool, UseSHA512Intrinsics) \
|
||||
do_var(bool, UseIntPolyIntrinsics) \
|
||||
do_var(bool, UseVectorizedMismatchIntrinsic) \
|
||||
do_fun(int, CompressedKlassPointers_shift, CompressedKlassPointers::shift()) \
|
||||
do_fun(bool, JavaAssertions_systemClassDefault, JavaAssertions::systemClassDefault()) \
|
||||
@ -342,7 +343,6 @@ public:
|
||||
do_var(int, AVX3Threshold) /* array copy stubs and nmethods */ \
|
||||
do_var(bool, EnableX86ECoreOpts) /* nmethods */ \
|
||||
do_var(bool, UseLibmIntrinsic) \
|
||||
do_var(bool, UseIntPolyIntrinsics) \
|
||||
// END
|
||||
#else
|
||||
#define AOTCODECACHE_CONFIGS_X86_DO(do_var, do_fun)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user