From 0dd26b312ebe2af1b2d1639fc6355022834896eb Mon Sep 17 00:00:00 2001
From: Shawn Emery
Date: Fri, 5 Jun 2026 16:20:42 +0000
Subject: [PATCH] 8371305: X25519 should utilize x86 intrinsics

Reviewed-by: vpaprotski, ascarpino
---
 src/hotspot/cpu/x86/stubGenerator_x86_64.cpp  |   5 +
 src/hotspot/cpu/x86/stubGenerator_x86_64.hpp  |   3 +
 .../x86/stubGenerator_x86_64_poly25519.cpp    | 306 ++++++++++++++++++
 .../x86/stubGenerator_x86_64_poly_mont.cpp    |   4 +-
 src/hotspot/cpu/x86/vm_version_x86.cpp        |   4 +
 src/hotspot/share/classfile/vmIntrinsics.cpp  |   6 +-
 src/hotspot/share/classfile/vmIntrinsics.hpp  |   7 +
 src/hotspot/share/opto/c2compiler.cpp         |   2 +
 src/hotspot/share/opto/escape.cpp             |   2 +
 src/hotspot/share/opto/library_call.cpp       |  68 ++++
 src/hotspot/share/opto/library_call.hpp       |   2 +
 src/hotspot/share/opto/runtime.cpp            |  41 ++-
 src/hotspot/share/opto/runtime.hpp            |  16 +-
 src/hotspot/share/runtime/globals.hpp         |   6 +-
 .../share/runtime/stubDeclarations.hpp        |   8 +-
 .../math/intpoly/IntegerPolynomial25519.java  |   3 +
 16 files changed, 476 insertions(+), 7 deletions(-)
 create mode 100644 src/hotspot/cpu/x86/stubGenerator_x86_64_poly25519.cpp

diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index b64943fc4de..afd9c126a21 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -4904,6 +4904,11 @@ void StubGenerator::generate_compiler_stubs() {
     StubRoutines::_intpoly_assign = generate_intpoly_assign();
   }
 
+  if (UseIntPoly25519Intrinsics) {
+    StubRoutines::_intpoly_mult_25519 = generate_intpoly_mult_25519();
+    StubRoutines::_intpoly_square_25519 = generate_intpoly_square_25519();
+  }
+
   if (UseMD5Intrinsics) {
     StubRoutines::_md5_implCompress = generate_md5_implCompress(StubId::stubgen_md5_implCompress_id);
     StubRoutines::_md5_implCompressMB = generate_md5_implCompress(StubId::stubgen_md5_implCompressMB_id);
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
index 360b0329d95..6e3da334f11 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@@ -496,6 +496,9 @@ class StubGenerator: public StubCodeGenerator {
   address generate_intpoly_montgomeryMult_P256();
   address generate_intpoly_assign();
 
+  address generate_intpoly_mult_25519();
+  address generate_intpoly_square_25519();
+
   // SHA3 stubs
   void generate_sha3_stubs();
 
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly25519.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly25519.cpp
new file mode 100644
index 00000000000..c7395220d49
--- /dev/null
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly25519.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "macroAssembler_x86.hpp"
+#include "stubGenerator_x86_64.hpp"
+
+#define __ _masm->
+
+const int32_t term = 19;
+const int32_t limbs = 5;
+const int32_t bpl = 51;
+const int32_t rem = 64 - bpl;
+const uint64_t MASK = 0x7FFFFFFFFFFFF;
+const uint64_t CARRY_ADD = 0x4000000000000;
+
+// Multiplication operation for polynomial arithmetic in Curve25519.
+//
+// This is the same algorithm as used in Java, except we use pseudo-Mersenne
+// reduction to reduce register pressure instead of using the full 10 columns
+// in Java.
+void multiply_25519_scalar(const Register aLimbs, const Register bLimbs, const Register rLimbs, Register c[], Register bArg, Register d, Register b, Register mask, MacroAssembler* _masm) {
+
+  for (int i = 0; i < limbs; i++) {
+    __ xorq(c[i], c[i]);
+  }
+  __ mov64(mask, MASK);
+  __ movq(bArg, bLimbs);
+
+  // Perform high/low multiplication with signed 5x51 bit limbs
+  for (int i = 0; i < limbs; i++) {
+    __ movq(b, Address(bArg, i * 8));
+    for (int j = 0; j < limbs; j++) {
+      __ movq(rax, Address(aLimbs, j * 8));
+      __ imulq(b); // rdx:rax = a[j] * b[i]
+      __ movq(d, rax);
+      __ andq(d, mask);
+      __ shrq(rax, bpl);
+      __ shlq(rdx, rem);
+      __ orq(rax, rdx);
+      // Fold in pseudo-Mersenne reduction
+      if ((i + j + 1) >= limbs) {
+        __ imulq(rax, rax, term);
+      }
+      if ((i + j) >= limbs) {
+        __ imulq(d, d, term);
+      }
+      __ addq(c[(i + j) % limbs], d);
+      __ addq(c[(i + j + 1) % limbs], rax);
+    }
+  }
+
+  // Carry-add with reduction from high limb
+  Register carry = bArg;
+  __ mov64(mask, CARRY_ADD);
+  __ movq(carry, mask);
+
+  // Limb 3
+  __ addq(carry, c[3]);
+  __ sarq(carry, bpl);
+  __ addq(c[4], carry);
+  __ shlq(carry, bpl);
+  __ subq(c[3], carry);
+
+  // Limb 4
+  __ movq(carry, mask);
+  __ addq(carry, c[4]);
+  __ sarq(carry, bpl);
+
+  // Reduce high order limb and fold back into low order limb
+  __ mov64(rax, term);
+  __ imulq(carry);
+  __ addq(c[0], rax);
+
+  __ shlq(carry, bpl);
+  __ subq(c[4], carry);
+
+  // Limbs 0 - 3
+  for (int i = 0; i < (limbs - 1); i++) {
+    __ movq(carry, mask);
+    __ addq(carry, c[i]);
+    __ sarq(carry, bpl);
+    __ addq(c[i + 1], carry);
+    __ shlq(carry, bpl);
+    __ subq(c[i], carry);
+  }
+
+  __ pop_ppx(rdx); // restore rdx pushed by the stub prologue; imulq clobbered it and it may hold rLimbs
+
+  for (int i = 0; i < limbs; i++) {
+    __ movq(Address(rLimbs, i * 8), c[i]);
+  }
+}
+
+// Squaring operation for polynomial arithmetic in Curve25519.
+//
+// This is the same algorithm as used in Java, except we use pseudo-Mersenne
+// reduction to reduce register pressure instead of using the full 10 columns
+// in Java.
+void square_25519_scalar(const Register aLimbs, const Register rLimbs, Register c[], Register aArg, Register d, Register carry, Register mask, MacroAssembler* _masm) {
+
+  for (int i = 0; i < limbs; i++) {
+    __ xorq(c[i], c[i]);
+  }
+  __ mov64(mask, MASK);
+
+  // Perform high/low multiplication with signed 5x51 bit limbs
+  for (int i = 0; i < limbs; i++) {
+    __ movq(aArg, Address(aLimbs, i * 8));
+    __ movq(rax, aArg);
+    __ imulq(aArg); // rdx:rax = a[i] * a[i] (diagonal term, counted once)
+    __ movq(d, rax);
+    __ andq(d, mask);
+    __ shrq(rax, bpl);
+    __ shlq(rdx, rem);
+    __ orq(rax, rdx); // rax = dd
+    if ((i * 2 + 1) >= limbs) {
+      __ imulq(rax, rax, term);
+    }
+    if ((i * 2) >= limbs) {
+      __ imulq(d, d, term);
+    }
+    __ addq(c[(i * 2) % limbs], d);
+    __ addq(c[(i * 2 + 1) % limbs], rax);
+    for (int j = i + 1; j < limbs; j++) {
+      __ movq(rax, Address(aLimbs, j * 8));
+      __ imulq(aArg); // rdx:rax = a[j] * a[i] (cross term, doubled below)
+      __ movq(d, rax);
+      __ andq(d, mask);
+      __ shlq(d, 1);
+      __ shrq(rax, bpl);
+      __ shlq(rdx, rem);
+      __ orq(rax, rdx); // rax = dd
+      __ shlq(rax, 1);
+      if ((j + i + 1) >= limbs) {
+        __ imulq(rax, rax, term);
+      }
+      if ((j + i) >= limbs) {
+        __ imulq(d, d, term);
+      }
+      __ addq(c[(i + j) % limbs], d);
+      __ addq(c[(i + j + 1) % limbs], rax);
+    }
+  }
+
+  // Carry-add with reduction from high limb
+  // Limb 3
+  __ mov64(mask, CARRY_ADD);
+  __ movq(carry, mask);
+  __ addq(carry, c[3]);
+  __ sarq(carry, bpl);
+  __ addq(c[4], carry);
+  __ shlq(carry, bpl);
+  __ subq(c[3], carry);
+
+  // Limb 4
+  __ movq(carry, mask);
+  __ addq(carry, c[4]);
+  __ sarq(carry, bpl);
+
+  // Reduce high order limb and fold back into low order limb
+  __ mov64(rax, term);
+  __ imulq(carry);
+  __ addq(c[0], rax);
+
+  __ shlq(carry, bpl);
+  __ subq(c[4], carry);
+
+  // Limbs 0 - 3
+  for (int i = 0; i < (limbs - 1); i++) {
+    __ movq(carry, mask);
+    __ addq(carry, c[i]);
+    __ sarq(carry, bpl);
+    __ addq(c[i + 1], carry);
+    __ shlq(carry, bpl);
+    __ subq(c[i], carry);
+  }
+
+  __ pop_ppx(rdx); // restore rdx pushed by the stub prologue; imulq clobbered it and it may hold rLimbs
+
+  for (int i = 0; i < limbs; i++) {
+    __ movq(Address(rLimbs, i * 8), c[i]);
+  }
+}
+
+address StubGenerator::generate_intpoly_mult_25519() {
+  StubId stub_id = StubId::stubgen_intpoly_mult_25519_id;
+  int entry_count = StubInfo::entry_count(stub_id);
+  assert(entry_count == 1, "sanity check");
+  address start = load_archive_data(stub_id);
+  if (start != nullptr) {
+    return start;
+  }
+  __ align(CodeEntryAlignment);
+  StubCodeMark mark(this, stub_id);
+  start = __ pc();
+  __ enter();
+
+  // Register Map
+  const Register aLimbs = c_rarg0; // rdi | rcx
+  const Register bLimbs = c_rarg1; // rsi | rdx
+  const Register rLimbs = c_rarg2; // rdx | r8
+
+  Register c[] = {r9, r10, r11, r12, r13};
+  Register bArg = r14;
+  Register d = r15;
+  Register b = rbp;
+  Register mask = rbx;
+
+  __ push_ppx(rbp);
+  __ push_ppx(rbx);
+  __ push_ppx(r12);
+  __ push_ppx(r13);
+  __ push_ppx(r14);
+  __ push_ppx(r15);
+  __ push_ppx(rdx);
+
+  multiply_25519_scalar(aLimbs, bLimbs, rLimbs, c, bArg, d, b, mask, _masm);
+
+  // __ pop_ppx(rdx); // restored in the helper already
+  __ pop_ppx(r15);
+  __ pop_ppx(r14);
+  __ pop_ppx(r13);
+  __ pop_ppx(r12);
+  __ pop_ppx(rbx);
+  __ pop_ppx(rbp);
+
+  __ leave();
+  __ ret(0);
+
+  // Record the stub entry and end
+  store_archive_data(stub_id, start, __ pc());
+
+  return start;
+}
+
+address StubGenerator::generate_intpoly_square_25519() {
+  StubId stub_id = StubId::stubgen_intpoly_square_25519_id;
+  int entry_count = StubInfo::entry_count(stub_id);
+  assert(entry_count == 1, "sanity check");
+  address start = load_archive_data(stub_id);
+  if (start != nullptr) {
+    return start;
+  }
+  __ align(CodeEntryAlignment);
+  StubCodeMark mark(this, stub_id);
+  start = __ pc();
+  __ enter();
+
+  // Register Map
+  const Register aLimbs = c_rarg0; // rdi | rcx
+  const Register rLimbs = c_rarg1; // rsi | rdx
+  Register c[] = {r9, r10, r11, r12, r13};
+  Register aArg = r14;
+  Register d = r15;
+  Register carry = rbp;
+  Register mask = rbx;
+
+  __ push_ppx(rbp);
+  __ push_ppx(rbx);
+  __ push_ppx(r12);
+  __ push_ppx(r13);
+  __ push_ppx(r14);
+  __ push_ppx(r15);
+  __ push_ppx(rdx);
+
+  square_25519_scalar(aLimbs, rLimbs, c, aArg, d, carry, mask, _masm);
+
+  // __ pop_ppx(rdx); // restored in the helper already
+  __ pop_ppx(r15);
+  __ pop_ppx(r14);
+  __ pop_ppx(r13);
+  __ pop_ppx(r12);
+  __ pop_ppx(rbx);
+  __ pop_ppx(rbp);
+
+  __ leave();
+  __ ret(0);
+
+  // Record the stub entry and end
+  store_archive_data(stub_id, start, __ pc());
+
+  return start;
+}
+#undef __
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
index 308a8042993..76b6fa97fa5 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, 2025, Intel Corporation. All rights reserved.
+ * Copyright (c) 2024, 2026, Intel Corporation. All rights reserved.
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -676,7 +676,7 @@ address StubGenerator::generate_intpoly_assign() {
   // KNOWN Lengths:
   //   MontgomeryIntPolynP256:  5 = 4 + 1
   //   IntegerPolynomial1305:   5 = 4 + 1
-  //   IntegerPolynomial25519: 10 = 8 + 2
+  //   IntegerPolynomial25519:  5 = 4 + 1
   //   IntegerPolynomialP256:  10 = 8 + 2
   //   Curve25519OrderField:   10 = 8 + 2
   //   Curve25519OrderField:   10 = 8 + 2
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 4cdcb1770bb..2ca1c172542 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1407,6 +1407,10 @@ void VM_Version::get_processor_features() {
     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
   }
 
+  if (FLAG_IS_DEFAULT(UseIntPoly25519Intrinsics)) {
+    UseIntPoly25519Intrinsics = true;
+  }
+
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
   }
diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp
index cec3586a50b..4a1b9ead116 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -527,6 +527,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
   case vmIntrinsics::_intpoly_assign:
     if (!UseIntPolyIntrinsics) return true;
     break;
+  case vmIntrinsics::_intpoly_mult_25519:
+  case vmIntrinsics::_intpoly_square_25519:
+    if (!UseIntPoly25519Intrinsics) return true;
+    break;
   case vmIntrinsics::_updateBytesCRC32C:
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
     if (!UseCRC32CIntrinsics) return true;
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index de4eea669a1..8833e4167f6 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -549,6 +549,13 @@ class methodHandle;
    do_name(intPolyAssign_name, "conditionalAssign")                                                                     \
    do_signature(intPolyAssign_signature, "(I[J[J)V")                                                                    \
                                                                                                                         \
+  /* support for sun.security.util.math.intpoly.IntegerPolynomial25519 */                                               \
+  do_class(sun_security_util_math_intpoly_IntegerPolynomial25519, "sun/security/util/math/intpoly/IntegerPolynomial25519") \
+  do_intrinsic(_intpoly_mult_25519, sun_security_util_math_intpoly_IntegerPolynomial25519, intPolyMult_name, intPolyMult_signature, F_R) \
+  do_intrinsic(_intpoly_square_25519, sun_security_util_math_intpoly_IntegerPolynomial25519, intPolySquare_name, intPolySquare_signature, F_R) \
+   do_name(intPolySquare_name, "square")                                                                                \
+   do_signature(intPolySquare_signature, "([J[J)V")                                                                     \
+                                                                                                                        \
   /* support for java.util.Base64.Encoder*/                                                                             \
   do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder")                                                        \
   do_intrinsic(_base64_encodeBlock, java_util_Base64_Encoder, encodeBlock_name, encodeBlock_signature, F_R)             \
diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp
index 2f48fffcaa2..bf434030499 100644
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@@ -819,6 +819,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
   case vmIntrinsics::_poly1305_processBlocks:
   case vmIntrinsics::_intpoly_montgomeryMult_P256:
   case vmIntrinsics::_intpoly_assign:
+  case vmIntrinsics::_intpoly_mult_25519:
+  case vmIntrinsics::_intpoly_square_25519:
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp
index 49e59c70c47..f561818a99b 100644
--- a/src/hotspot/share/opto/escape.cpp
+++ b/src/hotspot/share/opto/escape.cpp
@@ -2272,6 +2272,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
                   strcmp(call->as_CallLeaf()->_name, "poly1305_processBlocks") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "intpoly_montgomeryMult_P256") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "intpoly_assign") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "intpoly_mult_25519") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "intpoly_square_25519") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "chacha20Block") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "kyberNtt") == 0 ||
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
index 7251783d771..adb8ff2dedb 100644
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@@ -666,6 +666,10 @@ bool LibraryCallKit::try_to_inline(int predicate) {
     return inline_intpoly_montgomeryMult_P256();
   case vmIntrinsics::_intpoly_assign:
     return inline_intpoly_assign();
+  case vmIntrinsics::_intpoly_mult_25519:
+    return inline_intpoly_mult_25519();
+  case vmIntrinsics::_intpoly_square_25519:
+    return inline_intpoly_square_25519();
   case vmIntrinsics::_encodeISOArray:
   case vmIntrinsics::_encodeByteISOArray:
     return inline_encodeISOArray(false);
@@ -8373,6 +8377,70 @@ bool LibraryCallKit::inline_intpoly_assign() {
   return true;
 }
 
+bool LibraryCallKit::inline_intpoly_mult_25519() {
+  address stubAddr;
+  const char *stubName;
+  assert(UseIntPoly25519Intrinsics, "need intpoly25519 intrinsics support");
+  assert(callee()->signature()->size() == 3, "intpoly_mult_25519 has %d parameters", callee()->signature()->size());
+  stubAddr = StubRoutines::intpoly_mult_25519();
+  stubName = "intpoly_mult_25519";
+
+  if (!stubAddr) return false;
+  null_check_receiver();  // null-check receiver
+  if (stopped())  return true;
+
+  Node* a = argument(1);
+  Node* b = argument(2);
+  Node* r = argument(3);
+
+  a = must_be_not_null(a, true);
+  b = must_be_not_null(b, true);
+  r = must_be_not_null(r, true);
+
+  Node* a_start = array_element_address(a, intcon(0), T_LONG);
+  assert(a_start, "a array is null");
+  Node* b_start = array_element_address(b, intcon(0), T_LONG);
+  assert(b_start, "b array is null");
+  Node* r_start = array_element_address(r, intcon(0), T_LONG);
+  assert(r_start, "r array is null");
+
+  Node* call = make_runtime_call(RC_LEAF | RC_NO_FP,
+                                 OptoRuntime::intpoly_mult_25519_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 a_start, b_start, r_start);
+  return true;
+}
+
+bool LibraryCallKit::inline_intpoly_square_25519() {
+  address stubAddr;
+  const char *stubName;
+  assert(UseIntPoly25519Intrinsics, "need intpoly25519 intrinsics support");
+  assert(callee()->signature()->size() == 2, "intpoly_square_25519 has %d parameters", callee()->signature()->size());
+  stubAddr = StubRoutines::intpoly_square_25519();
+  stubName = "intpoly_square_25519";
+
+  if (!stubAddr) return false;
+  null_check_receiver();  // null-check receiver
+  if (stopped())  return true;
+
+  Node* a = argument(1);
+  Node* r = argument(2);
+
+  a = must_be_not_null(a, true);
+  r = must_be_not_null(r, true);
+
+  Node* a_start = array_element_address(a, intcon(0), T_LONG);
+  assert(a_start, "a array is null");
+  Node* r_start = array_element_address(r, intcon(0), T_LONG);
+  assert(r_start, "r array is null");
+
+  Node* call = make_runtime_call(RC_LEAF | RC_NO_FP,
+                                 OptoRuntime::intpoly_square_25519_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 a_start, r_start);
+  return true;
+}
+
 //------------------------------inline_digestBase_implCompress-----------------------
 //
 // Calculate MD5 for single-block byte[] array.
diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp
index 5b46ae832a4..871a6b0d072 100644
--- a/src/hotspot/share/opto/library_call.hpp
+++ b/src/hotspot/share/opto/library_call.hpp
@@ -343,6 +343,8 @@ class LibraryCallKit : public GraphKit {
   bool inline_poly1305_processBlocks();
   bool inline_intpoly_montgomeryMult_P256();
   bool inline_intpoly_assign();
+  bool inline_intpoly_mult_25519();
+  bool inline_intpoly_square_25519();
   bool inline_digestBase_implCompress(vmIntrinsics::ID id);
   bool inline_keccak(vmIntrinsics::ID id);
   bool inline_digestBase_implCompressMB(int predicate);
diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp
index 1afffcadd6e..7f791082b65 100644
--- a/src/hotspot/share/opto/runtime.cpp
+++ b/src/hotspot/share/opto/runtime.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -237,6 +237,8 @@ const TypeFunc* OptoRuntime::_string_IndexOf_Type = nullptr;
 const TypeFunc* OptoRuntime::_poly1305_processBlocks_Type         = nullptr;
 const TypeFunc* OptoRuntime::_intpoly_montgomeryMult_P256_Type    = nullptr;
 const TypeFunc* OptoRuntime::_intpoly_assign_Type                 = nullptr;
+const TypeFunc* OptoRuntime::_intpoly_mult_25519_Type             = nullptr;
+const TypeFunc* OptoRuntime::_intpoly_square_25519_Type           = nullptr;
 const TypeFunc* OptoRuntime::_updateBytesCRC32_Type               = nullptr;
 const TypeFunc* OptoRuntime::_updateBytesCRC32C_Type              = nullptr;
 const TypeFunc* OptoRuntime::_updateBytesAdler32_Type             = nullptr;
@@ -1786,6 +1788,41 @@ static const TypeFunc* make_intpoly_assign_Type() {
   return TypeFunc::make(domain, range);
 }
 
+static const TypeFunc* make_intpoly_mult_25519_Type() {
+  int argcnt = 3;
+
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL; // a array
+  fields[argp++] = TypePtr::NOTNULL; // b array
+  fields[argp++] = TypePtr::NOTNULL; // r(esult) array
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms + 0] = nullptr; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+static const TypeFunc* make_intpoly_square_25519_Type() {
+  int argcnt = 2;
+
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL; // a array
+  fields[argp++] = TypePtr::NOTNULL; // r(esult) array
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms + 0] = nullptr; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //------------- Interpreter state for on stack replacement
 static const TypeFunc* make_osr_end_Type() {
   // create input type (domain)
@@ -2354,6 +2391,8 @@ void OptoRuntime::initialize_types() {
   _poly1305_processBlocks_Type        = make_poly1305_processBlocks_Type();
   _intpoly_montgomeryMult_P256_Type   = make_intpoly_montgomeryMult_P256_Type();
   _intpoly_assign_Type                = make_intpoly_assign_Type();
+  _intpoly_mult_25519_Type            = make_intpoly_mult_25519_Type();
+  _intpoly_square_25519_Type          = make_intpoly_square_25519_Type();
   _updateBytesCRC32_Type              = make_updateBytesCRC32_Type();
   _updateBytesCRC32C_Type             = make_updateBytesCRC32C_Type();
   _updateBytesAdler32_Type            = make_updateBytesAdler32_Type();
diff --git a/src/hotspot/share/opto/runtime.hpp b/src/hotspot/share/opto/runtime.hpp
index af8a206e10c..5802bf59ae5 100644
--- a/src/hotspot/share/opto/runtime.hpp
+++ b/src/hotspot/share/opto/runtime.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -190,6 +190,8 @@ class OptoRuntime : public AllStatic {
   static const TypeFunc* _poly1305_processBlocks_Type;
   static const TypeFunc* _intpoly_montgomeryMult_P256_Type;
   static const TypeFunc* _intpoly_assign_Type;
+  static const TypeFunc* _intpoly_mult_25519_Type;
+  static const TypeFunc* _intpoly_square_25519_Type;
   static const TypeFunc* _updateBytesCRC32_Type;
   static const TypeFunc* _updateBytesCRC32C_Type;
   static const TypeFunc* _updateBytesAdler32_Type;
@@ -687,6 +689,18 @@ private:
     return _intpoly_assign_Type;
   }
 
+  // IntegerPolynomial25519 multiply function
+  static inline const TypeFunc* intpoly_mult_25519_Type() {
+    assert(_intpoly_mult_25519_Type != nullptr, "should be initialized");
+    return _intpoly_mult_25519_Type;
+  }
+
+  // IntegerPolynomial25519 square function
+  static inline const TypeFunc* intpoly_square_25519_Type() {
+    assert(_intpoly_square_25519_Type != nullptr, "should be initialized");
+    return _intpoly_square_25519_Type;
+  }
+
   /**
    * int updateBytesCRC32(int crc, byte* b, int len)
    */
diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp
index f90a644eaa4..ec34305f837 100644
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@@ -229,9 +229,13 @@ const int ObjectAlignmentInBytes = 8;
                                                                             \
   product(bool, UsePoly1305Intrinsics, false, DIAGNOSTIC,                   \
           "Use intrinsics for sun.security.util.math.intpoly")              \
-  product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC,                   \
+                                                                            \
+  product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC,                    \
           "Use intrinsics for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256") \
                                                                             \
+  product(bool, UseIntPoly25519Intrinsics, false, DIAGNOSTIC,               \
+          "Use intrinsics for sun.security.util.math.intpoly.IntegerPolynomial25519") \
+                                                                            \
   product(size_t, LargePageSizeInBytes, 0,                                  \
           "Maximum large page size used (0 will use the default large "     \
           "page size for the environment as the maximum) "                  \
diff --git a/src/hotspot/share/runtime/stubDeclarations.hpp b/src/hotspot/share/runtime/stubDeclarations.hpp
index bef6a0c27f0..5c3567eb0c0 100644
--- a/src/hotspot/share/runtime/stubDeclarations.hpp
+++ b/src/hotspot/share/runtime/stubDeclarations.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -801,6 +801,12 @@
            intpoly_montgomeryMult_P256, intpoly_montgomeryMult_P256)       \
   do_stub(compiler, intpoly_assign)                                        \
   do_entry(compiler, intpoly_assign, intpoly_assign, intpoly_assign)       \
+  do_stub(compiler, intpoly_mult_25519)                                    \
+  do_entry(compiler, intpoly_mult_25519,                                   \
+           intpoly_mult_25519, intpoly_mult_25519)                         \
+  do_stub(compiler, intpoly_square_25519)                                  \
+  do_entry(compiler, intpoly_square_25519,                                 \
+           intpoly_square_25519, intpoly_square_25519)                     \
   do_stub(compiler, md5_implCompress)                                      \
   do_entry(compiler, md5_implCompress, md5_implCompress,                   \
            md5_implCompress)                                               \
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial25519.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial25519.java
index c8f23da417e..b7b1ddae0e0 100644
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial25519.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial25519.java
@@ -26,6 +26,7 @@
 package sun.security.util.math.intpoly;
 
 import java.math.BigInteger;
+import jdk.internal.vm.annotation.IntrinsicCandidate;
 
 public final class IntegerPolynomial25519 extends IntegerPolynomial {
     private static final int BITS_PER_LIMB = 51;
@@ -235,6 +236,7 @@ public final class IntegerPolynomial25519 extends IntegerPolynomial {
      * @param b [in] the limb operand to multiply.
      * @param r [out] the product of the limbs operands that is fully reduced.
      */
+    @IntrinsicCandidate
     protected void mult(long[] a, long[] b, long[] r) {
         long aa0 = a[0];
         long aa1 = a[1];
@@ -414,6 +416,7 @@ public final class IntegerPolynomial25519 extends IntegerPolynomial {
      * @param a [in] the limb operand to square.
      * @param r [out] the resulting square of the limb which is fully reduced.
      */
+    @IntrinsicCandidate
     protected void square(long[] a, long[] r) {
         long aa0 = a[0];
         long aa1 = a[1];