8371305: X25519 should utilize x86 intrinsics

Reviewed-by: vpaprotski, ascarpino
This commit is contained in:
Shawn Emery 2026-06-05 16:20:42 +00:00 committed by Anthony Scarpino
parent 0fcf41112c
commit 0dd26b312e
16 changed files with 476 additions and 7 deletions

View File

@ -4904,6 +4904,11 @@ void StubGenerator::generate_compiler_stubs() {
StubRoutines::_intpoly_assign = generate_intpoly_assign();
}
if (UseIntPoly25519Intrinsics) {
StubRoutines::_intpoly_mult_25519 = generate_intpoly_mult_25519();
StubRoutines::_intpoly_square_25519 = generate_intpoly_square_25519();
}
if (UseMD5Intrinsics) {
StubRoutines::_md5_implCompress = generate_md5_implCompress(StubId::stubgen_md5_implCompress_id);
StubRoutines::_md5_implCompressMB = generate_md5_implCompress(StubId::stubgen_md5_implCompressMB_id);

View File

@ -496,6 +496,9 @@ class StubGenerator: public StubCodeGenerator {
address generate_intpoly_montgomeryMult_P256();
address generate_intpoly_assign();
address generate_intpoly_mult_25519();
address generate_intpoly_square_25519();
// SHA3 stubs
void generate_sha3_stubs();

View File

@ -0,0 +1,306 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "macroAssembler_x86.hpp"
#include "stubGenerator_x86_64.hpp"
#define __ _masm->
// Multiplier applied to any partial product that wraps past the top limb
// (the pseudo-Mersenne reduction used by the helpers below).
const int32_t term = 19;
// Number of 64-bit limbs read from each operand and written to the result.
const int32_t limbs = 5;
// Bits per limb: every 128-bit product is split at this bit position.
const int32_t bpl = 51;
// Left shift applied to the high product word (rdx) so that it lines up with
// the low word after the low word has been shifted right by bpl.
const int32_t rem = 64 - bpl;
// Low bpl (51) bits set; extracts the low part of a product.
const uint64_t MASK = 0x7FFFFFFFFFFFF;
// 2^(bpl - 1); added to a limb before the arithmetic right shift by bpl in the
// carry steps.
const uint64_t CARRY_ADD = 0x4000000000000;
// Emits the limb-wise multiplication r = a * b for polynomial arithmetic in
// Curve25519 (5 limbs, 51 bits each).
//
// The algorithm matches the Java implementation, except that each partial
// product is folded directly into one of five column accumulators using
// pseudo-Mersenne reduction (wrapped terms are multiplied by 19) instead of
// keeping the full 10 columns that Java uses. This reduces register pressure.
//
//   aLimbs, bLimbs, rLimbs  addresses of the two operands and of the result
//   c[]                     five column accumulators
//   bArg, d, b, mask        scratch registers
//
// rax and rdx are overwritten by the widening multiplies. The emitted code
// pops rdx right before storing the result, so the caller must have pushed
// rdx as the last item on the stack.
void multiply_25519_scalar(const Register aLimbs, const Register bLimbs, const Register rLimbs, Register c[], Register bArg, Register d, Register b, Register mask, MacroAssembler* _masm) {
  // Clear every column accumulator
  for (int col = 0; col < limbs; col++) {
    __ xorq(c[col], c[col]);
  }

  __ mov64(mask, MASK);
  // Copy the b pointer into bArg; rdx is overwritten by every multiply below
  __ movq(bArg, bLimbs);

  // Schoolbook product over signed 5x51 bit limbs, split into high/low parts
  for (int bi = 0; bi < limbs; bi++) {
    __ movq(b, Address(bArg, bi * 8));
    for (int ai = 0; ai < limbs; ai++) {
      const int lo_col = bi + ai;     // column receiving the low 51 bits
      const int hi_col = lo_col + 1;  // column receiving the remaining bits

      __ movq(rax, Address(aLimbs, ai * 8));
      __ imulq(b); // rdx:rax = a[ai] * b[bi]

      // d = low 51 bits of the product
      __ movq(d, rax);
      __ andq(d, mask);

      // rax = product >> 51, assembled from both halves
      __ shrq(rax, bpl);
      __ shlq(rdx, rem);
      __ orq(rax, rdx);

      // Anything that lands past the top limb wraps around multiplied by 19
      if (hi_col >= limbs) {
        __ imulq(rax, rax, term);
      }
      if (lo_col >= limbs) {
        __ imulq(d, d, term);
      }

      __ addq(c[lo_col % limbs], d);
      __ addq(c[hi_col % limbs], rax);
    }
  }

  // Carry propagation. Each step computes carry = (limb + 2^50) >> 51 with an
  // arithmetic shift, adds it to the next limb and removes carry << 51 from
  // the current one.
  Register carry = bArg; // the b pointer is no longer needed
  __ mov64(mask, CARRY_ADD);

  // Limb 3 into limb 4
  __ movq(carry, mask);
  __ addq(carry, c[3]);
  __ sarq(carry, bpl);
  __ addq(c[4], carry);
  __ shlq(carry, bpl);
  __ subq(c[3], carry);

  // Limb 4: its carry is multiplied by 19 and folded back into limb 0
  __ movq(carry, mask);
  __ addq(carry, c[4]);
  __ sarq(carry, bpl);
  __ mov64(rax, term);
  __ imulq(carry); // rax = 19 * carry
  __ addq(c[0], rax);
  __ shlq(carry, bpl);
  __ subq(c[4], carry);

  // Limbs 0 - 3, each into its successor
  for (int k = 0; k < (limbs - 1); k++) {
    __ movq(carry, mask);
    __ addq(carry, c[k]);
    __ sarq(carry, bpl);
    __ addq(c[k + 1], carry);
    __ shlq(carry, bpl);
    __ subq(c[k], carry);
  }

  // Bring back the rdx value saved by the caller before rLimbs is used
  __ pop_ppx(rdx);
  for (int k = 0; k < limbs; k++) {
    __ movq(Address(rLimbs, k * 8), c[k]);
  }
}
// Emits the limb-wise squaring r = a * a for polynomial arithmetic in
// Curve25519 (5 limbs, 51 bits each).
//
// The algorithm matches the Java implementation, except that each partial
// product is folded directly into one of five column accumulators using
// pseudo-Mersenne reduction (wrapped terms are multiplied by 19) instead of
// keeping the full 10 columns that Java uses. This reduces register pressure.
// Only the products a[i] * a[j] with j >= i are computed; those with j > i
// are doubled.
//
//   aLimbs, rLimbs          addresses of the operand and of the result
//   c[]                     five column accumulators
//   aArg, d, carry, mask    scratch registers
//
// rax and rdx are overwritten by the widening multiplies. The emitted code
// pops rdx right before storing the result, so the caller must have pushed
// rdx as the last item on the stack.
void square_25519_scalar(const Register aLimbs, const Register rLimbs, Register c[], Register aArg, Register d, Register carry, Register mask, MacroAssembler* _masm) {
  // Clear every column accumulator
  for (int col = 0; col < limbs; col++) {
    __ xorq(c[col], c[col]);
  }

  __ mov64(mask, MASK);

  // High/low multiplication over signed 5x51 bit limbs
  for (int row = 0; row < limbs; row++) {
    const int diag_lo = row * 2;      // column of the low part of a[row]^2
    const int diag_hi = diag_lo + 1;  // column of the high part of a[row]^2

    // Diagonal term
    __ movq(aArg, Address(aLimbs, row * 8));
    __ movq(rax, aArg);
    __ imulq(aArg); // rdx:rax = a[row] * a[row]
    __ movq(d, rax);
    __ andq(d, mask); // d = low 51 bits
    __ shrq(rax, bpl);
    __ shlq(rdx, rem);
    __ orq(rax, rdx); // rax = product >> 51
    if (diag_hi >= limbs) {
      __ imulq(rax, rax, term);
    }
    if (diag_lo >= limbs) {
      __ imulq(d, d, term);
    }
    __ addq(c[diag_lo % limbs], d);
    __ addq(c[diag_hi % limbs], rax);

    // Cross terms: each occurs twice in the square, so both parts are doubled
    for (int other = row + 1; other < limbs; other++) {
      const int lo_col = row + other;
      const int hi_col = lo_col + 1;

      __ movq(rax, Address(aLimbs, other * 8));
      __ imulq(aArg); // rdx:rax = a[other] * a[row]
      __ movq(d, rax);
      __ andq(d, mask);
      __ shlq(d, 1); // d = 2 * (low 51 bits)
      __ shrq(rax, bpl);
      __ shlq(rdx, rem);
      __ orq(rax, rdx);
      __ shlq(rax, 1); // rax = 2 * (product >> 51)
      if (hi_col >= limbs) {
        __ imulq(rax, rax, term);
      }
      if (lo_col >= limbs) {
        __ imulq(d, d, term);
      }
      __ addq(c[lo_col % limbs], d);
      __ addq(c[hi_col % limbs], rax);
    }
  }

  // Carry propagation. Each step computes carry = (limb + 2^50) >> 51 with an
  // arithmetic shift, adds it to the next limb and removes carry << 51 from
  // the current one.
  __ mov64(mask, CARRY_ADD);

  // Limb 3 into limb 4
  __ movq(carry, mask);
  __ addq(carry, c[3]);
  __ sarq(carry, bpl);
  __ addq(c[4], carry);
  __ shlq(carry, bpl);
  __ subq(c[3], carry);

  // Limb 4: its carry is multiplied by 19 and folded back into limb 0
  __ movq(carry, mask);
  __ addq(carry, c[4]);
  __ sarq(carry, bpl);
  __ mov64(rax, term);
  __ imulq(carry); // rax = 19 * carry
  __ addq(c[0], rax);
  __ shlq(carry, bpl);
  __ subq(c[4], carry);

  // Limbs 0 - 3, each into its successor
  for (int k = 0; k < (limbs - 1); k++) {
    __ movq(carry, mask);
    __ addq(carry, c[k]);
    __ sarq(carry, bpl);
    __ addq(c[k + 1], carry);
    __ shlq(carry, bpl);
    __ subq(c[k], carry);
  }

  // Bring back the rdx value saved by the caller before rLimbs is used
  __ pop_ppx(rdx);
  for (int k = 0; k < limbs; k++) {
    __ movq(Address(rLimbs, k * 8), c[k]);
  }
}
// Generates the stub for the IntegerPolynomial25519 multiply intrinsic.
//
// Stub arguments:
//   c_rarg0 - address of the a limbs
//   c_rarg1 - address of the b limbs
//   c_rarg2 - address of the result limbs
//
// Returns the stub entry point, taken from the archive when one is available.
address StubGenerator::generate_intpoly_mult_25519() {
  StubId stub_id = StubId::stubgen_intpoly_mult_25519_id;
  int entry_count = StubInfo::entry_count(stub_id);
  assert(entry_count == 1, "sanity check");

  // Reuse previously archived code if there is any
  address start = load_archive_data(stub_id);
  if (start != nullptr) {
    return start;
  }

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, stub_id);
  start = __ pc();
  __ enter();

  // Register Map
  const Register aLimbs = c_rarg0; // rdi | rcx
  const Register bLimbs = c_rarg1; // rsi | rdx
  const Register rLimbs = c_rarg2; // rdx | r8
  Register c[] = {r9, r10, r11, r12, r13};
  Register bArg = r14;
  Register d = r15;
  Register b = rbp;
  Register mask = rbx;

  // Registers saved around the body, in push order
  const Register saved_regs[] = {rbp, rbx, r12, r13, r14, r15};
  const int saved_count = (int)(sizeof(saved_regs) / sizeof(saved_regs[0]));

  for (int i = 0; i < saved_count; i++) {
    __ push_ppx(saved_regs[i]);
  }
  // rdx goes on last: the helper pops it itself just before storing the result
  __ push_ppx(rdx);

  multiply_25519_scalar(aLimbs, bLimbs, rLimbs, c, bArg, d, b, mask, _masm);

  // rdx has already been popped by the helper; restore the rest in reverse
  for (int i = saved_count - 1; i >= 0; i--) {
    __ pop_ppx(saved_regs[i]);
  }

  __ leave();
  __ ret(0);

  // Record the stub entry and end
  store_archive_data(stub_id, start, __ pc());
  return start;
}
// Generates the stub for the IntegerPolynomial25519 square intrinsic.
//
// Stub arguments:
//   c_rarg0 - address of the a limbs
//   c_rarg1 - address of the result limbs
//
// Returns the stub entry point, taken from the archive when one is available.
address StubGenerator::generate_intpoly_square_25519() {
  StubId stub_id = StubId::stubgen_intpoly_square_25519_id;
  int entry_count = StubInfo::entry_count(stub_id);
  assert(entry_count == 1, "sanity check");

  // Reuse previously archived code if there is any
  address start = load_archive_data(stub_id);
  if (start != nullptr) {
    return start;
  }

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, stub_id);
  start = __ pc();
  __ enter();

  // Register Map
  const Register aLimbs = c_rarg0; // rdi | rcx
  const Register rLimbs = c_rarg1; // rsi | rdx
  Register c[] = {r9, r10, r11, r12, r13};
  Register aArg = r14;
  Register d = r15;
  Register carry = rbp;
  Register mask = rbx;

  // Registers saved around the body, in push order
  const Register saved_regs[] = {rbp, rbx, r12, r13, r14, r15};
  const int saved_count = (int)(sizeof(saved_regs) / sizeof(saved_regs[0]));

  for (int i = 0; i < saved_count; i++) {
    __ push_ppx(saved_regs[i]);
  }
  // rdx goes on last: the helper pops it itself just before storing the result
  __ push_ppx(rdx);

  square_25519_scalar(aLimbs, rLimbs, c, aArg, d, carry, mask, _masm);

  // rdx has already been popped by the helper; restore the rest in reverse
  for (int i = saved_count - 1; i >= 0; i--) {
    __ pop_ppx(saved_regs[i]);
  }

  __ leave();
  __ ret(0);

  // Record the stub entry and end
  store_archive_data(stub_id, start, __ pc());
  return start;
}
#undef __

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, 2025, Intel Corporation. All rights reserved.
* Copyright (c) 2024, 2026, Intel Corporation. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -676,7 +676,7 @@ address StubGenerator::generate_intpoly_assign() {
// KNOWN Lengths:
// MontgomeryIntPolynP256: 5 = 4 + 1
// IntegerPolynomial1305: 5 = 4 + 1
// IntegerPolynomial25519: 10 = 8 + 2
// IntegerPolynomial25519: 5 = 4 + 1
// IntegerPolynomialP256: 10 = 8 + 2
// Curve25519OrderField: 10 = 8 + 2
// Curve25519OrderField: 10 = 8 + 2

View File

@ -1407,6 +1407,10 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
}
if (FLAG_IS_DEFAULT(UseIntPoly25519Intrinsics)) {
UseIntPoly25519Intrinsics = true;
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -527,6 +527,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_intpoly_assign:
if (!UseIntPolyIntrinsics) return true;
break;
case vmIntrinsics::_intpoly_mult_25519:
case vmIntrinsics::_intpoly_square_25519:
if (!UseIntPoly25519Intrinsics) return true;
break;
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C:
if (!UseCRC32CIntrinsics) return true;

View File

@ -549,6 +549,13 @@ class methodHandle;
do_name(intPolyAssign_name, "conditionalAssign") \
do_signature(intPolyAssign_signature, "(I[J[J)V") \
\
/* support for sun.security.util.math.intpoly.IntegerPolynomial25519 */ \
do_class(sun_security_util_math_intpoly_IntegerPolynomial25519, "sun/security/util/math/intpoly/IntegerPolynomial25519") \
do_intrinsic(_intpoly_mult_25519, sun_security_util_math_intpoly_IntegerPolynomial25519, intPolyMult_name, intPolyMult_signature, F_R) \
do_intrinsic(_intpoly_square_25519, sun_security_util_math_intpoly_IntegerPolynomial25519, intPolySquare_name, intPolySquare_signature, F_R) \
do_name(intPolySquare_name, "square") \
do_signature(intPolySquare_signature, "([J[J)V") \
\
/* support for java.util.Base64.Encoder*/ \
do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder") \
do_intrinsic(_base64_encodeBlock, java_util_Base64_Encoder, encodeBlock_name, encodeBlock_signature, F_R) \

View File

@ -819,6 +819,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_poly1305_processBlocks:
case vmIntrinsics::_intpoly_montgomeryMult_P256:
case vmIntrinsics::_intpoly_assign:
case vmIntrinsics::_intpoly_mult_25519:
case vmIntrinsics::_intpoly_square_25519:
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32:

View File

@ -2272,6 +2272,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "poly1305_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "intpoly_montgomeryMult_P256") == 0 ||
strcmp(call->as_CallLeaf()->_name, "intpoly_assign") == 0 ||
strcmp(call->as_CallLeaf()->_name, "intpoly_mult_25519") == 0 ||
strcmp(call->as_CallLeaf()->_name, "intpoly_square_25519") == 0 ||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "chacha20Block") == 0 ||
strcmp(call->as_CallLeaf()->_name, "kyberNtt") == 0 ||

View File

@ -666,6 +666,10 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_intpoly_montgomeryMult_P256();
case vmIntrinsics::_intpoly_assign:
return inline_intpoly_assign();
case vmIntrinsics::_intpoly_mult_25519:
return inline_intpoly_mult_25519();
case vmIntrinsics::_intpoly_square_25519:
return inline_intpoly_square_25519();
case vmIntrinsics::_encodeISOArray:
case vmIntrinsics::_encodeByteISOArray:
return inline_encodeISOArray(false);
@ -8373,6 +8377,70 @@ bool LibraryCallKit::inline_intpoly_assign() {
return true;
}
// Intrinsic for IntegerPolynomial25519.mult(long[] a, long[] b, long[] r).
//
// Emits a leaf call to the intpoly_mult_25519 stub, passing the address of
// element 0 of each array. Returns false when no stub has been generated so
// that the regular Java method is used; returns true otherwise, including
// when the receiver null check leaves the current path dead.
bool LibraryCallKit::inline_intpoly_mult_25519() {
  assert(UseIntPoly25519Intrinsics, "need intpoly25519 intrinsics support");
  assert(callee()->signature()->size() == 3, "intpoly_mult_25519 has %d parameters", callee()->signature()->size());

  address stubAddr = StubRoutines::intpoly_mult_25519();
  const char* stubName = "intpoly_mult_25519";
  if (!stubAddr) {
    return false;
  }

  null_check_receiver(); // null-check receiver
  if (stopped()) {
    return true;
  }

  // Argument 0 is the receiver; the three arrays follow it
  Node* a = argument(1);
  Node* b = argument(2);
  Node* r = argument(3);

  a = must_be_not_null(a, true);
  b = must_be_not_null(b, true);
  r = must_be_not_null(r, true);

  // The stub operates on raw addresses of the first long of each array
  Node* a_start = array_element_address(a, intcon(0), T_LONG);
  assert(a_start, "a array is null");
  Node* b_start = array_element_address(b, intcon(0), T_LONG);
  assert(b_start, "b array is null");
  Node* r_start = array_element_address(r, intcon(0), T_LONG);
  assert(r_start, "r array is null");

  make_runtime_call(RC_LEAF | RC_NO_FP,
                    OptoRuntime::intpoly_mult_25519_Type(),
                    stubAddr, stubName, TypePtr::BOTTOM,
                    a_start, b_start, r_start);
  return true;
}
// Intrinsic for IntegerPolynomial25519.square(long[] a, long[] r).
//
// Emits a leaf call to the intpoly_square_25519 stub, passing the address of
// element 0 of each array. Returns false when no stub has been generated so
// that the regular Java method is used; returns true otherwise, including
// when the receiver null check leaves the current path dead.
bool LibraryCallKit::inline_intpoly_square_25519() {
  assert(UseIntPoly25519Intrinsics, "need intpoly25519 intrinsics support");
  // The message names this intrinsic (it previously said intpoly_mult_25519)
  assert(callee()->signature()->size() == 2, "intpoly_square_25519 has %d parameters", callee()->signature()->size());

  address stubAddr = StubRoutines::intpoly_square_25519();
  const char* stubName = "intpoly_square_25519";
  if (!stubAddr) {
    return false;
  }

  null_check_receiver(); // null-check receiver
  if (stopped()) {
    return true;
  }

  // Argument 0 is the receiver; the two arrays follow it
  Node* a = argument(1);
  Node* r = argument(2);

  a = must_be_not_null(a, true);
  r = must_be_not_null(r, true);

  // The stub operates on raw addresses of the first long of each array
  Node* a_start = array_element_address(a, intcon(0), T_LONG);
  assert(a_start, "a array is null");
  Node* r_start = array_element_address(r, intcon(0), T_LONG);
  assert(r_start, "r array is null");

  make_runtime_call(RC_LEAF | RC_NO_FP,
                    OptoRuntime::intpoly_square_25519_Type(),
                    stubAddr, stubName, TypePtr::BOTTOM,
                    a_start, r_start);
  return true;
}
//------------------------------inline_digestBase_implCompress-----------------------
//
// Calculate MD5 for single-block byte[] array.

View File

@ -343,6 +343,8 @@ class LibraryCallKit : public GraphKit {
bool inline_poly1305_processBlocks();
bool inline_intpoly_montgomeryMult_P256();
bool inline_intpoly_assign();
bool inline_intpoly_mult_25519();
bool inline_intpoly_square_25519();
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
bool inline_keccak(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -237,6 +237,8 @@ const TypeFunc* OptoRuntime::_string_IndexOf_Type = nullptr;
const TypeFunc* OptoRuntime::_poly1305_processBlocks_Type = nullptr;
const TypeFunc* OptoRuntime::_intpoly_montgomeryMult_P256_Type = nullptr;
const TypeFunc* OptoRuntime::_intpoly_assign_Type = nullptr;
const TypeFunc* OptoRuntime::_intpoly_mult_25519_Type = nullptr;
const TypeFunc* OptoRuntime::_intpoly_square_25519_Type = nullptr;
const TypeFunc* OptoRuntime::_updateBytesCRC32_Type = nullptr;
const TypeFunc* OptoRuntime::_updateBytesCRC32C_Type = nullptr;
const TypeFunc* OptoRuntime::_updateBytesAdler32_Type = nullptr;
@ -1786,6 +1788,41 @@ static const TypeFunc* make_intpoly_assign_Type() {
return TypeFunc::make(domain, range);
}
// Builds the call signature of the intpoly_mult_25519 stub:
// three non-null pointer arguments (a, b, result) and no return value.
static const TypeFunc* make_intpoly_mult_25519_Type() {
  int argcnt = 3;

  // Domain: the incoming arguments
  const Type** domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypePtr::NOTNULL; // a array
  domain_fields[argp++] = TypePtr::NOTNULL; // b array
  domain_fields[argp++] = TypePtr::NOTNULL; // r(esult) array
  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, domain_fields);

  // Range: a result slot is still allocated even though the stub is void
  const Type** range_fields = TypeTuple::fields(1);
  range_fields[TypeFunc::Parms + 0] = nullptr; // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);

  return TypeFunc::make(domain, range);
}
// Builds the call signature of the intpoly_square_25519 stub:
// two non-null pointer arguments (a, result) and no return value.
static const TypeFunc* make_intpoly_square_25519_Type() {
  int argcnt = 2;

  // Domain: the incoming arguments
  const Type** domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypePtr::NOTNULL; // a array
  domain_fields[argp++] = TypePtr::NOTNULL; // r(esult) array
  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, domain_fields);

  // Range: a result slot is still allocated even though the stub is void
  const Type** range_fields = TypeTuple::fields(1);
  range_fields[TypeFunc::Parms + 0] = nullptr; // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);

  return TypeFunc::make(domain, range);
}
//------------- Interpreter state for on stack replacement
static const TypeFunc* make_osr_end_Type() {
// create input type (domain)
@ -2354,6 +2391,8 @@ void OptoRuntime::initialize_types() {
_poly1305_processBlocks_Type = make_poly1305_processBlocks_Type();
_intpoly_montgomeryMult_P256_Type = make_intpoly_montgomeryMult_P256_Type();
_intpoly_assign_Type = make_intpoly_assign_Type();
_intpoly_mult_25519_Type = make_intpoly_mult_25519_Type();
_intpoly_square_25519_Type = make_intpoly_square_25519_Type();
_updateBytesCRC32_Type = make_updateBytesCRC32_Type();
_updateBytesCRC32C_Type = make_updateBytesCRC32C_Type();
_updateBytesAdler32_Type = make_updateBytesAdler32_Type();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -190,6 +190,8 @@ class OptoRuntime : public AllStatic {
static const TypeFunc* _poly1305_processBlocks_Type;
static const TypeFunc* _intpoly_montgomeryMult_P256_Type;
static const TypeFunc* _intpoly_assign_Type;
static const TypeFunc* _intpoly_mult_25519_Type;
static const TypeFunc* _intpoly_square_25519_Type;
static const TypeFunc* _updateBytesCRC32_Type;
static const TypeFunc* _updateBytesCRC32C_Type;
static const TypeFunc* _updateBytesAdler32_Type;
@ -687,6 +689,18 @@ private:
return _intpoly_assign_Type;
}
// IntegerPolynomial25519 multiply function.
// Returns the cached TypeFunc for the intpoly_mult_25519 call; asserts that
// the cached pointer is non-null before handing it out.
static inline const TypeFunc* intpoly_mult_25519_Type() {
assert(_intpoly_mult_25519_Type != nullptr, "should be initialized");
return _intpoly_mult_25519_Type;
}
// IntegerPolynomial25519 square function.
// Returns the cached TypeFunc for the intpoly_square_25519 call; asserts that
// the cached pointer is non-null before handing it out.
static inline const TypeFunc* intpoly_square_25519_Type() {
assert(_intpoly_square_25519_Type != nullptr, "should be initialized");
return _intpoly_square_25519_Type;
}
/**
* int updateBytesCRC32(int crc, byte* b, int len)
*/

View File

@ -229,9 +229,13 @@ const int ObjectAlignmentInBytes = 8;
\
product(bool, UsePoly1305Intrinsics, false, DIAGNOSTIC, \
"Use intrinsics for sun.security.util.math.intpoly") \
product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC, \
\
product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC, \
"Use intrinsics for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256") \
\
product(bool, UseIntPoly25519Intrinsics, false, DIAGNOSTIC, \
"Use intrinsics for sun.security.util.math.intpoly.IntegerPolynomial25519") \
\
product(size_t, LargePageSizeInBytes, 0, \
"Maximum large page size used (0 will use the default large " \
"page size for the environment as the maximum) " \

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -801,6 +801,12 @@
intpoly_montgomeryMult_P256, intpoly_montgomeryMult_P256) \
do_stub(compiler, intpoly_assign) \
do_entry(compiler, intpoly_assign, intpoly_assign, intpoly_assign) \
do_stub(compiler, intpoly_mult_25519) \
do_entry(compiler, intpoly_mult_25519, \
intpoly_mult_25519, intpoly_mult_25519) \
do_stub(compiler, intpoly_square_25519) \
do_entry(compiler, intpoly_square_25519, \
intpoly_square_25519, intpoly_square_25519) \
do_stub(compiler, md5_implCompress) \
do_entry(compiler, md5_implCompress, md5_implCompress, \
md5_implCompress) \

View File

@ -26,6 +26,7 @@
package sun.security.util.math.intpoly;
import java.math.BigInteger;
import jdk.internal.vm.annotation.IntrinsicCandidate;
public final class IntegerPolynomial25519 extends IntegerPolynomial {
private static final int BITS_PER_LIMB = 51;
@ -235,6 +236,7 @@ public final class IntegerPolynomial25519 extends IntegerPolynomial {
* @param b [in] the limb operand to multiply.
* @param r [out] the product of the limbs operands that is fully reduced.
*/
@IntrinsicCandidate
protected void mult(long[] a, long[] b, long[] r) {
long aa0 = a[0];
long aa1 = a[1];
@ -414,6 +416,7 @@ public final class IntegerPolynomial25519 extends IntegerPolynomial {
* @param a [in] the limb operand to square.
* @param r [out] the resulting square of the limb which is fully reduced.
*/
@IntrinsicCandidate
protected void square(long[] a, long[] r) {
long aa0 = a[0];
long aa1 = a[1];