mirror of
https://github.com/openjdk/jdk.git
synced 2026-06-06 18:53:37 +00:00
8384353: SHA3 AVX2 and AVX512 intrinsics and improvements
Reviewed-by: sviswanathan, ascarpino, semery
This commit is contained in:
parent
bb4d2abb0f
commit
114e3c6106
@ -3439,6 +3439,16 @@ void Assembler::movdqa(XMMRegister dst, Address src) {
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
// Move Aligned Double Quadword, store form: encodes a move of the XMM
// register `src` to the memory operand `dst` using a 128-bit vector length.
void Assembler::movdqa(Address dst, XMMRegister src) {
  InstructionMark mark(this);
  InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attrs.reset_is_clear_context();
  // Prefix (66 SIMD prefix, 0F opcode map), then the opcode byte, then the operand.
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int8(0x7F);
  emit_operand(src, dst, 0);
}
|
||||
|
||||
void Assembler::movdqu(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -3848,6 +3858,26 @@ void Assembler::evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool mer
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
// Move Aligned vector, unmasked store form: forwards to the masked overload
// with k0 as the opmask register.
void Assembler::evmovdqaq(Address dst, XMMRegister src, int vector_len) {
  // Unmasked instruction. The destination is memory, so ask for merge
  // semantics rather than zeroing, matching the unmasked evmovntdquq store
  // wrapper in this file.
  evmovdqaq(dst, k0, src, /*merge*/ true, vector_len);
}
|
||||
|
||||
// Move Aligned vector, masked store form: encodes a move of `src` to the
// memory operand `dst` under opmask `mask`.
// When `merge` is true the clear-context attribute is reset before the
// prefix is emitted; otherwise it is left at its default.
void Assembler::evmovdqaq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Prefix (66 SIMD prefix, 0F opcode map), then the opcode byte, then the operand.
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int8(0x7F);
  emit_operand(src, dst, 0);
}
|
||||
|
||||
void Assembler::evmovntdquq(Address dst, XMMRegister src, int vector_len) {
|
||||
// Unmasked instruction
|
||||
evmovntdquq(dst, k0, src, /*merge*/ true, vector_len);
|
||||
@ -9481,6 +9511,20 @@ void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
emit_int16((unsigned char)0xF3, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// vpsllq with the shift operand taken from memory (`shift`) instead of an
// XMM register. `vector_len` selects the vector width; the second assert
// checks that the CPU feature required for that width is available.
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
         vector_len == AVX_256bit ? VM_Version::supports_avx2() :
         vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");
  InstructionMark mark(this);
  // The W bit follows EVEX availability and is flagged as reverted below.
  const bool vex_w = VM_Version::supports_evex();
  InstructionAttr attrs(vector_len, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_rex_vex_w_reverted();
  attrs.set_address_attributes(/* tuple_type */ EVEX_M128, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int8((unsigned char)0xF3);
  emit_operand(dst, shift, 0);
}
|
||||
|
||||
// Shift packed integers logically right by specified number of bits.
|
||||
void Assembler::psrlw(XMMRegister dst, int shift) {
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -9572,6 +9616,20 @@ void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
emit_int16((unsigned char)0xD3, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Logical shift right of packed integers (vpsrlq) with the shift operand
// taken from memory (`shift`) instead of an XMM register. `vector_len`
// selects the vector width; the second assert checks that the CPU feature
// required for that width is available.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
         vector_len == AVX_256bit ? VM_Version::supports_avx2() :
         vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");
  InstructionMark mark(this);
  // The W bit follows EVEX availability and is flagged as reverted below.
  const bool vex_w = VM_Version::supports_evex();
  InstructionAttr attrs(vector_len, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_rex_vex_w_reverted();
  attrs.set_address_attributes(/* tuple_type */ EVEX_M128, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int8((unsigned char)0xD3);
  emit_operand(dst, shift, 0);
}
|
||||
|
||||
void Assembler::evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -9712,6 +9770,18 @@ void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
emit_int16(0x47, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Variable shift left of packed integers (vpsllvq) with the shift operand
// read from memory (`shift`). Requires AVX2 for 128- and 256-bit vectors
// and EVEX support for 512-bit vectors.
void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx2() :
         vector_len == AVX_256bit ? VM_Version::supports_avx2() :
         vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
  // Prefix (66 SIMD prefix, 0F 38 opcode map), then the opcode byte, then the operand.
  vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8((unsigned char)0x47);
  emit_operand(dst, shift, 0);
}
|
||||
|
||||
// Variable shift packed integers logically right.
|
||||
void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
|
||||
assert(UseAVX > 1, "requires AVX2");
|
||||
@ -9727,6 +9797,18 @@ void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
emit_int16(0x45, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Variable shift right of packed integers (vpsrlvq) with the shift operand
// read from memory (`shift`). Requires AVX2 for 128- and 256-bit vectors
// and EVEX support for 512-bit vectors.
void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx2() :
         vector_len == AVX_256bit ? VM_Version::supports_avx2() :
         vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
  // Prefix (66 SIMD prefix, 0F 38 opcode map), then the opcode byte, then the operand.
  vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8((unsigned char)0x45);
  emit_operand(dst, shift, 0);
}
|
||||
|
||||
// Variable shift right arithmetic packed integers.
|
||||
void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
|
||||
assert(UseAVX > 1, "requires AVX2");
|
||||
|
||||
@ -1612,6 +1612,7 @@ private:
|
||||
// Move Aligned Double Quadword
|
||||
void movdqa(XMMRegister dst, XMMRegister src);
|
||||
void movdqa(XMMRegister dst, Address src);
|
||||
void movdqa(Address dst, XMMRegister src);
|
||||
|
||||
// Move Unaligned Double Quadword
|
||||
void movdqu(Address dst, XMMRegister src);
|
||||
@ -1661,8 +1662,10 @@ private:
|
||||
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
// Move Aligned 512bit Vector
|
||||
void evmovdqaq(XMMRegister dst, Address src, int vector_len);
|
||||
void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
|
||||
void evmovdqaq(XMMRegister dst, Address src, int vector_len);
|
||||
void evmovdqaq(Address dst, XMMRegister src, int vector_len);
|
||||
void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
|
||||
void evmovdqaq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
void vmovsldup(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vmovshdup(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
@ -2860,6 +2863,7 @@ private:
|
||||
void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsllq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
|
||||
void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
|
||||
|
||||
// Logical shift right packed integers
|
||||
@ -2875,6 +2879,7 @@ private:
|
||||
void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsrlq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
|
||||
void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
|
||||
void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
@ -2895,10 +2900,12 @@ private:
|
||||
// Variable shift left packed integers
|
||||
void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsllvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
|
||||
|
||||
// Variable shift right packed integers
|
||||
void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsrlvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
|
||||
|
||||
// Variable shift right arithmetic packed integers
|
||||
void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
|
||||
@ -2124,6 +2124,26 @@ void MacroAssembler::vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len
|
||||
}
|
||||
}
|
||||
|
||||
// Aligned vector load from `src` into `dst`, choosing the instruction by
// vector length: evmovdqaq for 512-bit, vmovdqa for 256-bit, and movdqa
// for every other value of `vector_len`.
void MacroAssembler::vmovdqa(XMMRegister dst, Address src, int vector_len) {
  switch (vector_len) {
    case AVX_512bit:
      Assembler::evmovdqaq(dst, src, AVX_512bit);
      break;
    case AVX_256bit:
      Assembler::vmovdqa(dst, src);
      break;
    default:
      Assembler::movdqa(dst, src);
      break;
  }
}
|
||||
|
||||
// Aligned vector store of `src` to `dst`, choosing the instruction by
// vector length: evmovdqaq for 512-bit, vmovdqa for 256-bit, and movdqa
// for every other value of `vector_len`.
void MacroAssembler::vmovdqa(Address dst, XMMRegister src, int vector_len) {
  switch (vector_len) {
    case AVX_512bit:
      Assembler::evmovdqaq(dst, src, AVX_512bit);
      break;
    case AVX_256bit:
      Assembler::vmovdqa(dst, src);
      break;
    default:
      Assembler::movdqa(dst, src);
      break;
  }
}
|
||||
|
||||
void MacroAssembler::kmov(KRegister dst, Address src) {
|
||||
if (VM_Version::supports_avx512bw()) {
|
||||
kmovql(dst, src);
|
||||
|
||||
@ -1181,6 +1181,8 @@ public:
|
||||
using Assembler::vmovdqa;
|
||||
void vmovdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
|
||||
void vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
|
||||
void vmovdqa(XMMRegister dst, Address src, int vector_len);
|
||||
void vmovdqa(Address dst, XMMRegister src, int vector_len);
|
||||
|
||||
// AVX512 Unaligned
|
||||
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1323,7 +1323,8 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA && supports_evex() && supports_avx512bw()) {
|
||||
if (UseSHA && ((supports_evex() && supports_avx512vlbw()) ||
|
||||
(EnableX86ECoreOpts && !supports_hybrid()))) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
|
||||
}
|
||||
|
||||
@ -487,6 +487,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
|
||||
if (!UseSHA512Intrinsics) return true;
|
||||
break;
|
||||
case vmIntrinsics::_double_keccak:
|
||||
case vmIntrinsics::_quad_keccak:
|
||||
case vmIntrinsics::_sha3_implCompress:
|
||||
if (!UseSHA3Intrinsics) return true;
|
||||
break;
|
||||
|
||||
@ -526,9 +526,12 @@ class methodHandle;
|
||||
\
|
||||
/* support for sun.security.provider.SHAKE128Parallel */ \
|
||||
do_class(sun_security_provider_sha3_parallel, "sun/security/provider/SHA3Parallel") \
|
||||
do_intrinsic(_double_keccak, sun_security_provider_sha3_parallel, double_keccak_name, double_keccak_signature, F_S) \
|
||||
do_intrinsic(_double_keccak, sun_security_provider_sha3_parallel, double_keccak_name, double_keccak_signature, F_S) \
|
||||
do_name( double_keccak_name, "doubleKeccak") \
|
||||
do_signature(double_keccak_signature, "([J[J)I") \
|
||||
do_intrinsic(_quad_keccak, sun_security_provider_sha3_parallel, quad_keccak_name, quad_keccak_signature, F_S) \
|
||||
do_name( quad_keccak_name, "quadKeccak") \
|
||||
do_signature(quad_keccak_signature, "([J[J[J[J)I") \
|
||||
\
|
||||
/* support for sun.security.provider.DigestBase */ \
|
||||
do_class(sun_security_provider_digestbase, "sun/security/provider/DigestBase") \
|
||||
|
||||
@ -792,6 +792,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_sha5_implCompress:
|
||||
case vmIntrinsics::_sha3_implCompress:
|
||||
case vmIntrinsics::_double_keccak:
|
||||
case vmIntrinsics::_quad_keccak:
|
||||
case vmIntrinsics::_digestBase_implCompressMB:
|
||||
case vmIntrinsics::_multiplyToLen:
|
||||
case vmIntrinsics::_squareToLen:
|
||||
|
||||
@ -2285,6 +2285,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
||||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "sha3_implCompress") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "double_keccak") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "quad_keccak") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "sha3_implCompressMB") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
|
||||
|
||||
@ -600,7 +600,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
case vmIntrinsics::_sha3_implCompress:
|
||||
return inline_digestBase_implCompress(intrinsic_id());
|
||||
case vmIntrinsics::_double_keccak:
|
||||
return inline_double_keccak();
|
||||
case vmIntrinsics::_quad_keccak:
|
||||
return inline_keccak(intrinsic_id());
|
||||
|
||||
case vmIntrinsics::_digestBase_implCompressMB:
|
||||
return inline_digestBase_implCompressMB(predicate);
|
||||
@ -8471,33 +8472,60 @@ bool LibraryCallKit::inline_digestBase_implCompress(vmIntrinsics::ID id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------inline_double_keccak
|
||||
bool LibraryCallKit::inline_double_keccak() {
|
||||
address stubAddr;
|
||||
//------------------------------inline_keccak
|
||||
bool LibraryCallKit::inline_keccak(vmIntrinsics::ID id) {
|
||||
address stubAddr = nullptr;
|
||||
const char *stubName;
|
||||
assert(UseSHA3Intrinsics, "need SHA3 intrinsics support");
|
||||
assert(callee()->signature()->size() == 2, "double_keccak has 2 parameters");
|
||||
assert((id == vmIntrinsics::_double_keccak && callee()->signature()->size() == 2) ||
|
||||
(id == vmIntrinsics::_quad_keccak && callee()->signature()->size() == 4),
|
||||
"double_keccak wrong number of parameters");
|
||||
|
||||
int parmCnt = 0;
|
||||
switch (id) {
|
||||
case vmIntrinsics::_double_keccak:
|
||||
stubAddr = StubRoutines::double_keccak();
|
||||
stubName = "double_keccak";
|
||||
parmCnt = 2;
|
||||
break;
|
||||
case vmIntrinsics::_quad_keccak:
|
||||
stubAddr = StubRoutines::quad_keccak();
|
||||
stubName = "quad_keccak";
|
||||
parmCnt = 4;
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
stubAddr = StubRoutines::double_keccak();
|
||||
stubName = "double_keccak";
|
||||
if (!stubAddr) return false;
|
||||
|
||||
Node* status0 = argument(0);
|
||||
Node* status1 = argument(1);
|
||||
Node* state[4];
|
||||
for (int i = 0; i<parmCnt; i++) {
|
||||
state[i] = must_be_not_null(argument(i), true);
|
||||
state[i] = array_element_address(state[i], intcon(0), T_LONG);
|
||||
assert(state[i], "state[%d] is null", i);
|
||||
}
|
||||
|
||||
status0 = must_be_not_null(status0, true);
|
||||
status1 = must_be_not_null(status1, true);
|
||||
|
||||
Node* status0_start = array_element_address(status0, intcon(0), T_LONG);
|
||||
assert(status0_start, "status0 is null");
|
||||
Node* status1_start = array_element_address(status1, intcon(0), T_LONG);
|
||||
assert(status1_start, "status1 is null");
|
||||
Node* double_keccak = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
Node* keccak;
|
||||
switch (id) {
|
||||
case vmIntrinsics::_double_keccak:
|
||||
keccak = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::double_keccak_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
status0_start, status1_start);
|
||||
state[0], state[1]);
|
||||
break;
|
||||
case vmIntrinsics::_quad_keccak:
|
||||
keccak = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::quad_keccak_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
state[0], state[1], state[2], state[3]);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// return an int
|
||||
Node* retvalue = _gvn.transform(new ProjNode(double_keccak, TypeFunc::Parms));
|
||||
Node* retvalue = _gvn.transform(new ProjNode(keccak, TypeFunc::Parms));
|
||||
set_result(retvalue);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -344,7 +344,7 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_intpoly_montgomeryMult_P256();
|
||||
bool inline_intpoly_assign();
|
||||
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
|
||||
bool inline_double_keccak();
|
||||
bool inline_keccak(vmIntrinsics::ID id);
|
||||
bool inline_digestBase_implCompressMB(int predicate);
|
||||
bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
|
||||
BasicType elem_type, address stubAddr, const char *stubName,
|
||||
|
||||
@ -209,6 +209,7 @@ const TypeFunc* OptoRuntime::_digestBase_implCompress_without_sha3_Type = null
|
||||
const TypeFunc* OptoRuntime::_digestBase_implCompressMB_with_sha3_Type = nullptr;
|
||||
const TypeFunc* OptoRuntime::_digestBase_implCompressMB_without_sha3_Type = nullptr;
|
||||
const TypeFunc* OptoRuntime::_double_keccak_Type = nullptr;
|
||||
const TypeFunc* OptoRuntime::_quad_keccak_Type = nullptr;
|
||||
const TypeFunc* OptoRuntime::_multiplyToLen_Type = nullptr;
|
||||
const TypeFunc* OptoRuntime::_montgomeryMultiply_Type = nullptr;
|
||||
const TypeFunc* OptoRuntime::_montgomerySquare_Type = nullptr;
|
||||
@ -1221,6 +1222,26 @@ static const TypeFunc* make_double_keccak_Type() {
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
// Builds the TypeFunc describing the quad_keccak runtime call: four
// non-null pointer arguments (status0..status3) and an int result.
static const TypeFunc* make_quad_keccak_Type() {
  const int argcnt = 4;

  // Domain: every argument slot has the same NOTNULL pointer type.
  const Type** domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  for (int i = 0; i < argcnt; i++) {
    domain_fields[argp++] = TypePtr::NOTNULL;
  }
  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Range: a single int return value.
  const Type** range_fields = TypeTuple::fields(1);
  range_fields[TypeFunc::Parms + 0] = TypeInt::INT;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, range_fields);

  return TypeFunc::make(domain, range);
}
|
||||
|
||||
static const TypeFunc* make_multiplyToLen_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 5;
|
||||
@ -2305,6 +2326,7 @@ void OptoRuntime::initialize_types() {
|
||||
_digestBase_implCompressMB_with_sha3_Type = make_digestBase_implCompressMB_Type(/* is_sha3= */ true);
|
||||
_digestBase_implCompressMB_without_sha3_Type = make_digestBase_implCompressMB_Type(/* is_sha3= */ false);
|
||||
_double_keccak_Type = make_double_keccak_Type();
|
||||
_quad_keccak_Type = make_quad_keccak_Type();
|
||||
_multiplyToLen_Type = make_multiplyToLen_Type();
|
||||
_montgomeryMultiply_Type = make_montgomeryMultiply_Type();
|
||||
_montgomerySquare_Type = make_montgomerySquare_Type();
|
||||
|
||||
@ -162,6 +162,7 @@ class OptoRuntime : public AllStatic {
|
||||
static const TypeFunc* _digestBase_implCompressMB_with_sha3_Type;
|
||||
static const TypeFunc* _digestBase_implCompressMB_without_sha3_Type;
|
||||
static const TypeFunc* _double_keccak_Type;
|
||||
static const TypeFunc* _quad_keccak_Type;
|
||||
static const TypeFunc* _multiplyToLen_Type;
|
||||
static const TypeFunc* _montgomeryMultiply_Type;
|
||||
static const TypeFunc* _montgomerySquare_Type;
|
||||
@ -537,6 +538,11 @@ private:
|
||||
return _double_keccak_Type;
|
||||
}
|
||||
|
||||
// Returns the cached TypeFunc for the quad_keccak call. The backing field
// starts out as nullptr and is assigned in OptoRuntime::initialize_types(),
// so the assert catches use before that initialization.
static inline const TypeFunc* quad_keccak_Type() {
|
||||
assert(_quad_keccak_Type != nullptr, "should be initialized");
|
||||
return _quad_keccak_Type;
|
||||
}
|
||||
|
||||
static inline const TypeFunc* multiplyToLen_Type() {
|
||||
assert(_multiplyToLen_Type != nullptr, "should be initialized");
|
||||
return _multiplyToLen_Type;
|
||||
|
||||
@ -830,6 +830,8 @@
|
||||
sha3_implCompress) \
|
||||
do_stub(compiler, double_keccak) \
|
||||
do_entry(compiler, double_keccak, double_keccak, double_keccak) \
|
||||
do_stub(compiler, quad_keccak) \
|
||||
do_entry(compiler, quad_keccak, quad_keccak, quad_keccak) \
|
||||
do_stub(compiler, sha3_implCompressMB) \
|
||||
do_entry(compiler, sha3_implCompressMB, sha3_implCompressMB, \
|
||||
sha3_implCompressMB) \
|
||||
|
||||
@ -827,7 +827,7 @@ public final class ML_KEM {
|
||||
private short[][][] generateA(byte[] rho, Boolean transposed) {
|
||||
short[][][] a = new short[mlKem_k][mlKem_k][];
|
||||
|
||||
int nrPar = 2;
|
||||
int nrPar = 4;
|
||||
int rhoLen = rho.length;
|
||||
byte[] seedBuf = new byte[XOF_BLOCK_LEN];
|
||||
System.arraycopy(rho, 0, seedBuf, 0, rho.length);
|
||||
|
||||
@ -1146,7 +1146,7 @@ public class ML_DSA {
|
||||
a[i] = new int[mlDsa_l][];
|
||||
}
|
||||
|
||||
int nrPar = 2;
|
||||
int nrPar = 4;
|
||||
int rhoLen = seed.length;
|
||||
byte[] seedBuf = new byte[SHAKE128_BLOCK_SIZE];
|
||||
System.arraycopy(seed, 0, seedBuf, 0, seed.length);
|
||||
|
||||
@ -97,6 +97,7 @@ public abstract class SHA3 extends DigestBase {
|
||||
private SHA3(String name, int digestLength, byte suffix, int c) {
|
||||
super(name, digestLength, (WIDTH - c));
|
||||
this.suffix = suffix;
|
||||
blockSizeCheck();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -113,6 +114,14 @@ public abstract class SHA3 extends DigestBase {
|
||||
Preconditions.checkIndex(ofs + blockSize - 1, b.length, Preconditions.AIOOBE_FORMATTER);
|
||||
}
|
||||
|
||||
private void blockSizeCheck() {
|
||||
switch(blockSize) {
|
||||
case 72, 104, 136, 144, 168: break;
|
||||
default:
|
||||
throw new ProviderException("Invalid SHA3 blocksize:" + blockSize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Core compression function. Processes blockSize bytes at a time
|
||||
* and updates the state of this object.
|
||||
|
||||
@ -36,7 +36,7 @@ import static sun.security.provider.ByteArrayAccess.l2bLittle;
|
||||
import static sun.security.provider.SHA3.keccak;
|
||||
|
||||
/*
|
||||
* This class is for making it possible that NRPAR (= 2) (rather restricted)
|
||||
* This class is for making it possible that NRPAR (= 4) (rather restricted)
|
||||
* SHAKE computations execute in parallel.
|
||||
* The restrictions are:
|
||||
* 1. The messages processed should be such that the absorb phase should
|
||||
@ -54,7 +54,7 @@ public class SHA3Parallel {
|
||||
private static final int DM = 5; // dimension of lanesArr
|
||||
private byte[][] buffers;
|
||||
private long[][] lanesArr;
|
||||
private static final int NRPAR = 2;
|
||||
private static final int NRPAR = 4;
|
||||
|
||||
private SHA3Parallel(byte[][] buffers, int blockSize) throws InvalidAlgorithmParameterException {
|
||||
if ((buffers.length != NRPAR) || (buffers[0].length < blockSize)) {
|
||||
@ -81,13 +81,20 @@ public class SHA3Parallel {
|
||||
}
|
||||
|
||||
public int squeezeBlock() {
|
||||
int retVal = doubleKeccak(lanesArr[0], lanesArr[1]);
|
||||
int retVal = quadKeccak(lanesArr[0], lanesArr[1], lanesArr[2], lanesArr[3]);
|
||||
for (int i = 0; i < NRPAR; i++) {
|
||||
l2bLittle(lanesArr[i], 0, buffers[i], 0, blockSize);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
private static int quadKeccak(long[] lanes0, long[] lanes1, long[] lanes2, long[] lanes3) {
|
||||
doubleKeccak(lanes0, lanes1);
|
||||
doubleKeccak(lanes2, lanes3);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
private static int doubleKeccak(long[] lanes0, long[] lanes1) {
|
||||
doubleKeccakJava(lanes0, lanes1);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user