mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8341052: SHA-512 implementation using SHA-NI
Reviewed-by: jbhateja, ascarpino, sviswanathan, sparasa
This commit is contained in:
parent
54a744b023
commit
18bcbf7941
@ -6751,6 +6751,27 @@ void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
|
||||
emit_int16((unsigned char)0xCD, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// VSHA512MSG1: SHA-512 message-schedule update, part 1.
// Encoding per this sequence: VEX.256 F2 0F38 (W0), opcode 0xCC, register-direct.
void Assembler::sha512msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
  // 256-bit legacy (non-EVEX) VEX form: no masking, no vector-length encoding.
  InstructionAttr attr(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // Opcode byte then ModRM with mod=11 (register operands).
  emit_int16((unsigned char)0xCC, (0xC0 | enc));
}
|
||||
|
||||
// VSHA512MSG2: SHA-512 message-schedule update, part 2.
// Encoding per this sequence: VEX.256 F2 0F38 (W0), opcode 0xCD, register-direct.
void Assembler::sha512msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
  // 256-bit legacy (non-EVEX) VEX form: no masking, no vector-length encoding.
  InstructionAttr attr(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // Opcode byte then ModRM with mod=11 (register operands).
  emit_int16((unsigned char)0xCD, (0xC0 | enc));
}
|
||||
|
||||
// VSHA512RNDS2: perform two SHA-512 rounds.
// Encoding per this sequence: VEX.256 F2 0F38 (W0), opcode 0xCB, register-direct,
// with nds carried in the VEX.vvvv field.
void Assembler::sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
  // 256-bit legacy (non-EVEX) VEX form: no masking, no vector-length encoding.
  InstructionAttr attr(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attr);
  // Opcode byte then ModRM with mod=11 (register operands).
  emit_int16((unsigned char)0xCB, (0xC0 | enc));
}
|
||||
|
||||
void Assembler::shll(Register dst, int imm8) {
|
||||
assert(isShiftCount(imm8), "illegal shift count");
|
||||
int encode = prefix_and_encode(dst->encoding());
|
||||
@ -11670,6 +11691,19 @@ void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
// VBROADCASTI128: broadcast a 128-bit memory operand into both lanes of a
// 256-bit register. AVX2-only; the destination must be a real register and
// the requested vector length must be 256 bits.
void Assembler::vbroadcasti128(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx2(), "");
  assert(vector_len == AVX_256bit, "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attr(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attr.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  // Operands are swapped for the encoding: dst goes in the reg field,
  // src is the memory operand.
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int8(0x5A);
  emit_operand(dst, src, 0);
}
|
||||
|
||||
// scalar single/double precision replicate
|
||||
|
||||
// duplicate single precision data from src into programmed locations in dest : requires AVX512VL
|
||||
|
||||
@ -2345,6 +2345,9 @@ private:
|
||||
void sha256rnds2(XMMRegister dst, XMMRegister src);
|
||||
void sha256msg1(XMMRegister dst, XMMRegister src);
|
||||
void sha256msg2(XMMRegister dst, XMMRegister src);
|
||||
void sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void sha512msg1(XMMRegister dst, XMMRegister src);
|
||||
void sha512msg2(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void shldl(Register dst, Register src);
|
||||
void eshldl(Register dst, Register src1, Register src2, bool no_flags);
|
||||
@ -3035,6 +3038,7 @@ private:
|
||||
void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
|
||||
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
|
||||
void vbroadcasti128(XMMRegister dst, Address src, int vector_len);
|
||||
|
||||
// scalar single/double/128bit precision replicate
|
||||
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
||||
@ -3482,6 +3482,17 @@ void MacroAssembler::vpbroadcastd(XMMRegister dst, AddressLiteral src, int vecto
|
||||
}
|
||||
}
|
||||
|
||||
// AddressLiteral wrapper around Assembler::vbroadcasti128. If the literal is
// not RIP-reachable from the current code position, materialize its address
// in rscratch first; a scratch register is mandatory in that case.
void MacroAssembler::vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Out of RIP-relative range: go through the scratch register.
    lea(rscratch, src);
    Assembler::vbroadcasti128(dst, Address(rscratch, 0), vector_len);
  } else {
    Assembler::vbroadcasti128(dst, as_Address(src), vector_len);
  }
}
|
||||
|
||||
void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
|
||||
assert(rscratch != noreg || always_reachable(src), "missing");
|
||||
|
||||
|
||||
@ -1118,6 +1118,7 @@ public:
|
||||
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||
Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
|
||||
XMMRegister shuf_mask);
|
||||
void sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block);
|
||||
#endif // _LP64
|
||||
|
||||
void fast_md5(Register buf, Address state, Address ofs, Address limit,
|
||||
@ -1216,6 +1217,9 @@ public:
|
||||
void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
|
||||
void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
|
||||
|
||||
using Assembler::vbroadcasti128;
|
||||
void vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
|
||||
|
||||
using Assembler::vbroadcastsd;
|
||||
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
|
||||
|
||||
|
||||
@ -1519,5 +1519,184 @@ void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
|
||||
}
|
||||
}
|
||||
|
||||
// SHA-512 block compression using the SHA-NI instructions
// (sha512rnds2 / sha512msg1 / sha512msg2). One 128-byte message block is
// consumed per iteration of block_loop; when multi_block is true the loop
// continues until ofs (advanced by 128 per block) passes limit, and rax is
// loaded with the updated ofs on exit. Ported from the Intel IpSec
// implementation (intel-ipsec-mb on github).
//
// arg_hash    - pointer to the 8 x 64-bit SHA-512 state words (2 x 32 bytes)
// arg_msg     - pointer to the message block(s)
// ofs/limit   - byte offset and end bound driving the multi-block loop
// multi_block - if true, loop over consecutive blocks
//
// Register roles: ymm13/ymm14 hold the working state in the ABEF/CDGH order
// expected by sha512rnds2; ymm3..ymm6 hold the rolling 16-word message
// schedule window; ymm15 holds the shuffle mask fed to vpshufb; rax points
// at the K512 round-constant table.
void MacroAssembler::sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block) {
  Label done_hash, block_loop;
  address K512_W = StubRoutines::x86::k512_W_addr();

  // ymm15 = byte-flip shuffle mask used by every vpshufb below.
  vbroadcasti128(xmm15, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512()), Assembler::AVX_256bit, r10);

  //load current hash value and transform into the ABEF/CDGH register layout
  vmovdqu(xmm0, Address(arg_hash));
  vmovdqu(xmm1, Address(arg_hash, 32));
  //ymm0 = D C B A, ymm1 = H G F E
  vperm2i128(xmm2, xmm0, xmm1, 0x20);
  vperm2i128(xmm3, xmm0, xmm1, 0x31);
  //ymm2 = F E B A, ymm3 = H G D C
  vpermq(xmm13, xmm2, 0x1b, Assembler::AVX_256bit);
  vpermq(xmm14, xmm3, 0x1b, Assembler::AVX_256bit);
  //ymm13 = A B E F, ymm14 = C D G H

  // rax = base of the round-constant table for the whole loop.
  lea(rax, ExternalAddress(K512_W));
  align(32);
  bind(block_loop);
  // Snapshot the state: xmm11/xmm12 are the per-block working copies,
  // xmm13/xmm14 keep the running hash for the feed-forward add at the end.
  vmovdqu(xmm11, xmm13);//ABEF
  vmovdqu(xmm12, xmm14);//CDGH

  // Each 4-round group below does: add round constants, run two
  // sha512rnds2 (the second on the upper constant pair, exposed by the
  // lane swap vperm2i128 imm 0x01), and advance the message schedule.

  //R0 - R3
  vmovdqu(xmm0, Address(arg_msg, 0 * 32));
  vpshufb(xmm3, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm3 = W[0..3]
  vpaddq(xmm0, xmm3, Address(rax, 0 * 32), Assembler::AVX_256bit);
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);

  //R4 - R7
  vmovdqu(xmm0, Address(arg_msg, 1 * 32));
  vpshufb(xmm4, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm4 = W[4..7]
  vpaddq(xmm0, xmm4, Address(rax, 1 * 32), Assembler::AVX_256bit);
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);
  sha512msg1(xmm3, xmm4); //ymm3 = W[0..3] + S0(W[1..4])

  //R8 - R11
  vmovdqu(xmm0, Address(arg_msg, 2 * 32));
  vpshufb(xmm5, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm5 = W[8..11]
  vpaddq(xmm0, xmm5, Address(rax, 2 * 32), Assembler::AVX_256bit);
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);
  sha512msg1(xmm4, xmm5);//ymm4 = W[4..7] + S0(W[5..8])

  //R12 - R15
  vmovdqu(xmm0, Address(arg_msg, 3 * 32));
  vpshufb(xmm6, xmm0, xmm15, Assembler::AVX_256bit); //ymm0 / ymm6 = W[12..15]
  vpaddq(xmm0, xmm6, Address(rax, 3 * 32), Assembler::AVX_256bit);
  vpermq(xmm8, xmm6, 0x1b, Assembler::AVX_256bit); //ymm8 = W[12] W[13] W[14] W[15]
  vpermq(xmm9, xmm5, 0x39, Assembler::AVX_256bit); //ymm9 = W[8] W[11] W[10] W[9]
  vpblendd(xmm8, xmm8, xmm9, 0x3f, Assembler::AVX_256bit); //ymm8 = W[12] W[11] W[10] W[9]
  vpaddq(xmm3, xmm3, xmm8, Assembler::AVX_256bit);
  sha512msg2(xmm3, xmm6);//W[16..19] = xmm3 + W[9..12] + S1(W[14..17])
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);
  sha512msg1(xmm5, xmm6); //ymm5 = W[8..11] + S0(W[9..12])

  //R16 - R19, R32 - R35, R48 - R51
  // Three unrolled 16-round passes; i indexes the constant table in 32-byte
  // steps. The W-index comments describe the first (j == 3) pass.
  for (int i = 4, j = 3; j > 0; j--) {
    vpaddq(xmm0, xmm3, Address(rax, i * 32), Assembler::AVX_256bit);
    vpermq(xmm8, xmm3, 0x1b, Assembler::AVX_256bit);//ymm8 = W[16] W[17] W[18] W[19]
    vpermq(xmm9, xmm6, 0x39, Assembler::AVX_256bit);//ymm9 = W[12] W[15] W[14] W[13]
    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//xmm7 = W[16] W[15] W[14] W[13]
    vpaddq(xmm4, xmm4, xmm7, Assembler::AVX_256bit);//ymm4 = W[4..7] + S0(W[5..8]) + W[13..16]
    sha512msg2(xmm4, xmm3);//ymm4 += S1(W[14..17])
    sha512rnds2(xmm12, xmm11, xmm0);
    vperm2i128(xmm0, xmm0, xmm0, 0x01);
    sha512rnds2(xmm11, xmm12, xmm0);
    sha512msg1(xmm6, xmm3); //ymm6 = W[12..15] + S0(W[13..16])
    i += 1;
    //R20 - R23, R36 - R39, R52 - R55
    vpaddq(xmm0, xmm4, Address(rax, i * 32), Assembler::AVX_256bit);
    vpermq(xmm8, xmm4, 0x1b, Assembler::AVX_256bit);//ymm8 = W[20] W[21] W[22] W[23]
    vpermq(xmm9, xmm3, 0x39, Assembler::AVX_256bit);//ymm9 = W[16] W[19] W[18] W[17]
    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[20] W[19] W[18] W[17]
    vpaddq(xmm5, xmm5, xmm7, Assembler::AVX_256bit);//ymm5 = W[8..11] + S0(W[9..12]) + W[17..20]
    sha512msg2(xmm5, xmm4);//ymm5 += S1(W[18..21])
    sha512rnds2(xmm12, xmm11, xmm0);
    vperm2i128(xmm0, xmm0, xmm0, 0x01);
    sha512rnds2(xmm11, xmm12, xmm0);
    sha512msg1(xmm3, xmm4); //ymm3 = W[16..19] + S0(W[17..20])
    i += 1;
    //R24 - R27, R40 - R43, R56 - R59
    vpaddq(xmm0, xmm5, Address(rax, i * 32), Assembler::AVX_256bit);
    vpermq(xmm8, xmm5, 0x1b, Assembler::AVX_256bit);//ymm8 = W[24] W[25] W[26] W[27]
    vpermq(xmm9, xmm4, 0x39, Assembler::AVX_256bit);//ymm9 = W[20] W[23] W[22] W[21]
    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[24] W[23] W[22] W[21]
    vpaddq(xmm6, xmm6, xmm7, Assembler::AVX_256bit);//ymm6 = W[12..15] + S0(W[13..16]) + W[21..24]
    sha512msg2(xmm6, xmm5);//ymm6 += S1(W[22..25])
    sha512rnds2(xmm12, xmm11, xmm0);
    vperm2i128(xmm0, xmm0, xmm0, 0x01);
    sha512rnds2(xmm11, xmm12, xmm0);
    sha512msg1(xmm4, xmm5);//ymm4 = W[20..23] + S0(W[21..24])
    i += 1;
    //R28 - R31, R44 - R47, R60 - R63
    vpaddq(xmm0, xmm6, Address(rax, i * 32), Assembler::AVX_256bit);
    vpermq(xmm8, xmm6, 0x1b, Assembler::AVX_256bit);//ymm8 = W[28] W[29] W[30] W[31]
    vpermq(xmm9, xmm5, 0x39, Assembler::AVX_256bit);//ymm9 = W[24] W[27] W[26] W[25]
    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[28] W[27] W[26] W[25]
    vpaddq(xmm3, xmm3, xmm7, Assembler::AVX_256bit);//ymm3 = W[16..19] + S0(W[17..20]) + W[25..28]
    sha512msg2(xmm3, xmm6); //ymm3 += S1(W[26..29])
    sha512rnds2(xmm12, xmm11, xmm0);
    vperm2i128(xmm0, xmm0, xmm0, 0x01);
    sha512rnds2(xmm11, xmm12, xmm0);
    sha512msg1(xmm5, xmm6);//ymm5 = W[24..27] + S0(W[25..28])
    i += 1;
  }
  // Tail rounds: the schedule no longer needs sha512msg1 feeding.
  //R64 - R67
  vpaddq(xmm0, xmm3, Address(rax, 16 * 32), Assembler::AVX_256bit);
  vpermq(xmm8, xmm3, 0x1b, Assembler::AVX_256bit);//ymm8 = W[64] W[65] W[66] W[67]
  vpermq(xmm9, xmm6, 0x39, Assembler::AVX_256bit);//ymm9 = W[60] W[63] W[62] W[61]
  vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[64] W[63] W[62] W[61]
  vpaddq(xmm4, xmm4, xmm7, Assembler::AVX_256bit);//ymm4 = W[52..55] + S0(W[53..56]) + W[61..64]
  sha512msg2(xmm4, xmm3);//ymm4 += S1(W[62..65])
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);
  sha512msg1(xmm6, xmm3);//ymm6 = W[60..63] + S0(W[61..64])

  //R68 - R71
  vpaddq(xmm0, xmm4, Address(rax, 17 * 32), Assembler::AVX_256bit);
  vpermq(xmm8, xmm4, 0x1b, Assembler::AVX_256bit);//ymm8 = W[68] W[69] W[70] W[71]
  vpermq(xmm9, xmm3, 0x39, Assembler::AVX_256bit);//ymm9 = W[64] W[67] W[66] W[65]
  vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[68] W[67] W[66] W[65]
  vpaddq(xmm5, xmm5, xmm7, Assembler::AVX_256bit);//ymm5 = W[56..59] + S0(W[57..60]) + W[65..68]
  sha512msg2(xmm5, xmm4);//ymm5 += S1(W[66..69])
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);

  //R72 - R75
  vpaddq(xmm0, xmm5, Address(rax, 18 * 32), Assembler::AVX_256bit);
  vpermq(xmm8, xmm5, 0x1b, Assembler::AVX_256bit);//ymm8 = W[72] W[73] W[74] W[75]
  vpermq(xmm9, xmm4, 0x39, Assembler::AVX_256bit);//ymm9 = W[68] W[71] W[70] W[69]
  vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[72] W[71] W[70] W[69]
  vpaddq(xmm6, xmm6, xmm7, Assembler::AVX_256bit);//ymm6 = W[60..63] + S0(W[61..64]) + W[69..72]
  sha512msg2(xmm6, xmm5);//ymm6 += S1(W[70..73])
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);

  //R76 - R79
  vpaddq(xmm0, xmm6, Address(rax, 19 * 32), Assembler::AVX_256bit);
  sha512rnds2(xmm12, xmm11, xmm0);
  vperm2i128(xmm0, xmm0, xmm0, 0x01);
  sha512rnds2(xmm11, xmm12, xmm0);

  //update hash value (feed-forward add of this block's result)
  vpaddq(xmm14, xmm14, xmm12, Assembler::AVX_256bit);
  vpaddq(xmm13, xmm13, xmm11, Assembler::AVX_256bit);

  if (multi_block) {
    addptr(arg_msg, 4 * 32);
    addptr(ofs, 128);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, block_loop);
    movptr(rax, ofs); //return ofs
  }

  //store the hash value back in memory, undoing the ABEF/CDGH layout
  //xmm13 = ABEF
  //xmm14 = CDGH
  vperm2i128(xmm1, xmm13, xmm14, 0x31);
  vperm2i128(xmm2, xmm13, xmm14, 0x20);
  vpermq(xmm1, xmm1, 0xb1, Assembler::AVX_256bit);//ymm1 = D C B A
  vpermq(xmm2, xmm2, 0xb1, Assembler::AVX_256bit);//ymm2 = H G F E
  vmovdqu(Address(arg_hash, 0 * 32), xmm1);
  vmovdqu(Address(arg_hash, 1 * 32), xmm2);

  // NOTE(review): done_hash is bound but never branched to in this function.
  bind(done_hash);
}
|
||||
|
||||
#endif //#ifdef _LP64
|
||||
|
||||
|
||||
@ -1558,7 +1558,7 @@ address StubGenerator::generate_sha256_implCompress(bool multi_block, const char
|
||||
|
||||
address StubGenerator::generate_sha512_implCompress(bool multi_block, const char *name) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(VM_Version::supports_bmi2(), "");
|
||||
assert(VM_Version::supports_bmi2() || VM_Version::supports_sha512(), "");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
@ -1568,22 +1568,24 @@ address StubGenerator::generate_sha512_implCompress(bool multi_block, const char
|
||||
Register ofs = c_rarg2;
|
||||
Register limit = c_rarg3;
|
||||
|
||||
const XMMRegister msg = xmm0;
|
||||
const XMMRegister state0 = xmm1;
|
||||
const XMMRegister state1 = xmm2;
|
||||
const XMMRegister msgtmp0 = xmm3;
|
||||
const XMMRegister msgtmp1 = xmm4;
|
||||
const XMMRegister msgtmp2 = xmm5;
|
||||
const XMMRegister msgtmp3 = xmm6;
|
||||
const XMMRegister msgtmp4 = xmm7;
|
||||
|
||||
const XMMRegister shuf_mask = xmm8;
|
||||
|
||||
__ enter();
|
||||
|
||||
__ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
|
||||
buf, state, ofs, limit, rsp, multi_block, shuf_mask);
|
||||
if (VM_Version::supports_sha512()) {
|
||||
__ sha512_update_ni_x1(state, buf, ofs, limit, multi_block);
|
||||
} else {
|
||||
const XMMRegister msg = xmm0;
|
||||
const XMMRegister state0 = xmm1;
|
||||
const XMMRegister state1 = xmm2;
|
||||
const XMMRegister msgtmp0 = xmm3;
|
||||
const XMMRegister msgtmp1 = xmm4;
|
||||
const XMMRegister msgtmp2 = xmm5;
|
||||
const XMMRegister msgtmp3 = xmm6;
|
||||
const XMMRegister msgtmp4 = xmm7;
|
||||
|
||||
const XMMRegister shuf_mask = xmm8;
|
||||
__ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
|
||||
buf, state, ofs, limit, rsp, multi_block, shuf_mask);
|
||||
}
|
||||
__ vzeroupper();
|
||||
__ leave();
|
||||
__ ret(0);
|
||||
|
||||
@ -1060,6 +1060,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX;
|
||||
_features &= ~CPU_VZEROUPPER;
|
||||
_features &= ~CPU_F16C;
|
||||
_features &= ~CPU_SHA512;
|
||||
}
|
||||
|
||||
if (logical_processors_per_package() == 1) {
|
||||
@ -1304,7 +1305,7 @@ void VM_Version::get_processor_features() {
|
||||
|
||||
#ifdef _LP64
|
||||
// These are only supported on 64-bit
|
||||
if (UseSHA && supports_avx2() && supports_bmi2()) {
|
||||
if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
|
||||
}
|
||||
@ -3007,6 +3008,8 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
|
||||
xem_xcr0_eax.bits.ymm != 0) {
|
||||
result |= CPU_AVX;
|
||||
result |= CPU_VZEROUPPER;
|
||||
if (sefsl1_cpuid7_eax.bits.sha512 != 0)
|
||||
result |= CPU_SHA512;
|
||||
if (std_cpuid1_ecx.bits.f16c != 0)
|
||||
result |= CPU_F16C;
|
||||
if (sef_cpuid7_ebx.bits.avx2 != 0) {
|
||||
|
||||
@ -283,7 +283,8 @@ class VM_Version : public Abstract_VM_Version {
|
||||
union SefCpuid7SubLeaf1Eax {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t : 23,
|
||||
uint32_t sha512 : 1,
|
||||
: 22,
|
||||
avx_ifma : 1,
|
||||
: 8;
|
||||
} bits;
|
||||
@ -415,7 +416,8 @@ protected:
|
||||
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
|
||||
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
|
||||
decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
|
||||
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/
|
||||
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/\
|
||||
decl(SHA512, "sha512", 61) /* SHA512 instructions*/
|
||||
|
||||
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
|
||||
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
|
||||
@ -757,6 +759,7 @@ public:
|
||||
static bool supports_ospke() { return (_features & CPU_OSPKE) != 0; }
|
||||
static bool supports_cet_ss() { return (_features & CPU_CET_SS) != 0; }
|
||||
static bool supports_cet_ibt() { return (_features & CPU_CET_IBT) != 0; }
|
||||
static bool supports_sha512() { return (_features & CPU_SHA512) != 0; }
|
||||
|
||||
//
|
||||
// Feature identification not affected by VM flags
|
||||
|
||||
@ -256,6 +256,7 @@ public class AMD64 extends Architecture {
|
||||
AVX512_IFMA,
|
||||
AVX_IFMA,
|
||||
APX_F,
|
||||
SHA512,
|
||||
}
|
||||
|
||||
private final EnumSet<CPUFeature> features;
|
||||
|
||||
@ -103,7 +103,7 @@ public class ClhsdbLongConstant {
|
||||
String arch = System.getProperty("os.arch");
|
||||
if (arch.equals("amd64") || arch.equals("i386") || arch.equals("x86")) {
|
||||
// Expected value obtained from the CPU_SHA definition in vm_version_x86.hpp
|
||||
checkLongValue("VM_Version::CPU_SHA",
|
||||
checkLongValue("VM_Version::CPU_SHA ",
|
||||
longConstantOutput,
|
||||
17179869184L);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user