From 18bcbf7941f7567449983b3f317401efb3e34d39 Mon Sep 17 00:00:00 2001
From: Smita Kamath
Date: Mon, 21 Oct 2024 15:37:36 +0000
Subject: [PATCH] 8341052: SHA-512 implementation using SHA-NI

Reviewed-by: jbhateja, ascarpino, sviswanathan, sparasa
---
 src/hotspot/cpu/x86/assembler_x86.cpp         | 34 ++++
 src/hotspot/cpu/x86/assembler_x86.hpp         |  4 +
 src/hotspot/cpu/x86/macroAssembler_x86.cpp    | 11 ++
 src/hotspot/cpu/x86/macroAssembler_x86.hpp    |  4 +
 .../cpu/x86/macroAssembler_x86_sha.cpp        | 179 ++++++++++++++++++
 src/hotspot/cpu/x86/stubGenerator_x86_64.cpp  | 30 +--
 src/hotspot/cpu/x86/vm_version_x86.cpp        |  5 +-
 src/hotspot/cpu/x86/vm_version_x86.hpp        |  7 +-
 .../share/classes/jdk/vm/ci/amd64/AMD64.java  |  1 +
 .../serviceability/sa/ClhsdbLongConstant.java |  2 +-
 10 files changed, 259 insertions(+), 18 deletions(-)

diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index 8b61fd27de0..678f7426321 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -6751,6 +6751,27 @@ void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
   emit_int16((unsigned char)0xCD, (0xC0 | encode));
 }
 
+void Assembler::sha512msg1(XMMRegister dst, XMMRegister src) { // emitted as VEX.256 F2 0F38 CC /r, register-register form only
+  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); // two-operand form: the nds slot is passed as 0
+  emit_int16((unsigned char)0xCC, (0xC0 | encode)); // opcode byte, then a ModRM byte with the top two bits set (register operands)
+}
+
+void Assembler::sha512msg2(XMMRegister dst, XMMRegister src) { // emitted as VEX.256 F2 0F38 CD /r, register-register form only
+  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); // two-operand form: the nds slot is passed as 0
+  emit_int16((unsigned char)0xCD, (0xC0 | encode)); // opcode byte, then a ModRM byte with the top two bits set (register operands)
+}
+
+void Assembler::sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src) { // emitted as VEX.256 F2 0F38 CB /r, register-register form only
+  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); // three-operand form: nds fills the slot the msg1/msg2 emitters pass as 0
+  emit_int16((unsigned char)0xCB, (0xC0 | encode)); // opcode byte, then a ModRM byte with the top two bits set (register operands)
+}
+
 void Assembler::shll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
@@ -11670,6 +11691,19 @@ void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
   emit_operand(dst, src, 0);
 }
 
+void Assembler::vbroadcasti128(XMMRegister dst, Address src, int vector_len) { // memory-source form; the asserts below require AVX2 and vector_len == AVX_256bit
+  assert(VM_Version::supports_avx2(), "");
+  assert(vector_len == AVX_256bit, "");
+  assert(dst != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  // swap src<->dst for encoding: the memory operand is passed first and dst's encoding last
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x5A); // opcode byte
+  emit_operand(dst, src, 0);
+}
+
 // scalar single/double precision replicate
 
 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 696fff5e3eb..94c0535b025 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -2345,6 +2345,9 @@ private:
   void sha256rnds2(XMMRegister dst, XMMRegister src);
   void sha256msg1(XMMRegister dst, XMMRegister src);
   void sha256msg2(XMMRegister dst, XMMRegister src);
+  void sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void sha512msg1(XMMRegister dst, XMMRegister src);
+  void sha512msg2(XMMRegister dst, XMMRegister src);
 
   void shldl(Register dst, Register src);
   void eshldl(Register dst, Register src1, Register src2, bool no_flags);
@@ -3035,6 +3038,7 @@ private:
   void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
   void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
   void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
+  void vbroadcasti128(XMMRegister dst, Address src, int vector_len);
 
   // scalar single/double/128bit precision replicate
   void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 018258a012e..1a69b4c1ad7 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -3482,6 +3482,17 @@ void MacroAssembler::vpbroadcastd(XMMRegister dst, AddressLiteral src, int vecto
   }
 }
 
+void MacroAssembler::vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { // AddressLiteral front end for Assembler::vbroadcasti128
+  assert(rscratch != noreg || always_reachable(src), "missing");
+
+  if (reachable(src)) { // the literal can be used as an Address directly
+    Assembler::vbroadcasti128(dst, as_Address(src), vector_len);
+  } else { // otherwise load its address into rscratch and address through that
+    lea(rscratch, src);
+    Assembler::vbroadcasti128(dst, Address(rscratch, 0), vector_len);
+  }
+}
+
 void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
   assert(rscratch != noreg || always_reachable(src), "missing");
 
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 2ce4fc40e90..e6de99eb207 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1118,6 +1118,7 @@ public:
                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
                    Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
                    XMMRegister shuf_mask);
+  void sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block);
 #endif // _LP64
 
   void fast_md5(Register buf, Address state, Address ofs, Address limit,
@@ -1216,6 +1217,9 @@ public:
   void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
   void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
 
+  using Assembler::vbroadcasti128;
+  void vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
+
   using Assembler::vbroadcastsd;
   void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
 
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
index 090de71425f..e7d728c2e96 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
@@ -1519,5 +1519,184 @@ void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
   }
 }
 
+// Emits the SHA-512 compression of one 128-byte block (looping over further blocks when multi_block is set) using sha512rnds2/sha512msg1/sha512msg2; based on the Intel IPsec multi-buffer library (intel-ipsec-mb on GitHub).
+void MacroAssembler::sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block) {
+  Label done_hash, block_loop;
+  address K512_W = StubRoutines::x86::k512_W_addr();
+
+  vbroadcasti128(xmm15, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512()), Assembler::AVX_256bit, r10); // xmm15 = vpshufb control applied to every message load below; r10 is the scratch register
+
+  //load current hash value and transform
+  vmovdqu(xmm0, Address(arg_hash));
+  vmovdqu(xmm1, Address(arg_hash, 32));
+  //ymm0 = D C B A, ymm1 = H G F E
+  vperm2i128(xmm2, xmm0, xmm1, 0x20);
+  vperm2i128(xmm3, xmm0, xmm1, 0x31);
+  //ymm2 = F E B A, ymm3 = H G D C
+  vpermq(xmm13, xmm2, 0x1b, Assembler::AVX_256bit);
+  vpermq(xmm14, xmm3, 0x1b, Assembler::AVX_256bit);
+  //ymm13 = A B E F, ymm14 = C D G H
+
+  lea(rax, ExternalAddress(K512_W)); // rax = base of the table whose 32-byte rows are added to W ahead of each 4-round group (presumably the SHA-512 round constants)
+  align(32);
+  bind(block_loop);
+  vmovdqu(xmm11, xmm13);//ABEF - working copy; xmm13 keeps the value from before this block
+  vmovdqu(xmm12, xmm14);//CDGH - working copy; xmm14 keeps the value from before this block
+
+  //R0 - R3
+  vmovdqu(xmm0, Address(arg_msg, 0 * 32));
+  vpshufb(xmm3, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm3 = W[0..3]
+  vpaddq(xmm0, xmm3, Address(rax, 0 * 32), Assembler::AVX_256bit);
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01); // swap the 128-bit halves of ymm0 ahead of the second sha512rnds2 of the group
+  sha512rnds2(xmm11, xmm12, xmm0);
+
+  //R4 - R7
+  vmovdqu(xmm0, Address(arg_msg, 1 * 32));
+  vpshufb(xmm4, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm4 = W[4..7]
+  vpaddq(xmm0, xmm4, Address(rax, 1 * 32), Assembler::AVX_256bit);
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+  sha512msg1(xmm3, xmm4); //ymm3 = W[0..3] + S0(W[1..4])
+
+  //R8 - R11
+  vmovdqu(xmm0, Address(arg_msg, 2 * 32));
+  vpshufb(xmm5, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm5 = W[8..11]
+  vpaddq(xmm0, xmm5, Address(rax, 2 * 32), Assembler::AVX_256bit);
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+  sha512msg1(xmm4, xmm5);//ymm4 = W[4..7] + S0(W[5..8])
+
+  //R12 - R15
+  vmovdqu(xmm0, Address(arg_msg, 3 * 32));
+  vpshufb(xmm6, xmm0, xmm15, Assembler::AVX_256bit); //ymm0 / ymm6 = W[12..15]
+  vpaddq(xmm0, xmm6, Address(rax, 3 * 32), Assembler::AVX_256bit);
+  vpermq(xmm8, xmm6, 0x1b, Assembler::AVX_256bit); //ymm8 = W[12] W[13] W[14] W[15]
+  vpermq(xmm9, xmm5, 0x39, Assembler::AVX_256bit); //ymm9 = W[8] W[11] W[10] W[9]
+  vpblendd(xmm8, xmm8, xmm9, 0x3f, Assembler::AVX_256bit); //ymm8 = W[12] W[11] W[10] W[9]
+  vpaddq(xmm3, xmm3, xmm8, Assembler::AVX_256bit);
+  sha512msg2(xmm3, xmm6);//W[16..19] = xmm3 + W[9..12] + S1(W[14..17])
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+  sha512msg1(xmm5, xmm6); //ymm5 = W[8..11] + S0(W[9..12])
+
+  //R16 - R19, R32 - R35, R48 - R51
+  for (int i = 4, j = 3; j > 0; j--) { // three passes of 16 rounds each, unrolled at code-generation time; i walks table rows 4..15
+    vpaddq(xmm0, xmm3, Address(rax, i * 32), Assembler::AVX_256bit);
+    vpermq(xmm8, xmm3, 0x1b, Assembler::AVX_256bit);//ymm8 = W[16] W[17] W[18] W[19]
+    vpermq(xmm9, xmm6, 0x39, Assembler::AVX_256bit);//ymm9 = W[12] W[15] W[14] W[13]
+    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//xmm7 = W[16] W[15] W[14] W[13]
+    vpaddq(xmm4, xmm4, xmm7, Assembler::AVX_256bit);//ymm4 = W[4..7] + S0(W[5..8]) + W[13..16]
+    sha512msg2(xmm4, xmm3);//ymm4 += S1(W[14..17])
+    sha512rnds2(xmm12, xmm11, xmm0);
+    vperm2i128(xmm0, xmm0, xmm0, 0x01);
+    sha512rnds2(xmm11, xmm12, xmm0);
+    sha512msg1(xmm6, xmm3); //ymm6 = W[12..15] + S0(W[13..16])
+    i += 1;
+    //R20 - R23, R36 - R39, R52 - R55
+    vpaddq(xmm0, xmm4, Address(rax, i * 32), Assembler::AVX_256bit);
+    vpermq(xmm8, xmm4, 0x1b, Assembler::AVX_256bit);//ymm8 = W[20] W[21] W[22] W[23]
+    vpermq(xmm9, xmm3, 0x39, Assembler::AVX_256bit);//ymm9 = W[16] W[19] W[18] W[17]
+    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[20] W[19] W[18] W[17]
+    vpaddq(xmm5, xmm5, xmm7, Assembler::AVX_256bit);//ymm5 = W[8..11] + S0(W[9..12]) + W[17..20]
+    sha512msg2(xmm5, xmm4);//ymm5 += S1(W[18..21])
+    sha512rnds2(xmm12, xmm11, xmm0);
+    vperm2i128(xmm0, xmm0, xmm0, 0x01);
+    sha512rnds2(xmm11, xmm12, xmm0);
+    sha512msg1(xmm3, xmm4); //ymm3 = W[16..19] + S0(W[17..20])
+    i += 1;
+    //R24 - R27, R40 - R43, R56 - R59
+    vpaddq(xmm0, xmm5, Address(rax, i * 32), Assembler::AVX_256bit);
+    vpermq(xmm8, xmm5, 0x1b, Assembler::AVX_256bit);//ymm8 = W[24] W[25] W[26] W[27]
+    vpermq(xmm9, xmm4, 0x39, Assembler::AVX_256bit);//ymm9 = W[20] W[23] W[22] W[21]
+    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[24] W[23] W[22] W[21]
+    vpaddq(xmm6, xmm6, xmm7, Assembler::AVX_256bit);//ymm6 = W[12..15] + S0(W[13..16]) + W[21..24]
+    sha512msg2(xmm6, xmm5);//ymm6 += S1(W[22..25])
+    sha512rnds2(xmm12, xmm11, xmm0);
+    vperm2i128(xmm0, xmm0, xmm0, 0x01);
+    sha512rnds2(xmm11, xmm12, xmm0);
+    sha512msg1(xmm4, xmm5);//ymm4 = W[20..23] + S0(W[21..24])
+    i += 1;
+    //R28 - R31, R44 - R47, R60 - R63
+    vpaddq(xmm0, xmm6, Address(rax, i * 32), Assembler::AVX_256bit);
+    vpermq(xmm8, xmm6, 0x1b, Assembler::AVX_256bit);//ymm8 = W[28] W[29] W[30] W[31]
+    vpermq(xmm9, xmm5, 0x39, Assembler::AVX_256bit);//ymm9 = W[24] W[27] W[26] W[25]
+    vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[28] W[27] W[26] W[25]
+    vpaddq(xmm3, xmm3, xmm7, Assembler::AVX_256bit);//ymm3 = W[16..19] + S0(W[17..20]) + W[25..28]
+    sha512msg2(xmm3, xmm6); //ymm3 += S1(W[26..29])
+    sha512rnds2(xmm12, xmm11, xmm0);
+    vperm2i128(xmm0, xmm0, xmm0, 0x01);
+    sha512rnds2(xmm11, xmm12, xmm0);
+    sha512msg1(xmm5, xmm6);//ymm5 = W[24..27] + S0(W[25..28])
+    i += 1;
+  }
+  //R64 - R67
+  vpaddq(xmm0, xmm3, Address(rax, 16 * 32), Assembler::AVX_256bit);
+  vpermq(xmm8, xmm3, 0x1b, Assembler::AVX_256bit);//ymm8 = W[64] W[65] W[66] W[67]
+  vpermq(xmm9, xmm6, 0x39, Assembler::AVX_256bit);//ymm9 = W[60] W[63] W[62] W[61]
+  vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[64] W[63] W[62] W[61]
+  vpaddq(xmm4, xmm4, xmm7, Assembler::AVX_256bit);//ymm4 = W[52..55] + S0(W[53..56]) + W[61..64]
+  sha512msg2(xmm4, xmm3);//ymm4 += S1(W[62..65])
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+  sha512msg1(xmm6, xmm3);//ymm6 = W[60..63] + S0(W[61..64])
+
+  //R68 - R71
+  vpaddq(xmm0, xmm4, Address(rax, 17 * 32), Assembler::AVX_256bit);
+  vpermq(xmm8, xmm4, 0x1b, Assembler::AVX_256bit);//ymm8 = W[68] W[69] W[70] W[71]
+  vpermq(xmm9, xmm3, 0x39, Assembler::AVX_256bit);//ymm9 = W[64] W[67] W[66] W[65]
+  vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[68] W[67] W[66] W[65]
+  vpaddq(xmm5, xmm5, xmm7, Assembler::AVX_256bit);//ymm5 = W[56..59] + S0(W[57..60]) + W[65..68]
+  sha512msg2(xmm5, xmm4);//ymm5 += S1(W[66..69])
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+
+  //R72 - R75
+  vpaddq(xmm0, xmm5, Address(rax, 18 * 32), Assembler::AVX_256bit);
+  vpermq(xmm8, xmm5, 0x1b, Assembler::AVX_256bit);//ymm8 = W[72] W[73] W[74] W[75]
+  vpermq(xmm9, xmm4, 0x39, Assembler::AVX_256bit);//ymm9 = W[68] W[71] W[70] W[69]
+  vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[72] W[71] W[70] W[69]
+  vpaddq(xmm6, xmm6, xmm7, Assembler::AVX_256bit);//ymm6 = W[60..63] + S0(W[61..64]) + W[69..72]
+  sha512msg2(xmm6, xmm5);//ymm6 += S1(W[70..73])
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+
+  //R76 - R79
+  vpaddq(xmm0, xmm6, Address(rax, 19 * 32), Assembler::AVX_256bit);
+  sha512rnds2(xmm12, xmm11, xmm0);
+  vperm2i128(xmm0, xmm0, xmm0, 0x01);
+  sha512rnds2(xmm11, xmm12, xmm0);
+
+  //update hash value: add the working copies back into the values saved at block_loop
+  vpaddq(xmm14, xmm14, xmm12, Assembler::AVX_256bit);
+  vpaddq(xmm13, xmm13, xmm11, Assembler::AVX_256bit);
+
+  if (multi_block) {
+    addptr(arg_msg, 4 * 32); // advance the message pointer by one 128-byte block
+    addptr(ofs, 128);
+    cmpptr(ofs, limit);
+    jcc(Assembler::belowEqual, block_loop); // process another block while ofs <= limit
+    movptr(rax, ofs); //return ofs (rax is no longer needed as the table base once the loop exits)
+  }
+
+  //store the hash value back in memory
+  //xmm13 = ABEF
+  //xmm14 = CDGH
+  vperm2i128(xmm1, xmm13, xmm14, 0x31);
+  vperm2i128(xmm2, xmm13, xmm14, 0x20);
+  vpermq(xmm1, xmm1, 0xb1, Assembler::AVX_256bit);//ymm1 = D C B A
+  vpermq(xmm2, xmm2, 0xb1, Assembler::AVX_256bit);//ymm2 = H G F E
+  vmovdqu(Address(arg_hash, 0 * 32), xmm1);
+  vmovdqu(Address(arg_hash, 1 * 32), xmm2);
+
+  bind(done_hash); // NOTE(review): nothing in this function branches to done_hash; the label is only bound here
+}
+
 #endif //#ifdef _LP64
 
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index ee6311c25f6..e23c83ed197 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -1558,7 +1558,7 @@ address StubGenerator::generate_sha256_implCompress(bool multi_block, const char
 
 address StubGenerator::generate_sha512_implCompress(bool multi_block, const char *name) {
   assert(VM_Version::supports_avx2(), "");
-  assert(VM_Version::supports_bmi2(), "");
+  assert(VM_Version::supports_bmi2() || VM_Version::supports_sha512(), ""); // either code path below is acceptable: the AVX2 one or the SHA512-instruction one
   __ align(CodeEntryAlignment);
   StubCodeMark mark(this, "StubRoutines", name);
   address start = __ pc();
@@ -1568,22 +1568,24 @@ address StubGenerator::generate_sha512_implCompress(bool multi_block, const char
   Register ofs = c_rarg2;
   Register limit = c_rarg3;
 
-  const XMMRegister msg = xmm0;
-  const XMMRegister state0 = xmm1;
-  const XMMRegister state1 = xmm2;
-  const XMMRegister msgtmp0 = xmm3;
-  const XMMRegister msgtmp1 = xmm4;
-  const XMMRegister msgtmp2 = xmm5;
-  const XMMRegister msgtmp3 = xmm6;
-  const XMMRegister msgtmp4 = xmm7;
-
-  const XMMRegister shuf_mask = xmm8;
-
   __ enter();
 
-  __ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
-  buf, state, ofs, limit, rsp, multi_block, shuf_mask);
+  if (VM_Version::supports_sha512()) { // prefer the SHA512-instruction implementation when the CPU feature is present
+      __ sha512_update_ni_x1(state, buf, ofs, limit, multi_block);
+  } else { // otherwise keep using the existing AVX2 implementation
+    const XMMRegister msg = xmm0;
+    const XMMRegister state0 = xmm1;
+    const XMMRegister state1 = xmm2;
+    const XMMRegister msgtmp0 = xmm3;
+    const XMMRegister msgtmp1 = xmm4;
+    const XMMRegister msgtmp2 = xmm5;
+    const XMMRegister msgtmp3 = xmm6;
+    const XMMRegister msgtmp4 = xmm7;
 
+    const XMMRegister shuf_mask = xmm8;
+    __ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
+      buf, state, ofs, limit, rsp, multi_block, shuf_mask);
+  }
   __ vzeroupper();
   __ leave();
   __ ret(0);
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 038797924a9..63347c51d60 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1060,6 +1060,7 @@ void VM_Version::get_processor_features() {
     _features &= ~CPU_AVX;
     _features &= ~CPU_VZEROUPPER;
     _features &= ~CPU_F16C;
+    _features &= ~CPU_SHA512; // dropped together with AVX: the sha512* emitters assert supports_avx()
   }
 
   if (logical_processors_per_package() == 1) {
@@ -1304,7 +1305,7 @@ void VM_Version::get_processor_features() {
 
 #ifdef _LP64
   // These are only supported on 64-bit
-  if (UseSHA && supports_avx2() && supports_bmi2()) {
+  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) { // mirrors the assert in generate_sha512_implCompress
     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
     }
@@ -3007,6 +3008,8 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
         xem_xcr0_eax.bits.ymm != 0) {
       result |= CPU_AVX;
       result |= CPU_VZEROUPPER;
+      if (sefsl1_cpuid7_eax.bits.sha512 != 0) // only reported from inside the branch that also sets CPU_AVX
+        result |= CPU_SHA512;
       if (std_cpuid1_ecx.bits.f16c != 0)
         result |= CPU_F16C;
       if (sef_cpuid7_ebx.bits.avx2 != 0) {
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index d58b5a9c099..beac8a0f2d7 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -283,7 +283,8 @@ class VM_Version : public Abstract_VM_Version {
   union SefCpuid7SubLeaf1Eax {
     uint32_t value;
     struct {
-      uint32_t : 23,
+      uint32_t sha512 : 1, // first bit-field; Intel documents SHA512 as CPUID.(EAX=7,ECX=1):EAX bit 0 - assumes fields are allocated from the least significant bit
+               : 22, // padding so that avx_ifma keeps its previous position (1 + 22 = the old 23)
                avx_ifma : 1,
                : 8;
     } bits;
@@ -415,7 +416,8 @@ protected:
     decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
     decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
     decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
-    decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/
+    decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/\
+    decl(SHA512, "sha512", 61) /* SHA512 instructions*/
 
 #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
     CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@@ -757,6 +759,7 @@ public:
   static bool supports_ospke() { return (_features & CPU_OSPKE) != 0; }
   static bool supports_cet_ss() { return (_features & CPU_CET_SS) != 0; }
   static bool supports_cet_ibt() { return (_features & CPU_CET_IBT) != 0; }
+  static bool supports_sha512() { return (_features & CPU_SHA512) != 0; }
 
   //
   // Feature identification not affected by VM flags
diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java
index f0ca7a2fc23..eda1eada737 100644
--- a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java
+++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java
@@ -256,6 +256,7 @@ public class AMD64 extends Architecture {
         AVX512_IFMA,
         AVX_IFMA,
         APX_F,
+        SHA512,
     }
 
     private final EnumSet features;
diff --git a/test/hotspot/jtreg/serviceability/sa/ClhsdbLongConstant.java b/test/hotspot/jtreg/serviceability/sa/ClhsdbLongConstant.java
index b991610badb..bcf93498574 100644
--- a/test/hotspot/jtreg/serviceability/sa/ClhsdbLongConstant.java
+++ b/test/hotspot/jtreg/serviceability/sa/ClhsdbLongConstant.java
@@ -103,7 +103,7 @@ public class ClhsdbLongConstant {
         String arch = System.getProperty("os.arch");
         if (arch.equals("amd64") || arch.equals("i386") || arch.equals("x86")) {
             // Expected value obtained from the CPU_SHA definition in vm_version_x86.hpp
-            checkLongValue("VM_Version::CPU_SHA",
+            checkLongValue("VM_Version::CPU_SHA ", // NOTE(review): the trailing space is presumably deliberate, so the lookup no longer also matches the new VM_Version::CPU_SHA512 - confirm against checkLongValue
                            longConstantOutput,
                            17179869184L);
         }