mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-27 18:50:07 +00:00
8165404: AArch64: Implement SHA512 accelerator/intrinsic
Reviewed-by: aph
This commit is contained in:
parent
555f0e6e2a
commit
c148c2c176
@ -865,6 +865,37 @@ class LdStSIMDOp(Instruction):
|
||||
def aname(self):
|
||||
return self._name
|
||||
|
||||
class SHA512SIMDOp(Instruction):
    """Test-case generator for the SHA-512 SIMD instructions.

    'sha512su0' is emitted with two vector registers; every other name
    handled by this class is emitted with three.
    """

    def generate(self):
        # Pick the operand registers: two for sha512su0, three otherwise.
        operand_count = 2 if self._name == 'sha512su0' else 3
        self.reg = [FloatRegister().generate() for _ in range(operand_count)]
        return self

    def cstr(self):
        # C++ assembler call: the base-class prefix followed by the operand list.
        prefix = super(SHA512SIMDOp, self).cstr()
        if self._name != 'sha512su0':
            operands = '%s, __ T2D, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2])
        else:
            operands = '%s, __ T2D, %s);' % (self.reg[0], self.reg[1])
        return prefix + operands

    def astr(self):
        # Expected assembly text: the base-class mnemonic followed by the operands.
        mnemonic = super(SHA512SIMDOp, self).astr()
        if self._name == 'sha512su0':
            # Two vector operands, both written with the .2D arrangement.
            operands = '\t%s.2D, %s.2D' % (self.reg[0].astr("v"),
                                           self.reg[1].astr("v"))
        elif self._name == 'sha512su1':
            # Three vector operands, all written with the .2D arrangement.
            operands = '\t%s.2D, %s.2D, %s.2D' % (self.reg[0].astr("v"),
                                                  self.reg[1].astr("v"),
                                                  self.reg[2].astr("v"))
        else:
            # Remaining names: first two operands use the "q" register
            # prefix, the last one is a .2D vector.
            operands = '\t%s, %s, %s.2D' % (self.reg[0].astr("q"),
                                            self.reg[1].astr("q"),
                                            self.reg[2].astr("v"))
        return mnemonic + operands
|
||||
|
||||
class LSEOp(Instruction):
|
||||
def __init__(self, args):
|
||||
self._name, self.asmname, self.size, self.suffix = args
|
||||
@ -1100,6 +1131,8 @@ generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
|
||||
["ld4r", 4, "2S", Address.post_reg],
|
||||
])
|
||||
|
||||
generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
|
||||
|
||||
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
|
||||
["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"],
|
||||
["ccmp", "__ ccmp(zr, 1, 4u, Assembler::NE);", "ccmp\txzr, 1, #4, NE"],
|
||||
@ -1147,8 +1180,8 @@ outfile.close()
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# compile for 8.1 because of lse atomics
|
||||
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"])
|
||||
# compile for 8.1 and sha2 because of lse atomics and sha512 crypto extension.
|
||||
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2", "aarch64ops.s", "-o", "aarch64ops.o"])
|
||||
|
||||
print
|
||||
print "/*",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -2372,6 +2372,30 @@ public:
|
||||
|
||||
#undef INSN
|
||||
|
||||
// Emitters for the three-register SHA-512 instructions instantiated below
// (sha512h, sha512h2, sha512su1).
// Encoding assembled by the macro: the fixed pattern 0b11001110011 in bits
// 31..21, Vm starting at bit 16, the per-instruction opc in bits 15..10,
// Vn starting at bit 5 and Vd starting at bit 0.
// The arrangement argument T is not encoded; it is only asserted to be T2D.
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
|
||||
starti; \
|
||||
assert(T == T2D, "arrangement must be T2D"); \
|
||||
f(0b11001110011, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \
|
||||
}
|
||||
|
||||
// The second argument is the 6-bit opc placed in bits 15..10.
INSN(sha512h, 0b100000);
|
||||
INSN(sha512h2, 0b100001);
|
||||
INSN(sha512su1, 0b100010);
|
||||
|
||||
#undef INSN
|
||||
|
||||
// Emitter for the two-register SHA-512 instruction instantiated below
// (sha512su0).
// Here opc supplies all of bits 31..10; Vn starts at bit 5 and Vd at bit 0.
// The arrangement argument T is not encoded; it is only asserted to be T2D.
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
|
||||
starti; \
|
||||
assert(T == T2D, "arrangement must be T2D"); \
|
||||
f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \
|
||||
}
|
||||
|
||||
// 22-bit constant covering bits 31..10 of the instruction word.
INSN(sha512su0, 0b1100111011000000100000);
|
||||
|
||||
#undef INSN
|
||||
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Vd, FloatRegister Vn) { \
|
||||
starti; \
|
||||
|
||||
@ -3125,6 +3125,172 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - byte[] source+offset
|
||||
// c_rarg1 - int[] SHA.state
|
||||
// c_rarg2 - int offset
|
||||
// c_rarg3 - int limit
|
||||
//
|
||||
address generate_sha512_implCompress(bool multi_block, const char *name) {
|
||||
static const uint64_t round_consts[80] = {
|
||||
0x428A2F98D728AE22L, 0x7137449123EF65CDL, 0xB5C0FBCFEC4D3B2FL,
|
||||
0xE9B5DBA58189DBBCL, 0x3956C25BF348B538L, 0x59F111F1B605D019L,
|
||||
0x923F82A4AF194F9BL, 0xAB1C5ED5DA6D8118L, 0xD807AA98A3030242L,
|
||||
0x12835B0145706FBEL, 0x243185BE4EE4B28CL, 0x550C7DC3D5FFB4E2L,
|
||||
0x72BE5D74F27B896FL, 0x80DEB1FE3B1696B1L, 0x9BDC06A725C71235L,
|
||||
0xC19BF174CF692694L, 0xE49B69C19EF14AD2L, 0xEFBE4786384F25E3L,
|
||||
0x0FC19DC68B8CD5B5L, 0x240CA1CC77AC9C65L, 0x2DE92C6F592B0275L,
|
||||
0x4A7484AA6EA6E483L, 0x5CB0A9DCBD41FBD4L, 0x76F988DA831153B5L,
|
||||
0x983E5152EE66DFABL, 0xA831C66D2DB43210L, 0xB00327C898FB213FL,
|
||||
0xBF597FC7BEEF0EE4L, 0xC6E00BF33DA88FC2L, 0xD5A79147930AA725L,
|
||||
0x06CA6351E003826FL, 0x142929670A0E6E70L, 0x27B70A8546D22FFCL,
|
||||
0x2E1B21385C26C926L, 0x4D2C6DFC5AC42AEDL, 0x53380D139D95B3DFL,
|
||||
0x650A73548BAF63DEL, 0x766A0ABB3C77B2A8L, 0x81C2C92E47EDAEE6L,
|
||||
0x92722C851482353BL, 0xA2BFE8A14CF10364L, 0xA81A664BBC423001L,
|
||||
0xC24B8B70D0F89791L, 0xC76C51A30654BE30L, 0xD192E819D6EF5218L,
|
||||
0xD69906245565A910L, 0xF40E35855771202AL, 0x106AA07032BBD1B8L,
|
||||
0x19A4C116B8D2D0C8L, 0x1E376C085141AB53L, 0x2748774CDF8EEB99L,
|
||||
0x34B0BCB5E19B48A8L, 0x391C0CB3C5C95A63L, 0x4ED8AA4AE3418ACBL,
|
||||
0x5B9CCA4F7763E373L, 0x682E6FF3D6B2B8A3L, 0x748F82EE5DEFB2FCL,
|
||||
0x78A5636F43172F60L, 0x84C87814A1F0AB72L, 0x8CC702081A6439ECL,
|
||||
0x90BEFFFA23631E28L, 0xA4506CEBDE82BDE9L, 0xBEF9A3F7B2C67915L,
|
||||
0xC67178F2E372532BL, 0xCA273ECEEA26619CL, 0xD186B8C721C0C207L,
|
||||
0xEADA7DD6CDE0EB1EL, 0xF57D4F7FEE6ED178L, 0x06F067AA72176FBAL,
|
||||
0x0A637DC5A2C898A6L, 0x113F9804BEF90DAEL, 0x1B710B35131C471BL,
|
||||
0x28DB77F523047D84L, 0x32CAAB7B40C72493L, 0x3C9EBE0A15C9BEBCL,
|
||||
0x431D67C49C100D4CL, 0x4CC5D4BECB3E42B6L, 0x597F299CFC657E2AL,
|
||||
0x5FCB6FAB3AD6FAECL, 0x6C44198C4A475817L
|
||||
};
|
||||
|
||||
// Double rounds for sha512.
|
||||
#define sha512_dround(dr, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4) \
|
||||
if (dr < 36) \
|
||||
__ ld1(v##rc1, __ T2D, __ post(rscratch2, 16)); \
|
||||
__ addv(v5, __ T2D, v##rc0, v##in0); \
|
||||
__ ext(v6, __ T16B, v##i2, v##i3, 8); \
|
||||
__ ext(v5, __ T16B, v5, v5, 8); \
|
||||
__ ext(v7, __ T16B, v##i1, v##i2, 8); \
|
||||
__ addv(v##i3, __ T2D, v##i3, v5); \
|
||||
if (dr < 32) { \
|
||||
__ ext(v5, __ T16B, v##in3, v##in4, 8); \
|
||||
__ sha512su0(v##in0, __ T2D, v##in1); \
|
||||
} \
|
||||
__ sha512h(v##i3, __ T2D, v6, v7); \
|
||||
if (dr < 32) \
|
||||
__ sha512su1(v##in0, __ T2D, v##in2, v5); \
|
||||
__ addv(v##i4, __ T2D, v##i1, v##i3); \
|
||||
__ sha512h2(v##i3, __ T2D, v##i1, v##i0); \
|
||||
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Register buf = c_rarg0;
|
||||
Register state = c_rarg1;
|
||||
Register ofs = c_rarg2;
|
||||
Register limit = c_rarg3;
|
||||
|
||||
__ stpd(v8, v9, __ pre(sp, -64));
|
||||
__ stpd(v10, v11, Address(sp, 16));
|
||||
__ stpd(v12, v13, Address(sp, 32));
|
||||
__ stpd(v14, v15, Address(sp, 48));
|
||||
|
||||
Label sha512_loop;
|
||||
|
||||
// load state
|
||||
__ ld1(v8, v9, v10, v11, __ T2D, state);
|
||||
|
||||
// load first 4 round constants
|
||||
__ lea(rscratch1, ExternalAddress((address)round_consts));
|
||||
__ ld1(v20, v21, v22, v23, __ T2D, __ post(rscratch1, 64));
|
||||
|
||||
__ BIND(sha512_loop);
|
||||
// load 128B of data into v12..v19
|
||||
__ ld1(v12, v13, v14, v15, __ T2D, __ post(buf, 64));
|
||||
__ ld1(v16, v17, v18, v19, __ T2D, __ post(buf, 64));
|
||||
__ rev64(v12, __ T16B, v12);
|
||||
__ rev64(v13, __ T16B, v13);
|
||||
__ rev64(v14, __ T16B, v14);
|
||||
__ rev64(v15, __ T16B, v15);
|
||||
__ rev64(v16, __ T16B, v16);
|
||||
__ rev64(v17, __ T16B, v17);
|
||||
__ rev64(v18, __ T16B, v18);
|
||||
__ rev64(v19, __ T16B, v19);
|
||||
|
||||
__ mov(rscratch2, rscratch1);
|
||||
|
||||
__ mov(v0, __ T16B, v8);
|
||||
__ mov(v1, __ T16B, v9);
|
||||
__ mov(v2, __ T16B, v10);
|
||||
__ mov(v3, __ T16B, v11);
|
||||
|
||||
sha512_dround( 0, 0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17);
|
||||
sha512_dround( 1, 3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18);
|
||||
sha512_dround( 2, 2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19);
|
||||
sha512_dround( 3, 4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12);
|
||||
sha512_dround( 4, 1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13);
|
||||
sha512_dround( 5, 0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14);
|
||||
sha512_dround( 6, 3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15);
|
||||
sha512_dround( 7, 2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16);
|
||||
sha512_dround( 8, 4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17);
|
||||
sha512_dround( 9, 1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18);
|
||||
sha512_dround(10, 0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19);
|
||||
sha512_dround(11, 3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12);
|
||||
sha512_dround(12, 2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13);
|
||||
sha512_dround(13, 4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14);
|
||||
sha512_dround(14, 1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15);
|
||||
sha512_dround(15, 0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16);
|
||||
sha512_dround(16, 3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17);
|
||||
sha512_dround(17, 2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18);
|
||||
sha512_dround(18, 4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19);
|
||||
sha512_dround(19, 1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12);
|
||||
sha512_dround(20, 0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13);
|
||||
sha512_dround(21, 3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14);
|
||||
sha512_dround(22, 2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15);
|
||||
sha512_dround(23, 4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16);
|
||||
sha512_dround(24, 1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17);
|
||||
sha512_dround(25, 0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18);
|
||||
sha512_dround(26, 3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19);
|
||||
sha512_dround(27, 2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12);
|
||||
sha512_dround(28, 4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13);
|
||||
sha512_dround(29, 1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14);
|
||||
sha512_dround(30, 0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15);
|
||||
sha512_dround(31, 3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16);
|
||||
sha512_dround(32, 2, 3, 1, 4, 0, 28, 24, 12, 0, 0, 0, 0);
|
||||
sha512_dround(33, 4, 2, 0, 1, 3, 29, 25, 13, 0, 0, 0, 0);
|
||||
sha512_dround(34, 1, 4, 3, 0, 2, 30, 26, 14, 0, 0, 0, 0);
|
||||
sha512_dround(35, 0, 1, 2, 3, 4, 31, 27, 15, 0, 0, 0, 0);
|
||||
sha512_dround(36, 3, 0, 4, 2, 1, 24, 0, 16, 0, 0, 0, 0);
|
||||
sha512_dround(37, 2, 3, 1, 4, 0, 25, 0, 17, 0, 0, 0, 0);
|
||||
sha512_dround(38, 4, 2, 0, 1, 3, 26, 0, 18, 0, 0, 0, 0);
|
||||
sha512_dround(39, 1, 4, 3, 0, 2, 27, 0, 19, 0, 0, 0, 0);
|
||||
|
||||
__ addv(v8, __ T2D, v8, v0);
|
||||
__ addv(v9, __ T2D, v9, v1);
|
||||
__ addv(v10, __ T2D, v10, v2);
|
||||
__ addv(v11, __ T2D, v11, v3);
|
||||
|
||||
if (multi_block) {
|
||||
__ add(ofs, ofs, 128);
|
||||
__ cmp(ofs, limit);
|
||||
__ br(Assembler::LE, sha512_loop);
|
||||
__ mov(c_rarg0, ofs); // return ofs
|
||||
}
|
||||
|
||||
__ st1(v8, v9, v10, v11, __ T2D, state);
|
||||
|
||||
__ ldpd(v14, v15, Address(sp, 48));
|
||||
__ ldpd(v12, v13, Address(sp, 32));
|
||||
__ ldpd(v10, v11, Address(sp, 16));
|
||||
__ ldpd(v8, v9, __ post(sp, 64));
|
||||
|
||||
__ ret(lr);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Safefetch stubs.
|
||||
void generate_safefetch(const char* name, int size, address* entry,
|
||||
address* fault_pc, address* continuation_pc) {
|
||||
@ -5852,6 +6018,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||
}
|
||||
if (UseSHA512Intrinsics) {
|
||||
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
|
||||
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
|
||||
}
|
||||
|
||||
// generate Adler32 intrinsics code
|
||||
if (UseAdler32Intrinsics) {
|
||||
|
||||
@ -62,6 +62,10 @@
|
||||
#define HWCAP_ATOMICS (1<<8)
|
||||
#endif
|
||||
|
||||
// Fallback definition, used only when HWCAP_SHA512 has not already been
// defined by an earlier header: the SHA-512 capability is bit 21.
#ifndef HWCAP_SHA512
|
||||
#define HWCAP_SHA512 (1 << 21)
|
||||
#endif
|
||||
|
||||
int VM_Version::_cpu;
|
||||
int VM_Version::_model;
|
||||
int VM_Version::_model2;
|
||||
@ -285,6 +289,7 @@ void VM_Version::get_processor_features() {
|
||||
if (auxv & HWCAP_AES) strcat(buf, ", aes");
|
||||
if (auxv & HWCAP_SHA1) strcat(buf, ", sha1");
|
||||
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
|
||||
if (auxv & HWCAP_SHA512) strcat(buf, ", sha512");
|
||||
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
|
||||
|
||||
_features_string = os::strdup(buf);
|
||||
@ -390,7 +395,12 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA512Intrinsics) {
|
||||
if (UseSHA && (auxv & HWCAP_SHA512)) {
|
||||
// Do not auto-enable UseSHA512Intrinsics until it has been fully tested on hardware
|
||||
// if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
|
||||
// FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
|
||||
// }
|
||||
} else if (UseSHA512Intrinsics) {
|
||||
warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user