8365732: RISC-V: implement AES CTR intrinsics

Reviewed-by: fyang, mli
This commit is contained in:
Anjian Wen 2025-12-05 02:51:13 +00:00 committed by Fei Yang
parent 15f2538943
commit 7e91d34f3e
2 changed files with 238 additions and 9 deletions

View File

@ -2493,8 +2493,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);
__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
@ -2572,8 +2572,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);
__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
@ -2606,6 +2606,223 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Load big-endian 128-bit from memory.
void be_load_counter_128(Register counter_hi, Register counter_lo, Register counter) {
__ ld(counter_lo, Address(counter, 8)); // Load 128-bits from counter
__ ld(counter_hi, Address(counter));
__ rev8(counter_lo, counter_lo); // Convert big-endian to little-endian
__ rev8(counter_hi, counter_hi);
}
// Little-endian 128-bit + 64-bit -> 128-bit addition.
void add_counter_128(Register counter_hi, Register counter_lo) {
assert_different_registers(counter_hi, counter_lo, t0);
__ addi(counter_lo, counter_lo, 1);
__ seqz(t0, counter_lo); // Check for result overflow
__ add(counter_hi, counter_hi, t0); // Add 1 if overflow otherwise 0
}
// Store big-endian 128-bit to memory.
void be_store_counter_128(Register counter_hi, Register counter_lo, Register counter) {
assert_different_registers(counter_hi, counter_lo, t0, t1);
__ rev8(t0, counter_lo); // Convert little-endian to big-endian
__ rev8(t1, counter_hi);
__ sd(t0, Address(counter, 8)); // Store 128-bits to counter
__ sd(t1, Address(counter));
}
void counterMode_AESCrypt(int round, Register in, Register out, Register key, Register counter,
Register input_len, Register saved_encrypted_ctr, Register used_ptr) {
// Algorithm:
//
// generate_aes_loadkeys();
// load_counter_128(counter_hi, counter_lo, counter);
//
// L_next:
// if (used >= BLOCK_SIZE) goto L_main_loop;
//
// L_encrypt_next:
// *out = *in ^ saved_encrypted_ctr[used]);
// out++; in++; used++; len--;
// if (len == 0) goto L_exit;
// goto L_next;
//
// L_main_loop:
// if (len == 0) goto L_exit;
// saved_encrypted_ctr = generate_aes_encrypt(counter);
//
// add_counter_128(counter_hi, counter_lo);
// be_store_counter_128(counter_hi, counter_lo, counter);
// used = 0;
//
// if(len < BLOCK_SIZE) goto L_encrypt_next;
//
// v_in = load_16Byte(in);
// v_out = load_16Byte(out);
// v_saved_encrypted_ctr = load_16Byte(saved_encrypted_ctr);
// v_out = v_in ^ v_saved_encrypted_ctr;
// out += BLOCK_SIZE;
// in += BLOCK_SIZE;
// len -= BLOCK_SIZE;
// used = BLOCK_SIZE;
// goto L_main_loop;
//
//
// L_exit:
// store(used);
// result = input_len
// return result;
const Register used = x28;
const Register len = x29;
const Register counter_hi = x30;
const Register counter_lo = x31;
const Register block_size = t2;
const unsigned int BLOCK_SIZE = 16;
VectorRegister working_vregs[] = {
v1, v2, v3, v4, v5, v6, v7, v8,
v9, v10, v11, v12, v13, v14, v15
};
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ lwu(used, Address(used_ptr));
__ mv(len, input_len);
__ mv(block_size, BLOCK_SIZE);
// load keys to working_vregs according to round
generate_aes_loadkeys(key, working_vregs, round);
// 128-bit big-endian load
be_load_counter_128(counter_hi, counter_lo, counter);
Label L_next, L_encrypt_next, L_main_loop, L_exit;
// Check the last saved_encrypted_ctr used value, we fall through
// to L_encrypt_next when the used value lower than block_size
__ bind(L_next);
__ bgeu(used, block_size, L_main_loop);
// There is still data left fewer than block_size after L_main_loop
// or last used, we encrypt them one by one.
__ bind(L_encrypt_next);
__ add(t0, saved_encrypted_ctr, used);
__ lbu(t1, Address(t0));
__ lbu(t0, Address(in));
__ xorr(t1, t1, t0);
__ sb(t1, Address(out));
__ addi(in, in, 1);
__ addi(out, out, 1);
__ addi(used, used, 1);
__ subi(len, len, 1);
__ beqz(len, L_exit);
__ j(L_next);
// We will calculate the next saved_encrypted_ctr and encrypt the blocks of data
// one by one until there is less than a full block remaining if len not zero
__ bind(L_main_loop);
__ beqz(len, L_exit);
__ vle32_v(v16, counter);
// encrypt counter according to round
generate_aes_encrypt(v16, working_vregs, round);
__ vse32_v(v16, saved_encrypted_ctr);
// 128-bit little-endian increment
add_counter_128(counter_hi, counter_lo);
// 128-bit big-endian store
be_store_counter_128(counter_hi, counter_lo, counter);
__ mv(used, 0);
// Check if we have a full block_size
__ bltu(len, block_size, L_encrypt_next);
// We have one full block to encrypt at least
__ vle32_v(v17, in);
__ vxor_vv(v16, v16, v17);
__ vse32_v(v16, out);
__ add(out, out, block_size);
__ add(in, in, block_size);
__ sub(len, len, block_size);
__ mv(used, block_size);
__ j(L_main_loop);
__ bind(L_exit);
__ sw(used, Address(used_ptr));
__ mv(x10, input_len);
__ leave();
__ ret();
};
// CTR AES crypt.
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - counter vector byte array address
// c_rarg4 - input length
// c_rarg5 - saved encryptedCounter start
// c_rarg6 - saved used length
//
// Output:
// x10 - input length
//
address generate_counterMode_AESCrypt() {
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
assert(UseAESCTRIntrinsics, "need AES instructions (Zvkned extension) support");
assert(UseZbb, "need basic bit manipulation (Zbb extension) support");
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
StubCodeMark mark(this, stub_id);
const Register in = c_rarg0;
const Register out = c_rarg1;
const Register key = c_rarg2;
const Register counter = c_rarg3;
const Register input_len = c_rarg4;
const Register saved_encrypted_ctr = c_rarg5;
const Register used_len_ptr = c_rarg6;
const Register keylen = c_rarg7; // temporary register
const address start = __ pc();
__ enter();
Label L_exit;
__ beqz(input_len, L_exit);
Label L_aes128, L_aes192;
// Compute #rounds for AES based on the length of the key array
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ mv(t0, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t0, L_aes128);
__ beq(keylen, t0, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
// Note: the following function performs crypt with key += 15*16
counterMode_AESCrypt(15, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
// Note: the following function performs crypt with key += 13*16
__ bind(L_aes192);
counterMode_AESCrypt(13, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
// Note: the following function performs crypt with key += 11*16
__ bind(L_aes128);
counterMode_AESCrypt(11, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
__ bind(L_exit);
__ mv(x10, input_len);
__ leave();
__ ret();
return start;
}
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
void compare_string_8_x_LU(Register tmpL, Register tmpU,
Register strL, Register strU, Label& DIFF) {
@ -6826,6 +7043,10 @@ static const int64_t right_3_bits = right_n_bits(3);
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
}
if (UseAESCTRIntrinsics) {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
}
if (UsePoly1305Intrinsics) {
StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
}

View File

@ -434,6 +434,15 @@ void VM_Version::c2_initialize() {
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
UseAES = true;
}
if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics) && UseZbb) {
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
}
if (UseAESCTRIntrinsics && !UseZbb) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZbb support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
} else {
if (UseAES) {
warning("AES instructions are not available on this CPU");
@ -443,11 +452,10 @@ void VM_Version::c2_initialize() {
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}
if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
if (UseAESCTRIntrinsics) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZvkn support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
}
}