8360179: RISC-V: Only enable BigInteger intrinsics when AvoidUnalignedAccesses == false

Reviewed-by: fjiang, fyang
Authored by Anjian Wen on 2025-06-24 03:08:50 +00:00; committed by Feilong Jiang
parent a350a1115a
commit 34412da52b
3 changed files with 30 additions and 111 deletions

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

@@ -5341,42 +5341,6 @@ void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, R
   add(final_dest_hi, dest_hi, carry);
 }
 
-/**
- * Multiply 32 bit by 32 bit first loop.
- */
-void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
-                                           Register y, Register y_idx, Register z,
-                                           Register carry, Register product,
-                                           Register idx, Register kdx) {
-  // jlong carry, x[], y[], z[];
-  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
-  //   long product = y[idx] * x[xstart] + carry;
-  //   z[kdx] = (int)product;
-  //   carry = product >>> 32;
-  // }
-  // z[xstart] = (int)carry;
-
-  Label L_first_loop, L_first_loop_exit;
-  blez(idx, L_first_loop_exit);
-
-  shadd(t0, xstart, x, t0, LogBytesPerInt);
-  lwu(x_xstart, Address(t0, 0));
-
-  bind(L_first_loop);
-  subiw(idx, idx, 1);
-  shadd(t0, idx, y, t0, LogBytesPerInt);
-  lwu(y_idx, Address(t0, 0));
-  mul(product, x_xstart, y_idx);
-  add(product, product, carry);
-  srli(carry, product, 32);
-  subiw(kdx, kdx, 1);
-  shadd(t0, kdx, z, t0, LogBytesPerInt);
-  sw(product, Address(t0, 0));
-  bgtz(idx, L_first_loop);
-
-  bind(L_first_loop_exit);
-}
-
 /**
  * Multiply 64 bit by 64 bit first loop.
  */
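
The pseudocode comment in the removed function is the scalar algorithm itself: multiply every 32-bit limb of y by the single limb x[xstart], threading the high half of each 64-bit product into the next iteration as a carry. A minimal standalone C++ sketch of that first loop (the function name and signature are illustrative, not HotSpot code):

    #include <cstdint>

    // One pass of the "first loop": z receives y[ystart..0] * x[xstart],
    // with a running 64-bit carry between 32-bit limbs. z must have room
    // for at least ystart + xstart + 2 limbs.
    static void first_loop_32x32(const uint32_t* x, const uint32_t* y,
                                 uint32_t* z, int xstart, int ystart) {
      uint64_t carry = 0;
      for (int idx = ystart, kdx = ystart + 1 + xstart; idx >= 0; idx--, kdx--) {
        uint64_t product = (uint64_t)y[idx] * x[xstart] + carry;
        z[kdx] = (uint32_t)product;  // low 32 bits become this output limb
        carry = product >> 32;       // high 32 bits carry into the next limb
      }
      z[xstart] = (uint32_t)carry;   // final carry lands in z[xstart]
    }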
@@ -5593,77 +5557,16 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
   const Register carry = tmp5;
   const Register product = xlen;
   const Register x_xstart = tmp0;
+  const Register jdx = tmp1;
 
   mv(idx, ylen);         // idx = ylen;
   addw(kdx, xlen, ylen); // kdx = xlen+ylen;
   mv(carry, zr);         // carry = 0;
 
-  Label L_multiply_64_x_64_loop, L_done;
+  Label L_done;
 
   subiw(xstart, xlen, 1);
   bltz(xstart, L_done);
-  const Register jdx = tmp1;
-
-  if (AvoidUnalignedAccesses) {
-    int base_offset = arrayOopDesc::base_offset_in_bytes(T_INT);
-    assert((base_offset % (UseCompactObjectHeaders ? 4 :
-           (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
-
-    if ((base_offset % 8) == 0) {
-      // multiply_64_x_64_loop emits 8-byte load/store to access two elements
-      // at a time from int arrays x and y. When base_offset is 8 bytes, these
-      // accesses are naturally aligned if both xlen and ylen are even numbers.
-      orr(t0, xlen, ylen);
-      test_bit(t0, t0, 0);
-      beqz(t0, L_multiply_64_x_64_loop);
-    }
-
-    Label L_second_loop_unaligned, L_third_loop, L_third_loop_exit;
-    multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
-    shadd(t0, xstart, z, t0, LogBytesPerInt);
-    sw(carry, Address(t0, 0));
-
-    bind(L_second_loop_unaligned);
-    mv(carry, zr);
-    mv(jdx, ylen);
-    subiw(xstart, xstart, 1);
-    bltz(xstart, L_done);
-
-    subi(sp, sp, 2 * wordSize);
-    sd(z, Address(sp, 0));
-    sd(zr, Address(sp, wordSize));
-
-    shadd(t0, xstart, z, t0, LogBytesPerInt);
-    addi(z, t0, 4);
-
-    shadd(t0, xstart, x, t0, LogBytesPerInt);
-    lwu(product, Address(t0, 0));
-
-    blez(jdx, L_third_loop_exit);
-
-    bind(L_third_loop);
-    subiw(jdx, jdx, 1);
-    shadd(t0, jdx, y, t0, LogBytesPerInt);
-    lwu(t0, Address(t0, 0));
-    mul(t1, t0, product);
-    add(t0, t1, carry);
-    shadd(tmp6, jdx, z, t1, LogBytesPerInt);
-    lwu(t1, Address(tmp6, 0));
-    add(t0, t0, t1);
-    sw(t0, Address(tmp6, 0));
-    srli(carry, t0, 32);
-    bgtz(jdx, L_third_loop);
-
-    bind(L_third_loop_exit);
-    ld(z, Address(sp, 0));
-    addi(sp, sp, 2 * wordSize);
-    shadd(t0, xstart, z, t0, LogBytesPerInt);
-    sw(carry, Address(t0, 0));
-
-    j(L_second_loop_unaligned);
-  }
-
-  bind(L_multiply_64_x_64_loop);
   multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
 
   Label L_second_loop_aligned;
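
For reference, the alignment argument in the removed comment: int elements are 4 bytes, so an 8-byte access covering the element pair [i, i+1] of an array whose base offset is 8-byte aligned is itself 8-byte aligned exactly when i is even, and with even xlen and ylen the 64 x 64 loop only ever forms such pairs. A small self-contained C++ check of that invariant (the concrete base offset value is an assumption for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t base_offset = 16;        // assume base_offset % 8 == 0
      for (int len = 2; len <= 8; len += 2) {  // even element counts only
        // the 64 x 64 loop consumes two 4-byte ints per 8-byte access,
        // walking the pairs down from the top of the array
        for (int i = len - 2; i >= 0; i -= 2) {
          uintptr_t addr = base_offset + 4u * (uintptr_t)i;  // pair [i, i+1]
          assert(addr % 8 == 0);               // naturally aligned
        }
      }
      return 0;
    }

With this patch the generated stub no longer falls back to a 32-bit path at runtime; instead the intrinsic is simply never enabled when AvoidUnalignedAccesses is set (see vm_version_riscv.cpp below).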

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

@@ -1384,10 +1384,6 @@ public:
   void adc(Register dst, Register src1, Register src2, Register carry);
   void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                        Register src1, Register src2, Register carry);
-  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
-                             Register y, Register y_idx, Register z,
-                             Register carry, Register product,
-                             Register idx, Register kdx);
   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                              Register y, Register y_idx, Register z,
                              Register carry, Register product,

src/hotspot/cpu/riscv/vm_version_riscv.cpp

@@ -325,20 +325,40 @@ void VM_Version::c2_initialize() {
     FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
   }
 
-  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
-    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+  if (!AvoidUnalignedAccesses) {
+    if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+      FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+    }
+  } else if (UseMultiplyToLenIntrinsic) {
+    warning("Intrinsics for BigInteger.multiplyToLen() not available on this CPU.");
+    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
   }
 
-  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
-    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
+  if (!AvoidUnalignedAccesses) {
+    if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+      FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
+    }
+  } else if (UseSquareToLenIntrinsic) {
+    warning("Intrinsics for BigInteger.squareToLen() not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
   }
 
-  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
-    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
+  if (!AvoidUnalignedAccesses) {
+    if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+      FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
+    }
+  } else if (UseMontgomeryMultiplyIntrinsic) {
+    warning("Intrinsics for BigInteger.montgomeryMultiply() not available on this CPU.");
+    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
   }
 
-  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
-    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
+  if (!AvoidUnalignedAccesses) {
+    if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+      FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
+    }
+  } else if (UseMontgomerySquareIntrinsic) {
+    warning("Intrinsics for BigInteger.montgomerySquare() not available on this CPU.");
+    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
   }
 
   // Adler32
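
All four gates in this hunk share one shape: leave the intrinsic on by default only when unaligned accesses are cheap, and warn-and-disable when a user forces it on penalized hardware. A hypothetical condensation of that pattern (not part of the patch, which spells each block out; FLAG_IS_DEFAULT and FLAG_SET_DEFAULT are macros over flag names):

    // `flag` is the Use*Intrinsic flag; `method` is the BigInteger
    // method string-literal named in the warning text.
    #define GATE_BIGINTEGER_INTRINSIC(flag, method)                        \
      if (!AvoidUnalignedAccesses) {                                       \
        if (FLAG_IS_DEFAULT(flag)) {                                       \
          FLAG_SET_DEFAULT(flag, true);                                    \
        }                                                                  \
      } else if (flag) {                                                   \
        warning("Intrinsics for " method " not available on this CPU.");   \
        FLAG_SET_DEFAULT(flag, false);                                     \
      }

    // e.g. GATE_BIGINTEGER_INTRINSIC(UseMultiplyToLenIntrinsic,
    //                                "BigInteger.multiplyToLen()");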