8347489: RISC-V: Misaligned memory access with COH

Reviewed-by: mli, vkempik
This commit is contained in:
Fei Yang 2025-02-04 14:03:07 +00:00
parent d699aba26f
commit e91a6ec49c
3 changed files with 130 additions and 26 deletions

View File

@ -1409,6 +1409,14 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
BLOCK_COMMENT("string_compare {");
// Bizarrely, the counts are passed in bytes, regardless of whether they
@ -1426,6 +1434,24 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
mv(cnt2, cnt1);
bind(L);
// Load 4 bytes once to compare for alignment before main loop. Note that this
// is only possible for LL/UU case. We need to resort to load_long_misaligned
// for both LU and UL cases.
if (str1_isL == str2_isL) { // LL or UU
beq(str1, str2, DONE);
int base_offset = isLL ? base_offset1 : base_offset2;
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
mv(t0, minCharsInWord / 2);
ble(cnt2, t0, SHORT_STRING);
lwu(tmp1, Address(str1));
lwu(tmp2, Address(str2));
bne(tmp1, tmp2, DIFFERENCE);
addi(str1, str1, 4);
addi(str2, str2, 4);
subi(cnt2, cnt2, minCharsInWord / 2);
}
}
// A very short string
mv(t0, minCharsInWord);
ble(cnt2, t0, SHORT_STRING);
@ -1434,8 +1460,14 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// load first parts of strings and finish initialization while loading
{
if (str1_isL == str2_isL) { // LL or UU
// check if str1 and str2 is same pointer
beq(str1, str2, DONE);
#ifdef ASSERT
Label align_ok;
orr(t0, str1, str2);
andi(t0, t0, 0x7);
beqz(t0, align_ok);
stop("bad alignment");
bind(align_ok);
#endif
// load 8 bytes once to compare
ld(tmp1, Address(str1));
ld(tmp2, Address(str2));
@ -1452,7 +1484,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
sub(cnt2, zr, cnt2);
} else if (isLU) { // LU case
lwu(tmp1, Address(str1));
ld(tmp2, Address(str2));
load_long_misaligned(tmp2, Address(str2), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, STUB);
subi(cnt2, cnt2, 4);
@ -1465,11 +1497,11 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
sub(cnt2, zr, cnt2);
addi(cnt1, cnt1, 4);
} else { // UL case
ld(tmp1, Address(str1));
load_long_misaligned(tmp1, Address(str1), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
lwu(tmp2, Address(str2));
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, STUB);
addi(cnt2, cnt2, -4);
subi(cnt2, cnt2, 4);
slli(t0, cnt2, 1);
sub(cnt1, zr, t0);
add(str1, str1, t0);
@ -1486,6 +1518,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// main loop
bind(NEXT_WORD);
if (str1_isL == str2_isL) { // LL or UU
// both of the two loads are 8-byte aligned
add(t0, str1, cnt2);
ld(tmp1, Address(t0));
add(t0, str2, cnt2);
@ -1495,7 +1528,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
add(t0, str1, cnt1);
lwu(tmp1, Address(t0));
add(t0, str2, cnt2);
ld(tmp2, Address(t0));
load_long_misaligned(tmp2, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
addi(cnt1, cnt1, 4);
inflate_lo32(tmp3, tmp1);
mv(tmp1, tmp3);
@ -1504,7 +1537,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
add(t0, str2, cnt2);
lwu(tmp2, Address(t0));
add(t0, str1, cnt1);
ld(tmp1, Address(t0));
load_long_misaligned(tmp1, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
inflate_lo32(tmp3, tmp2);
mv(tmp2, tmp3);
addi(cnt1, cnt1, 8);
@ -1637,6 +1670,9 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
assert((base_offset % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
Register cnt1 = tmp3;
Register cnt2 = tmp1; // cnt2 only used in array length compare
Label DONE, SAME, NEXT_WORD, SHORT, TAIL03, TAIL01;
@ -1660,10 +1696,31 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
la(a1, Address(a1, base_offset));
la(a2, Address(a2, base_offset));
// Load 4 bytes once to compare for alignment before main loop.
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
subi(cnt1, cnt1, elem_per_word / 2);
bltz(cnt1, TAIL03);
lwu(tmp1, Address(a1));
lwu(tmp2, Address(a2));
addi(a1, a1, 4);
addi(a2, a2, 4);
bne(tmp1, tmp2, DONE);
}
// Check for short strings, i.e. smaller than wordSize.
subi(cnt1, cnt1, elem_per_word);
bltz(cnt1, SHORT);
#ifdef ASSERT
Label align_ok;
orr(t0, a1, a2);
andi(t0, t0, 0x7);
beqz(t0, align_ok);
stop("bad alignment");
bind(align_ok);
#endif
// Main 8 byte comparison loop.
bind(NEXT_WORD); {
ld(tmp1, Address(a1));
@ -1729,20 +1786,45 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
void C2_MacroAssembler::string_equals(Register a1, Register a2,
Register result, Register cnt1)
{
Label SAME, DONE, SHORT, NEXT_WORD;
Label SAME, DONE, SHORT, NEXT_WORD, TAIL03, TAIL01;
Register tmp1 = t0;
Register tmp2 = t1;
assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2);
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
assert((base_offset % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
BLOCK_COMMENT("string_equals {");
mv(result, false);
// Load 4 bytes once to compare for alignment before main loop.
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
subi(cnt1, cnt1, 4);
bltz(cnt1, TAIL03);
lwu(tmp1, Address(a1));
lwu(tmp2, Address(a2));
addi(a1, a1, 4);
addi(a2, a2, 4);
bne(tmp1, tmp2, DONE);
}
// Check for short strings, i.e. smaller than wordSize.
subi(cnt1, cnt1, wordSize);
bltz(cnt1, SHORT);
#ifdef ASSERT
Label align_ok;
orr(t0, a1, a2);
andi(t0, t0, 0x7);
beqz(t0, align_ok);
stop("bad alignment");
bind(align_ok);
#endif
// Main 8 byte comparison loop.
bind(NEXT_WORD); {
ld(tmp1, Address(a1));
@ -1757,8 +1839,6 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
beqz(tmp1, SAME);
bind(SHORT);
Label TAIL03, TAIL01;
// 0-7 bytes left.
test_bit(tmp1, cnt1, 2);
beqz(tmp1, TAIL03);
@ -2512,6 +2592,9 @@ void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register resul
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
assert((base_offset % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
BLOCK_COMMENT("arrays_equals_v {");
// if (a1 == a2), return true

View File

@ -4307,7 +4307,7 @@ void MacroAssembler::population_count(Register dst, Register src,
{
bind(loop);
addi(dst, dst, 1);
addi(tmp2, tmp1, -1);
subi(tmp2, tmp1, 1);
andr(tmp1, tmp1, tmp2);
bnez(tmp1, loop);
}

View File

@ -2449,17 +2449,28 @@ class StubGenerator: public StubCodeGenerator {
}
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) {
void compare_string_8_x_LU(Register tmpL, Register tmpU,
Register strL, Register strU, Label& DIFF) {
const Register tmp = x30, tmpLval = x12;
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
assert((base_offset % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
// strL is 8-byte aligned
__ ld(tmpLval, Address(strL));
__ addi(strL, strL, wordSize);
__ ld(tmpU, Address(strU));
// compare first 4 characters
__ load_long_misaligned(tmpU, Address(strU), tmp, (base_offset % 8) != 0 ? 4 : 8);
__ addi(strU, strU, wordSize);
__ inflate_lo32(tmpL, tmpLval);
__ xorr(tmp, tmpU, tmpL);
__ bnez(tmp, DIFF);
__ ld(tmpU, Address(strU));
// compare second 4 characters
__ load_long_misaligned(tmpU, Address(strU), tmp, (base_offset % 8) != 0 ? 4 : 8);
__ addi(strU, strU, wordSize);
__ inflate_hi32(tmpL, tmpLval);
__ xorr(tmp, tmpU, tmpL);
@ -2493,6 +2504,14 @@ class StubGenerator: public StubCodeGenerator {
const Register result = x10, str1 = x11, str2 = x13, cnt2 = x14,
tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x12;
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
// cnt2 == amount of characters left to compare
// Check already loaded first 4 symbols
__ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
@ -2509,17 +2528,19 @@ class StubGenerator: public StubCodeGenerator {
tmpU = isLU ? tmp2 : tmp1, // where to keep U for comparison
tmpL = isLU ? tmp1 : tmp2; // where to keep L for comparison
// make sure main loop is 8 byte-aligned, we should load another 4 bytes from strL
// cnt2 is >= 68 here, no need to check it for >= 0
__ lwu(tmpL, Address(strL));
__ addi(strL, strL, wordSize / 2);
__ ld(tmpU, Address(strU));
__ addi(strU, strU, wordSize);
__ inflate_lo32(tmp3, tmpL);
__ mv(tmpL, tmp3);
__ xorr(tmp3, tmpU, tmpL);
__ bnez(tmp3, CALCULATE_DIFFERENCE);
__ addi(cnt2, cnt2, -wordSize / 2);
if (AvoidUnalignedAccesses && (base_offset1 % 8) == 0) {
// Load another 4 bytes from strL to make sure main loop is 8-byte aligned
// cnt2 is >= 68 here, no need to check it for >= 0
__ lwu(tmpL, Address(strL));
__ addi(strL, strL, wordSize / 2);
__ load_long_misaligned(tmpU, Address(strU), tmp4, (base_offset2 % 8) != 0 ? 4 : 8);
__ addi(strU, strU, wordSize);
__ inflate_lo32(tmp3, tmpL);
__ mv(tmpL, tmp3);
__ xorr(tmp3, tmpU, tmpL);
__ bnez(tmp3, CALCULATE_DIFFERENCE);
__ subi(cnt2, cnt2, wordSize / 2);
}
// we are now 8-bytes aligned on strL
__ subi(cnt2, cnt2, wordSize * 2);
@ -4493,7 +4514,7 @@ class StubGenerator: public StubCodeGenerator {
if (multi_block) {
int total_adds = vset_sew == Assembler::e32 ? 240 : 608;
__ addi(consts, consts, -total_adds);
__ subi(consts, consts, total_adds);
__ addi(ofs, ofs, vset_sew == Assembler::e32 ? 64 : 128);
__ ble(ofs, limit, multi_block_loop);
__ mv(c_rarg0, ofs); // return ofs