mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-14 09:53:18 +00:00
8347489: RISC-V: Misaligned memory access with COH
Reviewed-by: mli, vkempik
This commit is contained in:
parent
d699aba26f
commit
e91a6ec49c
@ -1409,6 +1409,14 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
|
||||
load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
|
||||
|
||||
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
|
||||
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
|
||||
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
BLOCK_COMMENT("string_compare {");
|
||||
|
||||
// Bizarrely, the counts are passed in bytes, regardless of whether they
|
||||
@ -1426,6 +1434,24 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
mv(cnt2, cnt1);
|
||||
bind(L);
|
||||
|
||||
// Load 4 bytes once to compare for alignment before main loop. Note that this
|
||||
// is only possible for LL/UU case. We need to resort to load_long_misaligned
|
||||
// for both LU and UL cases.
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
beq(str1, str2, DONE);
|
||||
int base_offset = isLL ? base_offset1 : base_offset2;
|
||||
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
|
||||
mv(t0, minCharsInWord / 2);
|
||||
ble(cnt2, t0, SHORT_STRING);
|
||||
lwu(tmp1, Address(str1));
|
||||
lwu(tmp2, Address(str2));
|
||||
bne(tmp1, tmp2, DIFFERENCE);
|
||||
addi(str1, str1, 4);
|
||||
addi(str2, str2, 4);
|
||||
subi(cnt2, cnt2, minCharsInWord / 2);
|
||||
}
|
||||
}
|
||||
|
||||
// A very short string
|
||||
mv(t0, minCharsInWord);
|
||||
ble(cnt2, t0, SHORT_STRING);
|
||||
@ -1434,8 +1460,14 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
// load first parts of strings and finish initialization while loading
|
||||
{
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
// check if str1 and str2 is same pointer
|
||||
beq(str1, str2, DONE);
|
||||
#ifdef ASSERT
|
||||
Label align_ok;
|
||||
orr(t0, str1, str2);
|
||||
andi(t0, t0, 0x7);
|
||||
beqz(t0, align_ok);
|
||||
stop("bad alignment");
|
||||
bind(align_ok);
|
||||
#endif
|
||||
// load 8 bytes once to compare
|
||||
ld(tmp1, Address(str1));
|
||||
ld(tmp2, Address(str2));
|
||||
@ -1452,7 +1484,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
sub(cnt2, zr, cnt2);
|
||||
} else if (isLU) { // LU case
|
||||
lwu(tmp1, Address(str1));
|
||||
ld(tmp2, Address(str2));
|
||||
load_long_misaligned(tmp2, Address(str2), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, STUB);
|
||||
subi(cnt2, cnt2, 4);
|
||||
@ -1465,11 +1497,11 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
sub(cnt2, zr, cnt2);
|
||||
addi(cnt1, cnt1, 4);
|
||||
} else { // UL case
|
||||
ld(tmp1, Address(str1));
|
||||
load_long_misaligned(tmp1, Address(str1), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
lwu(tmp2, Address(str2));
|
||||
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, STUB);
|
||||
addi(cnt2, cnt2, -4);
|
||||
subi(cnt2, cnt2, 4);
|
||||
slli(t0, cnt2, 1);
|
||||
sub(cnt1, zr, t0);
|
||||
add(str1, str1, t0);
|
||||
@ -1486,6 +1518,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
// main loop
|
||||
bind(NEXT_WORD);
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
// both of the two loads are 8-byte aligned
|
||||
add(t0, str1, cnt2);
|
||||
ld(tmp1, Address(t0));
|
||||
add(t0, str2, cnt2);
|
||||
@ -1495,7 +1528,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
add(t0, str1, cnt1);
|
||||
lwu(tmp1, Address(t0));
|
||||
add(t0, str2, cnt2);
|
||||
ld(tmp2, Address(t0));
|
||||
load_long_misaligned(tmp2, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
addi(cnt1, cnt1, 4);
|
||||
inflate_lo32(tmp3, tmp1);
|
||||
mv(tmp1, tmp3);
|
||||
@ -1504,7 +1537,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
add(t0, str2, cnt2);
|
||||
lwu(tmp2, Address(t0));
|
||||
add(t0, str1, cnt1);
|
||||
ld(tmp1, Address(t0));
|
||||
load_long_misaligned(tmp1, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
inflate_lo32(tmp3, tmp2);
|
||||
mv(tmp2, tmp3);
|
||||
addi(cnt1, cnt1, 8);
|
||||
@ -1637,6 +1670,9 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
|
||||
|
||||
assert((base_offset % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
Register cnt1 = tmp3;
|
||||
Register cnt2 = tmp1; // cnt2 only used in array length compare
|
||||
Label DONE, SAME, NEXT_WORD, SHORT, TAIL03, TAIL01;
|
||||
@ -1660,10 +1696,31 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
|
||||
la(a1, Address(a1, base_offset));
|
||||
la(a2, Address(a2, base_offset));
|
||||
|
||||
// Load 4 bytes once to compare for alignment before main loop.
|
||||
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
|
||||
subi(cnt1, cnt1, elem_per_word / 2);
|
||||
bltz(cnt1, TAIL03);
|
||||
lwu(tmp1, Address(a1));
|
||||
lwu(tmp2, Address(a2));
|
||||
addi(a1, a1, 4);
|
||||
addi(a2, a2, 4);
|
||||
bne(tmp1, tmp2, DONE);
|
||||
}
|
||||
|
||||
// Check for short strings, i.e. smaller than wordSize.
|
||||
subi(cnt1, cnt1, elem_per_word);
|
||||
bltz(cnt1, SHORT);
|
||||
|
||||
#ifdef ASSERT
|
||||
Label align_ok;
|
||||
orr(t0, a1, a2);
|
||||
andi(t0, t0, 0x7);
|
||||
beqz(t0, align_ok);
|
||||
stop("bad alignment");
|
||||
bind(align_ok);
|
||||
#endif
|
||||
|
||||
// Main 8 byte comparison loop.
|
||||
bind(NEXT_WORD); {
|
||||
ld(tmp1, Address(a1));
|
||||
@ -1729,20 +1786,45 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
void C2_MacroAssembler::string_equals(Register a1, Register a2,
|
||||
Register result, Register cnt1)
|
||||
{
|
||||
Label SAME, DONE, SHORT, NEXT_WORD;
|
||||
Label SAME, DONE, SHORT, NEXT_WORD, TAIL03, TAIL01;
|
||||
Register tmp1 = t0;
|
||||
Register tmp2 = t1;
|
||||
|
||||
assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2);
|
||||
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
|
||||
|
||||
assert((base_offset % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
BLOCK_COMMENT("string_equals {");
|
||||
|
||||
mv(result, false);
|
||||
|
||||
// Load 4 bytes once to compare for alignment before main loop.
|
||||
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
|
||||
subi(cnt1, cnt1, 4);
|
||||
bltz(cnt1, TAIL03);
|
||||
lwu(tmp1, Address(a1));
|
||||
lwu(tmp2, Address(a2));
|
||||
addi(a1, a1, 4);
|
||||
addi(a2, a2, 4);
|
||||
bne(tmp1, tmp2, DONE);
|
||||
}
|
||||
|
||||
// Check for short strings, i.e. smaller than wordSize.
|
||||
subi(cnt1, cnt1, wordSize);
|
||||
bltz(cnt1, SHORT);
|
||||
|
||||
#ifdef ASSERT
|
||||
Label align_ok;
|
||||
orr(t0, a1, a2);
|
||||
andi(t0, t0, 0x7);
|
||||
beqz(t0, align_ok);
|
||||
stop("bad alignment");
|
||||
bind(align_ok);
|
||||
#endif
|
||||
|
||||
// Main 8 byte comparison loop.
|
||||
bind(NEXT_WORD); {
|
||||
ld(tmp1, Address(a1));
|
||||
@ -1757,8 +1839,6 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
|
||||
beqz(tmp1, SAME);
|
||||
|
||||
bind(SHORT);
|
||||
Label TAIL03, TAIL01;
|
||||
|
||||
// 0-7 bytes left.
|
||||
test_bit(tmp1, cnt1, 2);
|
||||
beqz(tmp1, TAIL03);
|
||||
@ -2512,6 +2592,9 @@ void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register resul
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
|
||||
|
||||
assert((base_offset % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
BLOCK_COMMENT("arrays_equals_v {");
|
||||
|
||||
// if (a1 == a2), return true
|
||||
|
||||
@ -4307,7 +4307,7 @@ void MacroAssembler::population_count(Register dst, Register src,
|
||||
{
|
||||
bind(loop);
|
||||
addi(dst, dst, 1);
|
||||
addi(tmp2, tmp1, -1);
|
||||
subi(tmp2, tmp1, 1);
|
||||
andr(tmp1, tmp1, tmp2);
|
||||
bnez(tmp1, loop);
|
||||
}
|
||||
|
||||
@ -2449,17 +2449,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
|
||||
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
|
||||
void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) {
|
||||
void compare_string_8_x_LU(Register tmpL, Register tmpU,
|
||||
Register strL, Register strU, Label& DIFF) {
|
||||
const Register tmp = x30, tmpLval = x12;
|
||||
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
|
||||
assert((base_offset % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
// strL is 8-byte aligned
|
||||
__ ld(tmpLval, Address(strL));
|
||||
__ addi(strL, strL, wordSize);
|
||||
__ ld(tmpU, Address(strU));
|
||||
|
||||
// compare first 4 characters
|
||||
__ load_long_misaligned(tmpU, Address(strU), tmp, (base_offset % 8) != 0 ? 4 : 8);
|
||||
__ addi(strU, strU, wordSize);
|
||||
__ inflate_lo32(tmpL, tmpLval);
|
||||
__ xorr(tmp, tmpU, tmpL);
|
||||
__ bnez(tmp, DIFF);
|
||||
|
||||
__ ld(tmpU, Address(strU));
|
||||
// compare second 4 characters
|
||||
__ load_long_misaligned(tmpU, Address(strU), tmp, (base_offset % 8) != 0 ? 4 : 8);
|
||||
__ addi(strU, strU, wordSize);
|
||||
__ inflate_hi32(tmpL, tmpLval);
|
||||
__ xorr(tmp, tmpU, tmpL);
|
||||
@ -2493,6 +2504,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Register result = x10, str1 = x11, str2 = x13, cnt2 = x14,
|
||||
tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x12;
|
||||
|
||||
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
|
||||
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
|
||||
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
// cnt2 == amount of characters left to compare
|
||||
// Check already loaded first 4 symbols
|
||||
__ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
|
||||
@ -2509,17 +2528,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
tmpU = isLU ? tmp2 : tmp1, // where to keep U for comparison
|
||||
tmpL = isLU ? tmp1 : tmp2; // where to keep L for comparison
|
||||
|
||||
// make sure main loop is 8 byte-aligned, we should load another 4 bytes from strL
|
||||
// cnt2 is >= 68 here, no need to check it for >= 0
|
||||
__ lwu(tmpL, Address(strL));
|
||||
__ addi(strL, strL, wordSize / 2);
|
||||
__ ld(tmpU, Address(strU));
|
||||
__ addi(strU, strU, wordSize);
|
||||
__ inflate_lo32(tmp3, tmpL);
|
||||
__ mv(tmpL, tmp3);
|
||||
__ xorr(tmp3, tmpU, tmpL);
|
||||
__ bnez(tmp3, CALCULATE_DIFFERENCE);
|
||||
__ addi(cnt2, cnt2, -wordSize / 2);
|
||||
if (AvoidUnalignedAccesses && (base_offset1 % 8) == 0) {
|
||||
// Load another 4 bytes from strL to make sure main loop is 8-byte aligned
|
||||
// cnt2 is >= 68 here, no need to check it for >= 0
|
||||
__ lwu(tmpL, Address(strL));
|
||||
__ addi(strL, strL, wordSize / 2);
|
||||
__ load_long_misaligned(tmpU, Address(strU), tmp4, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
__ addi(strU, strU, wordSize);
|
||||
__ inflate_lo32(tmp3, tmpL);
|
||||
__ mv(tmpL, tmp3);
|
||||
__ xorr(tmp3, tmpU, tmpL);
|
||||
__ bnez(tmp3, CALCULATE_DIFFERENCE);
|
||||
__ subi(cnt2, cnt2, wordSize / 2);
|
||||
}
|
||||
|
||||
// we are now 8-bytes aligned on strL
|
||||
__ subi(cnt2, cnt2, wordSize * 2);
|
||||
@ -4493,7 +4514,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
if (multi_block) {
|
||||
int total_adds = vset_sew == Assembler::e32 ? 240 : 608;
|
||||
__ addi(consts, consts, -total_adds);
|
||||
__ subi(consts, consts, total_adds);
|
||||
__ addi(ofs, ofs, vset_sew == Assembler::e32 ? 64 : 128);
|
||||
__ ble(ofs, limit, multi_block_loop);
|
||||
__ mv(c_rarg0, ofs); // return ofs
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user