8350095: RISC-V: Refactor string_compare

Reviewed-by: fyang
This commit is contained in:
Hamlin Li 2025-03-03 18:31:56 +00:00
parent f53de9208c
commit e470f474ee
4 changed files with 207 additions and 170 deletions

View File

@ -1382,15 +1382,183 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
bind(DONE);
}
// Compare longwords
//
// Emits the word-at-a-time comparison for two strings that share the same
// encoding (1 byte per character when isLL is true, 2 bytes otherwise).
//
//   result          receives the difference of the first mismatching characters
//   str1, str2      addresses of the character data; both are advanced by the
//                   emitted code
//   isLL            selects the 1-byte (true) or 2-byte (false) character layout
//   cnt1            not referenced by this function body
//   cnt2            number of characters to compare; clobbered and reused as a
//                   negative byte offset inside the loop
//   tmp1, tmp2      hold the words currently being compared
//   tmp3            scratch register
//   STUB_THRESHOLD  character count at or above which control goes to *STUB
//   STUB            target for long strings
//   SHORT_STRING    target when too few characters remain after the alignment step
//   DONE            target when the comparison is finished
//
// t0 is used as an additional scratch register.
// NOTE(review): on the paths that branch to *DONE without a mismatch this
// function does not write 'result'; presumably the caller has already set it
// up -- confirm against string_compare.
void C2_MacroAssembler::string_compare_long_same_encoding(Register result, Register str1, Register str2,
const bool isLL, Register cnt1, Register cnt2,
Register tmp1, Register tmp2, Register tmp3,
const int STUB_THRESHOLD, Label *STUB, Label *SHORT_STRING, Label *DONE) {
Label TAIL_CHECK, TAIL, NEXT_WORD, DIFFERENCE;
const int base_offset = isLL ? arrayOopDesc::base_offset_in_bytes(T_BYTE)
: arrayOopDesc::base_offset_in_bytes(T_CHAR);
assert((base_offset % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
// Number of characters that fit into one 8-byte word.
const int minCharsInWord = isLL ? wordSize : wordSize / 2;
// load first parts of strings and finish initialization while loading
// Identical addresses: nothing to compare.
beq(str1, str2, *DONE);
// Alignment
// When the array base offset is not a multiple of 8, compare the first
// 4 bytes on their own so that the following 8-byte loads are aligned.
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
lwu(tmp1, Address(str1));
lwu(tmp2, Address(str2));
bne(tmp1, tmp2, DIFFERENCE);
addi(str1, str1, 4);
addi(str2, str2, 4);
// 4 bytes hold half a word's worth of characters.
subi(cnt2, cnt2, minCharsInWord / 2);
// A very short string
mv(t0, minCharsInWord);
ble(cnt2, t0, *SHORT_STRING);
}
#ifdef ASSERT
// Debug builds: stop if either pointer is not 8-byte aligned at this point.
if (AvoidUnalignedAccesses) {
Label align_ok;
orr(t0, str1, str2);
andi(t0, t0, 0x7);
beqz(t0, align_ok);
stop("bad alignment");
bind(align_ok);
}
#endif
// load 8 bytes once to compare
ld(tmp1, Address(str1));
ld(tmp2, Address(str2));
// Long strings are handed to the stub; the first words are already in
// tmp1/tmp2 when the branch is taken.
// NOTE(review): presumably the stub relies on tmp1/tmp2 being preloaded -- confirm.
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, *STUB);
subi(cnt2, cnt2, minCharsInWord);
// Exactly one word in total: only the words just loaded need comparing.
beqz(cnt2, TAIL_CHECK);
// convert cnt2 from characters to bytes
if (!isLL) {
slli(cnt2, cnt2, 1);
}
// Advance both pointers by the remaining byte count (they now address the
// last 8 bytes), then turn cnt2 into a negative offset relative to them.
add(str2, str2, cnt2);
add(str1, str1, cnt2);
sub(cnt2, zr, cnt2);
// Step past the first word, which is already in tmp1/tmp2.
addi(cnt2, cnt2, 8);
bne(tmp1, tmp2, DIFFERENCE);
// No further whole word before the final 8 bytes: go straight to the tail.
bgez(cnt2, TAIL);
// main loop
bind(NEXT_WORD);
// 8-byte aligned loads when AvoidUnalignedAccesses is enabled
add(t0, str1, cnt2);
ld(tmp1, Address(t0));
add(t0, str2, cnt2);
ld(tmp2, Address(t0));
addi(cnt2, cnt2, 8);
bne(tmp1, tmp2, DIFFERENCE);
// Keep going while the offset is still negative.
bltz(cnt2, NEXT_WORD);
bind(TAIL);
// Load the final 8 bytes of each string from the adjusted str1/str2.
// NOTE(review): the last argument looks like the guaranteed alignment /
// access granularity in bytes -- confirm against load_long_misaligned.
load_long_misaligned(tmp1, Address(str1), tmp3, isLL ? 1 : 2);
load_long_misaligned(tmp2, Address(str2), tmp3, isLL ? 1 : 2);
bind(TAIL_CHECK);
beq(tmp1, tmp2, *DONE);
// Find the first different characters in the longwords and
// compute their difference.
bind(DIFFERENCE);
xorr(tmp3, tmp1, tmp2);
// count bits of trailing zero chars
ctzc_bits(result, tmp3, isLL);
// Shift the first differing character down to bit 0 in both words.
srl(tmp1, tmp1, result);
srl(tmp2, tmp2, result);
// Keep a single character: 8 bits for Latin1, 16 bits otherwise.
if (isLL) {
zext(tmp1, tmp1, 8);
zext(tmp2, tmp2, 8);
} else {
zext(tmp1, tmp1, 16);
zext(tmp2, tmp2, 16);
}
sub(result, tmp1, tmp2);
j(*DONE);
}
// Compare longwords
//
// Emits the chunked comparison for two strings with different encodings:
// one string is read 4 bytes at a time and the other 8 bytes at a time.
//
//   result          receives the difference of the first mismatching characters,
//                   always computed as (str1 character) - (str2 character)
//   str1, str2      addresses of the character data; both are advanced by the
//                   emitted code
//   isLU            true: str1 is the string read 4 bytes per step and str2 the
//                   one read 8 bytes per step; false: the roles are swapped
//   cnt1            clobbered; used as the negative byte offset into strL
//   cnt2            number of characters to compare; clobbered and reused as the
//                   negative byte offset into strU
//   tmp1, tmp2      hold the chunks currently being compared (tmpL / tmpU)
//   tmp3            scratch register
//   STUB_THRESHOLD  character count at or above which control goes to *STUB
//   STUB            target for long strings
//   DONE            target when the comparison is finished
//
// t0 is used as an additional scratch register.
// NOTE(review): on the equal path this function does not write 'result';
// presumably the caller has already set it up -- confirm against string_compare.
void C2_MacroAssembler::string_compare_long_different_encoding(Register result, Register str1, Register str2,
bool isLU, Register cnt1, Register cnt2,
Register tmp1, Register tmp2, Register tmp3,
const int STUB_THRESHOLD, Label *STUB, Label *DONE) {
Label TAIL, NEXT_WORD, DIFFERENCE;
const int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
assert((base_offset % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
// Map the arguments onto "L" (4 bytes per step) and "U" (8 bytes per step)
// roles so that one code sequence serves both argument orders.
Register strL = isLU ? str1 : str2;
Register strU = isLU ? str2 : str1;
Register tmpL = tmp1, tmpU = tmp2;
// load first parts of strings and finish initialization while loading
// Long strings go to the stub before anything is loaded.
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, *STUB);
lwu(tmpL, Address(strL));
// NOTE(review): the last argument looks like the guaranteed alignment /
// access granularity in bytes -- confirm against load_long_misaligned.
load_long_misaligned(tmpU, Address(strU), tmp3, (base_offset % 8) != 0 ? 4 : 8);
// Four characters are consumed per step.
subi(cnt2, cnt2, 4);
// strL now addresses its last 4 bytes; cnt1 is the negative offset back to the start.
add(strL, strL, cnt2);
sub(cnt1, zr, cnt2);
// Two bytes per character on the U side: strU now addresses its last 8 bytes.
slli(cnt2, cnt2, 1);
add(strU, strU, cnt2);
// NOTE(review): inflate_lo32 presumably widens the four bytes in tmpL to
// four 16-bit characters so they can be compared with tmpU -- confirm.
inflate_lo32(tmp3, tmpL);
mv(tmpL, tmp3);
sub(cnt2, zr, cnt2);
// Step both offsets past the first chunk, which is already loaded.
addi(cnt1, cnt1, 4);
addi(cnt2, cnt2, 8);
bne(tmpL, tmpU, DIFFERENCE);
// No further whole chunk before the final one: go straight to the tail.
bgez(cnt2, TAIL);
// main loop
bind(NEXT_WORD);
add(t0, strL, cnt1);
lwu(tmpL, Address(t0));
add(t0, strU, cnt2);
load_long_misaligned(tmpU, Address(t0), tmp3, (base_offset % 8) != 0 ? 4 : 8);
addi(cnt1, cnt1, 4);
inflate_lo32(tmp3, tmpL);
mv(tmpL, tmp3);
addi(cnt2, cnt2, 8);
bne(tmpL, tmpU, DIFFERENCE);
// Keep going while the U-side offset is still negative.
bltz(cnt2, NEXT_WORD);
bind(TAIL);
// Load the final 4 bytes of strL and the final 8 bytes of strU from the
// adjusted pointers.
load_int_misaligned(tmpL, Address(strL), tmp3, false);
load_long_misaligned(tmpU, Address(strU), tmp3, 2);
inflate_lo32(tmp3, tmpL);
mv(tmpL, tmp3);
beq(tmpL, tmpU, *DONE);
// Find the first different characters in the longwords and
// compute their difference.
bind(DIFFERENCE);
xorr(tmp3, tmpL, tmpU);
// count bits of trailing zero chars
ctzc_bits(result, tmp3);
// Shift the first differing character down to bit 0 and keep 16 bits of it.
srl(tmpL, tmpL, result);
srl(tmpU, tmpU, result);
zext(tmpL, tmpL, 16);
zext(tmpU, tmpU, 16);
// Subtract in str1 - str2 order: tmpL belongs to str1 only when isLU is true.
if (isLU) {
sub(result, tmpL, tmpU);
} else {
sub(result, tmpU, tmpL);
}
j(*DONE);
}
// Compare strings.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1, Register tmp2, Register tmp3,
int ae)
{
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, TAIL_CHECK, L;
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, STUB,
SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, L;
const int STUB_THRESHOLD = 64 + 8;
bool isLL = ae == StrIntrinsicNode::LL;
@ -1409,14 +1577,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
BLOCK_COMMENT("string_compare {");
// Bizarrely, the counts are passed in bytes, regardless of whether they
@ -1434,154 +1594,23 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
mv(cnt2, cnt1);
bind(L);
// Load 4 bytes once to compare for alignment before main loop. Note that this
// is only possible for LL/UU case. We need to resort to load_long_misaligned
// for both LU and UL cases.
if (str1_isL == str2_isL) { // LL or UU
beq(str1, str2, DONE);
int base_offset = isLL ? base_offset1 : base_offset2;
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
mv(t0, minCharsInWord / 2);
ble(cnt2, t0, SHORT_STRING);
lwu(tmp1, Address(str1));
lwu(tmp2, Address(str2));
bne(tmp1, tmp2, DIFFERENCE);
addi(str1, str1, 4);
addi(str2, str2, 4);
subi(cnt2, cnt2, minCharsInWord / 2);
}
}
// A very short string
mv(t0, minCharsInWord);
ble(cnt2, t0, SHORT_STRING);
// Compare longwords
// load first parts of strings and finish initialization while loading
{
if (str1_isL == str2_isL) { // LL or UU
#ifdef ASSERT
if (AvoidUnalignedAccesses) {
Label align_ok;
orr(t0, str1, str2);
andi(t0, t0, 0x7);
beqz(t0, align_ok);
stop("bad alignment");
bind(align_ok);
}
#endif
// load 8 bytes once to compare
ld(tmp1, Address(str1));
ld(tmp2, Address(str2));
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, STUB);
subi(cnt2, cnt2, minCharsInWord);
beqz(cnt2, TAIL_CHECK);
// convert cnt2 from characters to bytes
if (!str1_isL) {
slli(cnt2, cnt2, 1);
}
add(str2, str2, cnt2);
add(str1, str1, cnt2);
sub(cnt2, zr, cnt2);
} else if (isLU) { // LU case
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, STUB);
lwu(tmp1, Address(str1));
load_long_misaligned(tmp2, Address(str2), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
subi(cnt2, cnt2, 4);
add(str1, str1, cnt2);
sub(cnt1, zr, cnt2);
slli(cnt2, cnt2, 1);
add(str2, str2, cnt2);
inflate_lo32(tmp3, tmp1);
mv(tmp1, tmp3);
sub(cnt2, zr, cnt2);
addi(cnt1, cnt1, 4);
} else { // UL case
mv(t0, STUB_THRESHOLD);
bge(cnt2, t0, STUB);
load_long_misaligned(tmp1, Address(str1), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
lwu(tmp2, Address(str2));
subi(cnt2, cnt2, 4);
slli(t0, cnt2, 1);
sub(cnt1, zr, t0);
add(str1, str1, t0);
add(str2, str2, cnt2);
inflate_lo32(tmp3, tmp2);
mv(tmp2, tmp3);
sub(cnt2, zr, cnt2);
addi(cnt1, cnt1, 8);
string_compare_long_same_encoding(result,
str1, str2, isLL,
cnt1, cnt2, tmp1, tmp2, tmp3,
STUB_THRESHOLD, &STUB, &SHORT_STRING, &DONE);
} else { // LU or UL
string_compare_long_different_encoding(result,
str1, str2, isLU,
cnt1, cnt2, tmp1, tmp2, tmp3,
STUB_THRESHOLD, &STUB, &DONE);
}
addi(cnt2, cnt2, isUL ? 4 : 8);
bne(tmp1, tmp2, DIFFERENCE);
bgez(cnt2, TAIL);
// main loop
bind(NEXT_WORD);
if (str1_isL == str2_isL) { // LL or UU
// 8-byte aligned loads when AvoidUnalignedAccesses is enabled
add(t0, str1, cnt2);
ld(tmp1, Address(t0));
add(t0, str2, cnt2);
ld(tmp2, Address(t0));
addi(cnt2, cnt2, 8);
} else if (isLU) { // LU case
add(t0, str1, cnt1);
lwu(tmp1, Address(t0));
add(t0, str2, cnt2);
load_long_misaligned(tmp2, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
addi(cnt1, cnt1, 4);
inflate_lo32(tmp3, tmp1);
mv(tmp1, tmp3);
addi(cnt2, cnt2, 8);
} else { // UL case
add(t0, str2, cnt2);
lwu(tmp2, Address(t0));
add(t0, str1, cnt1);
load_long_misaligned(tmp1, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
inflate_lo32(tmp3, tmp2);
mv(tmp2, tmp3);
addi(cnt1, cnt1, 8);
addi(cnt2, cnt2, 4);
}
bne(tmp1, tmp2, DIFFERENCE);
bltz(cnt2, NEXT_WORD);
bind(TAIL);
if (str1_isL == str2_isL) { // LL or UU
load_long_misaligned(tmp1, Address(str1), tmp3, isLL ? 1 : 2);
load_long_misaligned(tmp2, Address(str2), tmp3, isLL ? 1 : 2);
} else if (isLU) { // LU case
load_int_misaligned(tmp1, Address(str1), tmp3, false);
load_long_misaligned(tmp2, Address(str2), tmp3, 2);
inflate_lo32(tmp3, tmp1);
mv(tmp1, tmp3);
} else { // UL case
load_int_misaligned(tmp2, Address(str2), tmp3, false);
load_long_misaligned(tmp1, Address(str1), tmp3, 2);
inflate_lo32(tmp3, tmp2);
mv(tmp2, tmp3);
}
bind(TAIL_CHECK);
beq(tmp1, tmp2, DONE);
// Find the first different characters in the longwords and
// compute their difference.
bind(DIFFERENCE);
xorr(tmp3, tmp1, tmp2);
// count bits of trailing zero chars
ctzc_bits(result, tmp3, isLL);
srl(tmp1, tmp1, result);
srl(tmp2, tmp2, result);
if (isLL) {
zext(tmp1, tmp1, 8);
zext(tmp2, tmp2, 8);
} else {
zext(tmp1, tmp1, 16);
zext(tmp2, tmp2, 16);
}
sub(result, tmp1, tmp2);
j(DONE);
}
bind(STUB);
@ -2636,7 +2665,7 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
int minCharsInWord = encLL ? wordSize : wordSize / 2;
BLOCK_COMMENT("string_compare {");
BLOCK_COMMENT("string_compare_v {");
// for Latin strings, 1 byte for 1 character
// for UTF16 strings, 2 bytes for 1 character
@ -2696,6 +2725,8 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
sub(result, tmp1, tmp2);
bind(DONE);
BLOCK_COMMENT("} string_compare_v");
}
void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) {

View File

@ -39,6 +39,15 @@
VectorRegister vrs,
bool is_latin, Label& DONE, Assembler::LMUL lmul);
// Helper for string_compare: emits the longword comparison used when both
// strings have the same encoding. STUB, SHORT_STRING and DONE are labels
// owned by the caller that the emitted code branches to.
void string_compare_long_same_encoding(Register result, Register str1, Register str2,
const bool isLL, Register cnt1, Register cnt2,
Register tmp1, Register tmp2, Register tmp3,
const int STUB_THRESHOLD, Label *STUB, Label *SHORT_STRING, Label *DONE);
// Helper for string_compare: emits the longword comparison used when the two
// strings have different encodings; isLU selects which argument plays which
// role. STUB and DONE are labels owned by the caller.
void string_compare_long_different_encoding(Register result, Register str1, Register str2,
bool isLU, Register cnt1, Register cnt2,
Register tmp1, Register tmp2, Register tmp3,
const int STUB_THRESHOLD, Label *STUB, Label *DONE);
public:
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
void fast_lock(Register object, Register box,

View File

@ -2431,23 +2431,6 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// code for comparing 16 bytes of strings with same encoding
//
// tmp1 and tmp2 are read before this code writes them, so on entry they must
// already hold the pair of 8-byte words to be checked first. Each check is
// emitted after the loads for the following pair have been issued:
//   - tmp1 vs tmp2 is checked while tmp5 / cnt1 receive the next words;
//     a mismatch branches to DIFF1.
//   - tmp5 vs cnt1 is checked while tmp1 / tmp2 are reloaded with the words
//     after that; a mismatch branches to DIFF2.
// str1 and str2 are each advanced by 16 bytes in total. cnt1 is used here as
// a data register for a word of str2, and tmp4 holds the xor of each pair.
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
// Fixed register assignment; 'result' is declared but not used in this body.
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
__ ld(tmp5, Address(str1));
__ addi(str1, str1, 8);
// Check the pair that was loaded before this code ran.
__ xorr(tmp4, tmp1, tmp2);
__ ld(cnt1, Address(str2));
__ addi(str2, str2, 8);
__ bnez(tmp4, DIFF1);
// Reload tmp1 / tmp2 with the next words, left pending on exit.
__ ld(tmp1, Address(str1));
__ addi(str1, str1, 8);
// Check the pair loaded above into tmp5 / cnt1.
__ xorr(tmp4, tmp5, cnt1);
__ ld(tmp2, Address(str2));
__ addi(str2, str2, 8);
__ bnez(tmp4, DIFF2);
}
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
void compare_string_8_x_LU(Register tmpL, Register tmpU,
Register strL, Register strU, Label& DIFF) {
@ -2702,7 +2685,20 @@ class StubGenerator: public StubCodeGenerator {
__ push_reg(spilled_regs, sp);
__ bltz(cnt2, TAIL);
__ bind(SMALL_LOOP);
compare_string_16_bytes_same(DIFF, DIFF2);
// compare 16 bytes of strings with same encoding
__ ld(tmp5, Address(str1));
__ addi(str1, str1, 8);
__ xorr(tmp4, tmp1, tmp2);
__ ld(cnt1, Address(str2));
__ addi(str2, str2, 8);
__ bnez(tmp4, DIFF);
__ ld(tmp1, Address(str1));
__ addi(str1, str1, 8);
__ xorr(tmp4, tmp5, cnt1);
__ ld(tmp2, Address(str2));
__ addi(str2, str2, 8);
__ bnez(tmp4, DIFF2);
__ subi(cnt2, cnt2, isLL ? 16 : 8);
__ bgez(cnt2, SMALL_LOOP);
__ bind(TAIL);

View File

@ -160,6 +160,7 @@ public class TestStringIntrinsics {
// Different lengths
invokeAndCheck(m, 1, "ABCD", "ABC");
invokeAndCheck(m, '\uff21' - 'A', "ABCEFGHIJKLMNOPQRSTUVWXY\uff21Z", "ABCEFGHIJKLMNOPQRSTUVWXYAZ");
invokeAndCheck(m, -1, "\uff21\uff22\uff23", "\uff21\uff22\uff23\uff24");
invokeAndCheck(m, 1, "ABC\uff24", "ABC");
invokeAndCheck(m, 3, "ABC\uff24\uff25\uff26", "ABC");