mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-04 07:28:22 +00:00
8350095: RISC-V: Refactor string_compare
Reviewed-by: fyang
This commit is contained in:
parent
f53de9208c
commit
e470f474ee
@ -1382,15 +1382,183 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
|
||||
bind(DONE);
|
||||
}
|
||||
|
||||
// Compare longwords
//
// Emits the word-at-a-time comparison of two strings that share one encoding.
//
//   result         - written only on the DIFFERENCE path, where it receives
//                    (first differing char of str1) - (first differing char of str2).
//                    The equality exits to *DONE do not write it
//                    (presumably the caller has preset it - confirm at the call site).
//   str1, str2     - addresses of the first characters; both are advanced by the emitted code.
//   isLL           - true:  T_BYTE base offset, wordSize characters per word;
//                    false: T_CHAR base offset, wordSize / 2 characters per word.
//   cnt1           - not referenced by this function.
//   cnt2           - number of characters to compare on entry; clobbered (it ends up as a
//                    negative byte offset used by the main loop).
//   tmp1, tmp2     - current word of str1 / str2.
//   tmp3           - passed to load_long_misaligned and used for the xor of the differing words.
//   STUB_THRESHOLD - if cnt2 >= STUB_THRESHOLD control goes to *STUB; tmp1/tmp2 already hold
//                    the first 8 bytes of each string at that point.
//   SHORT_STRING   - taken only from the alignment prologue, when the characters left after the
//                    4-byte pre-compare do not exceed one word.
//   DONE           - common exit label.
//
// t0 is used as a scratch register.
|
||||
void C2_MacroAssembler::string_compare_long_same_encoding(Register result, Register str1, Register str2,
|
||||
const bool isLL, Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
const int STUB_THRESHOLD, Label *STUB, Label *SHORT_STRING, Label *DONE) {
|
||||
Label TAIL_CHECK, TAIL, NEXT_WORD, DIFFERENCE;
|
||||
|
||||
// Offset of the first array element for the element type selected by isLL.
const int base_offset = isLL ? arrayOopDesc::base_offset_in_bytes(T_BYTE)
|
||||
: arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
// The offset must be a multiple of 4 or 8, depending on the header flags below.
assert((base_offset % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
// Number of characters held by one machine word.
const int minCharsInWord = isLL ? wordSize : wordSize / 2;
|
||||
|
||||
// load first parts of strings and finish initialization while loading
|
||||
// Identical addresses: nothing to compare.
beq(str1, str2, *DONE);
|
||||
// Alignment
// When unaligned accesses must be avoided and the element base offset is not a
// multiple of 8, the first 4 bytes are compared separately and both pointers are
// stepped past them. The ASSERT block further down checks that both pointers are
// 8-byte aligned before the first ld.
|
||||
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
|
||||
lwu(tmp1, Address(str1));
|
||||
lwu(tmp2, Address(str2));
|
||||
bne(tmp1, tmp2, DIFFERENCE);
|
||||
addi(str1, str1, 4);
|
||||
addi(str2, str2, 4);
|
||||
// 4 bytes consumed == half a word's worth of characters.
subi(cnt2, cnt2, minCharsInWord / 2);
|
||||
|
||||
// A very short string
// (no more than one word of characters is left, so control goes to *SHORT_STRING)
|
||||
mv(t0, minCharsInWord);
|
||||
ble(cnt2, t0, *SHORT_STRING);
|
||||
}
|
||||
// Debug builds only: stop if either pointer has any of its low 3 bits set.
#ifdef ASSERT
|
||||
if (AvoidUnalignedAccesses) {
|
||||
Label align_ok;
|
||||
orr(t0, str1, str2);
|
||||
andi(t0, t0, 0x7);
|
||||
beqz(t0, align_ok);
|
||||
stop("bad alignment");
|
||||
bind(align_ok);
|
||||
}
|
||||
#endif
|
||||
// load 8 bytes once to compare
|
||||
ld(tmp1, Address(str1));
|
||||
ld(tmp2, Address(str2));
|
||||
// Inputs of STUB_THRESHOLD characters or more go to *STUB; the first words are
// already in tmp1/tmp2 when that branch is taken.
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, *STUB);
|
||||
// cnt2 = characters beyond the first word. Zero means the word just loaded is the
// whole input, so only the equality check at TAIL_CHECK remains.
subi(cnt2, cnt2, minCharsInWord);
|
||||
beqz(cnt2, TAIL_CHECK);
|
||||
// convert cnt2 from characters to bytes
|
||||
if (!isLL) {
|
||||
slli(cnt2, cnt2, 1);
|
||||
}
|
||||
// Point str1/str2 at the last 8 bytes of the data and turn cnt2 into a negative
// byte offset from there; the +8 skips the word that is already in tmp1/tmp2.
add(str2, str2, cnt2);
|
||||
add(str1, str1, cnt2);
|
||||
sub(cnt2, zr, cnt2);
|
||||
addi(cnt2, cnt2, 8);
|
||||
// Check the first word; if no further full word precedes the final one, go to TAIL.
bne(tmp1, tmp2, DIFFERENCE);
|
||||
bgez(cnt2, TAIL);
|
||||
|
||||
// main loop
// Each iteration loads one word from str1 + cnt2 and str2 + cnt2, advances cnt2 by
// 8 bytes, and repeats while cnt2 is still negative.
|
||||
bind(NEXT_WORD);
|
||||
// 8-byte aligned loads when AvoidUnalignedAccesses is enabled
|
||||
add(t0, str1, cnt2);
|
||||
ld(tmp1, Address(t0));
|
||||
add(t0, str2, cnt2);
|
||||
ld(tmp2, Address(t0));
|
||||
addi(cnt2, cnt2, 8);
|
||||
bne(tmp1, tmp2, DIFFERENCE);
|
||||
bltz(cnt2, NEXT_WORD);
|
||||
|
||||
// Final 8 bytes, read at the adjusted str1/str2 themselves; this window can overlap
// bytes that were already compared. The last argument is the element size in bytes
// (presumably the alignment granularity load_long_misaligned may rely on - confirm).
bind(TAIL);
|
||||
load_long_misaligned(tmp1, Address(str1), tmp3, isLL ? 1 : 2);
|
||||
load_long_misaligned(tmp2, Address(str2), tmp3, isLL ? 1 : 2);
|
||||
|
||||
// Equal last words: leave through *DONE without touching result.
bind(TAIL_CHECK);
|
||||
beq(tmp1, tmp2, *DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
bind(DIFFERENCE);
|
||||
xorr(tmp3, tmp1, tmp2);
|
||||
// count bits of trailing zero chars
|
||||
ctzc_bits(result, tmp3, isLL);
|
||||
// Shift the first differing character down to bit 0 in both words.
srl(tmp1, tmp1, result);
|
||||
srl(tmp2, tmp2, result);
|
||||
// Keep a single character: 8 bits when isLL, 16 bits otherwise.
if (isLL) {
|
||||
zext(tmp1, tmp1, 8);
|
||||
zext(tmp2, tmp2, 8);
|
||||
} else {
|
||||
zext(tmp1, tmp1, 16);
|
||||
zext(tmp2, tmp2, 16);
|
||||
}
|
||||
// result = str1 character - str2 character
sub(result, tmp1, tmp2);
|
||||
|
||||
j(*DONE);
|
||||
}
|
||||
|
||||
// Compare longwords
//
// Emits the chunked comparison of two strings whose element widths differ.
// strL is read 4 bytes at a time and passed through inflate_lo32 before being
// compared with 8 bytes of strU (presumably inflate_lo32 widens the 4 bytes in the
// low half of its source into four 16-bit characters - confirm against its definition).
//
//   result         - written only on the DIFFERENCE path; always
//                    (first differing char of str1) - (first differing char of str2),
//                    whichever of the two is strL. The equality exit does not write it.
//   str1, str2     - addresses of the first characters; both are advanced by the emitted code.
//   isLU           - true:  str1 plays the strL role and str2 the strU role;
//                    false: the roles are swapped.
//   cnt1           - overwritten; used as the negative byte offset into strL.
//   cnt2           - character count on entry; becomes the negative byte offset into strU.
//   tmp1, tmp2     - used as tmpL / tmpU (current chunk of strL / strU).
//   tmp3           - receives the inflate_lo32 output, is passed to the misaligned-load
//                    helpers, and holds the xor of the differing words.
//   STUB_THRESHOLD - if cnt2 >= STUB_THRESHOLD control goes to *STUB before anything is loaded.
//   DONE           - common exit label.
//
// t0 is used as a scratch register.
|
||||
void C2_MacroAssembler::string_compare_long_different_encoding(Register result, Register str1, Register str2,
|
||||
bool isLU, Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
const int STUB_THRESHOLD, Label *STUB, Label *DONE) {
|
||||
Label TAIL, NEXT_WORD, DIFFERENCE;
|
||||
|
||||
// Offset of the first T_CHAR array element; it must be a multiple of 4 or 8,
// depending on the header flags below.
const int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
assert((base_offset % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
// Address the operands by role (strL / strU) rather than by argument position.
Register strL = isLU ? str1 : str2;
|
||||
Register strU = isLU ? str2 : str1;
|
||||
Register tmpL = tmp1, tmpU = tmp2;
|
||||
|
||||
// load first parts of strings and finish initialization while loading
|
||||
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, *STUB);
|
||||
// First 4 bytes of strL and first 8 bytes of strU. The last argument of
// load_long_misaligned (4 or 8) is derived from base_offset - presumably the
// alignment the strU address is known to have; confirm against the helper.
lwu(tmpL, Address(strL));
|
||||
load_long_misaligned(tmpU, Address(strU), tmp3, (base_offset % 8) != 0 ? 4 : 8);
|
||||
// Four characters are handled per step. strL advances by cnt2 bytes and strU by
// 2 * cnt2 bytes, so both point at their final chunk; cnt1 and cnt2 become the
// matching negative byte offsets.
subi(cnt2, cnt2, 4);
|
||||
add(strL, strL, cnt2);
|
||||
sub(cnt1, zr, cnt2);
|
||||
slli(cnt2, cnt2, 1);
|
||||
add(strU, strU, cnt2);
|
||||
// Replace tmpL by its inflated form (built in tmp3) so it can be compared with tmpU.
inflate_lo32(tmp3, tmpL);
|
||||
mv(tmpL, tmp3);
|
||||
sub(cnt2, zr, cnt2);
|
||||
// Skip the chunk that has already been loaded: 4 bytes of strL, 8 bytes of strU.
addi(cnt1, cnt1, 4);
|
||||
addi(cnt2, cnt2, 8);
|
||||
bne(tmpL, tmpU, DIFFERENCE);
|
||||
bgez(cnt2, TAIL);
|
||||
|
||||
// main loop
// Loads 4 bytes at strL + cnt1 and 8 bytes at strU + cnt2, advances both offsets,
// and repeats while cnt2 is still negative.
|
||||
bind(NEXT_WORD);
|
||||
add(t0, strL, cnt1);
|
||||
lwu(tmpL, Address(t0));
|
||||
add(t0, strU, cnt2);
|
||||
load_long_misaligned(tmpU, Address(t0), tmp3, (base_offset % 8) != 0 ? 4 : 8);
|
||||
addi(cnt1, cnt1, 4);
|
||||
inflate_lo32(tmp3, tmpL);
|
||||
mv(tmpL, tmp3);
|
||||
addi(cnt2, cnt2, 8);
|
||||
bne(tmpL, tmpU, DIFFERENCE);
|
||||
bltz(cnt2, NEXT_WORD);
|
||||
|
||||
// Final chunk, read at the adjusted strL / strU themselves (4 and 8 bytes); this
// window can overlap characters that were already compared.
bind(TAIL);
|
||||
load_int_misaligned(tmpL, Address(strL), tmp3, false);
|
||||
load_long_misaligned(tmpU, Address(strU), tmp3, 2);
|
||||
inflate_lo32(tmp3, tmpL);
|
||||
mv(tmpL, tmp3);
|
||||
|
||||
// Equal final chunks: leave through *DONE without touching result.
beq(tmpL, tmpU, *DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
bind(DIFFERENCE);
|
||||
xorr(tmp3, tmpL, tmpU);
|
||||
// count bits of trailing zero chars
|
||||
ctzc_bits(result, tmp3);
|
||||
// Shift the first differing character down to bit 0 and keep its 16 bits.
srl(tmpL, tmpL, result);
|
||||
srl(tmpU, tmpU, result);
|
||||
zext(tmpL, tmpL, 16);
|
||||
zext(tmpU, tmpU, 16);
|
||||
// result is (str1 character - str2 character); strL is str1 only when isLU.
if (isLU) {
|
||||
sub(result, tmpL, tmpU);
|
||||
} else {
|
||||
sub(result, tmpU, tmpL);
|
||||
}
|
||||
|
||||
j(*DONE);
|
||||
}
|
||||
|
||||
// Compare strings.
|
||||
void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
int ae)
|
||||
{
|
||||
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
|
||||
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
|
||||
SHORT_LOOP_START, TAIL_CHECK, L;
|
||||
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, STUB,
|
||||
SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
|
||||
SHORT_LOOP_START, L;
|
||||
|
||||
const int STUB_THRESHOLD = 64 + 8;
|
||||
bool isLL = ae == StrIntrinsicNode::LL;
|
||||
@ -1409,14 +1577,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
|
||||
load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
|
||||
|
||||
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
|
||||
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
|
||||
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
|
||||
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
|
||||
|
||||
BLOCK_COMMENT("string_compare {");
|
||||
|
||||
// Bizarrely, the counts are passed in bytes, regardless of whether they
|
||||
@ -1434,154 +1594,23 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
mv(cnt2, cnt1);
|
||||
bind(L);
|
||||
|
||||
// Load 4 bytes once to compare for alignment before main loop. Note that this
|
||||
// is only possible for LL/UU case. We need to resort to load_long_misaligned
|
||||
// for both LU and UL cases.
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
beq(str1, str2, DONE);
|
||||
int base_offset = isLL ? base_offset1 : base_offset2;
|
||||
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
|
||||
mv(t0, minCharsInWord / 2);
|
||||
ble(cnt2, t0, SHORT_STRING);
|
||||
lwu(tmp1, Address(str1));
|
||||
lwu(tmp2, Address(str2));
|
||||
bne(tmp1, tmp2, DIFFERENCE);
|
||||
addi(str1, str1, 4);
|
||||
addi(str2, str2, 4);
|
||||
subi(cnt2, cnt2, minCharsInWord / 2);
|
||||
}
|
||||
}
|
||||
|
||||
// A very short string
|
||||
mv(t0, minCharsInWord);
|
||||
ble(cnt2, t0, SHORT_STRING);
|
||||
|
||||
// Compare longwords
|
||||
// load first parts of strings and finish initialization while loading
|
||||
{
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
#ifdef ASSERT
|
||||
if (AvoidUnalignedAccesses) {
|
||||
Label align_ok;
|
||||
orr(t0, str1, str2);
|
||||
andi(t0, t0, 0x7);
|
||||
beqz(t0, align_ok);
|
||||
stop("bad alignment");
|
||||
bind(align_ok);
|
||||
}
|
||||
#endif
|
||||
// load 8 bytes once to compare
|
||||
ld(tmp1, Address(str1));
|
||||
ld(tmp2, Address(str2));
|
||||
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, STUB);
|
||||
subi(cnt2, cnt2, minCharsInWord);
|
||||
beqz(cnt2, TAIL_CHECK);
|
||||
// convert cnt2 from characters to bytes
|
||||
if (!str1_isL) {
|
||||
slli(cnt2, cnt2, 1);
|
||||
}
|
||||
add(str2, str2, cnt2);
|
||||
add(str1, str1, cnt2);
|
||||
sub(cnt2, zr, cnt2);
|
||||
} else if (isLU) { // LU case
|
||||
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, STUB);
|
||||
lwu(tmp1, Address(str1));
|
||||
load_long_misaligned(tmp2, Address(str2), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
subi(cnt2, cnt2, 4);
|
||||
add(str1, str1, cnt2);
|
||||
sub(cnt1, zr, cnt2);
|
||||
slli(cnt2, cnt2, 1);
|
||||
add(str2, str2, cnt2);
|
||||
inflate_lo32(tmp3, tmp1);
|
||||
mv(tmp1, tmp3);
|
||||
sub(cnt2, zr, cnt2);
|
||||
addi(cnt1, cnt1, 4);
|
||||
} else { // UL case
|
||||
mv(t0, STUB_THRESHOLD);
|
||||
bge(cnt2, t0, STUB);
|
||||
load_long_misaligned(tmp1, Address(str1), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
lwu(tmp2, Address(str2));
|
||||
subi(cnt2, cnt2, 4);
|
||||
slli(t0, cnt2, 1);
|
||||
sub(cnt1, zr, t0);
|
||||
add(str1, str1, t0);
|
||||
add(str2, str2, cnt2);
|
||||
inflate_lo32(tmp3, tmp2);
|
||||
mv(tmp2, tmp3);
|
||||
sub(cnt2, zr, cnt2);
|
||||
addi(cnt1, cnt1, 8);
|
||||
string_compare_long_same_encoding(result,
|
||||
str1, str2, isLL,
|
||||
cnt1, cnt2, tmp1, tmp2, tmp3,
|
||||
STUB_THRESHOLD, &STUB, &SHORT_STRING, &DONE);
|
||||
} else { // LU or UL
|
||||
string_compare_long_different_encoding(result,
|
||||
str1, str2, isLU,
|
||||
cnt1, cnt2, tmp1, tmp2, tmp3,
|
||||
STUB_THRESHOLD, &STUB, &DONE);
|
||||
}
|
||||
addi(cnt2, cnt2, isUL ? 4 : 8);
|
||||
bne(tmp1, tmp2, DIFFERENCE);
|
||||
bgez(cnt2, TAIL);
|
||||
|
||||
// main loop
|
||||
bind(NEXT_WORD);
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
// 8-byte aligned loads when AvoidUnalignedAccesses is enabled
|
||||
add(t0, str1, cnt2);
|
||||
ld(tmp1, Address(t0));
|
||||
add(t0, str2, cnt2);
|
||||
ld(tmp2, Address(t0));
|
||||
addi(cnt2, cnt2, 8);
|
||||
} else if (isLU) { // LU case
|
||||
add(t0, str1, cnt1);
|
||||
lwu(tmp1, Address(t0));
|
||||
add(t0, str2, cnt2);
|
||||
load_long_misaligned(tmp2, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
addi(cnt1, cnt1, 4);
|
||||
inflate_lo32(tmp3, tmp1);
|
||||
mv(tmp1, tmp3);
|
||||
addi(cnt2, cnt2, 8);
|
||||
} else { // UL case
|
||||
add(t0, str2, cnt2);
|
||||
lwu(tmp2, Address(t0));
|
||||
add(t0, str1, cnt1);
|
||||
load_long_misaligned(tmp1, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
|
||||
inflate_lo32(tmp3, tmp2);
|
||||
mv(tmp2, tmp3);
|
||||
addi(cnt1, cnt1, 8);
|
||||
addi(cnt2, cnt2, 4);
|
||||
}
|
||||
bne(tmp1, tmp2, DIFFERENCE);
|
||||
bltz(cnt2, NEXT_WORD);
|
||||
bind(TAIL);
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
load_long_misaligned(tmp1, Address(str1), tmp3, isLL ? 1 : 2);
|
||||
load_long_misaligned(tmp2, Address(str2), tmp3, isLL ? 1 : 2);
|
||||
} else if (isLU) { // LU case
|
||||
load_int_misaligned(tmp1, Address(str1), tmp3, false);
|
||||
load_long_misaligned(tmp2, Address(str2), tmp3, 2);
|
||||
inflate_lo32(tmp3, tmp1);
|
||||
mv(tmp1, tmp3);
|
||||
} else { // UL case
|
||||
load_int_misaligned(tmp2, Address(str2), tmp3, false);
|
||||
load_long_misaligned(tmp1, Address(str1), tmp3, 2);
|
||||
inflate_lo32(tmp3, tmp2);
|
||||
mv(tmp2, tmp3);
|
||||
}
|
||||
bind(TAIL_CHECK);
|
||||
beq(tmp1, tmp2, DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
bind(DIFFERENCE);
|
||||
xorr(tmp3, tmp1, tmp2);
|
||||
// count bits of trailing zero chars
|
||||
ctzc_bits(result, tmp3, isLL);
|
||||
srl(tmp1, tmp1, result);
|
||||
srl(tmp2, tmp2, result);
|
||||
if (isLL) {
|
||||
zext(tmp1, tmp1, 8);
|
||||
zext(tmp2, tmp2, 8);
|
||||
} else {
|
||||
zext(tmp1, tmp1, 16);
|
||||
zext(tmp2, tmp2, 16);
|
||||
}
|
||||
sub(result, tmp1, tmp2);
|
||||
j(DONE);
|
||||
}
|
||||
|
||||
bind(STUB);
|
||||
@ -2636,7 +2665,7 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
|
||||
|
||||
int minCharsInWord = encLL ? wordSize : wordSize / 2;
|
||||
|
||||
BLOCK_COMMENT("string_compare {");
|
||||
BLOCK_COMMENT("string_compare_v {");
|
||||
|
||||
// for Latin strings, 1 byte for 1 character
|
||||
// for UTF16 strings, 2 bytes for 1 character
|
||||
@ -2696,6 +2725,8 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
|
||||
sub(result, tmp1, tmp2);
|
||||
|
||||
bind(DONE);
|
||||
|
||||
BLOCK_COMMENT("} string_compare_v");
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) {
|
||||
|
||||
@ -39,6 +39,15 @@
|
||||
VectorRegister vrs,
|
||||
bool is_latin, Label& DONE, Assembler::LMUL lmul);
|
||||
|
||||
// Emits the word-at-a-time comparison of two strings with the same encoding
// (isLL selects the T_BYTE or T_CHAR element layout). Control leaves through
// *STUB, *SHORT_STRING or *DONE; result is written only when a differing
// character is found.
void string_compare_long_same_encoding(Register result, Register str1, Register str2,
|
||||
const bool isLL, Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
const int STUB_THRESHOLD, Label *STUB, Label *SHORT_STRING, Label *DONE);
|
||||
// Emits the chunked comparison of two strings with different element widths
// (isLU tells which of str1/str2 is the 4-bytes-per-step side). Control leaves
// through *STUB or *DONE; result is written only when a differing character is found.
void string_compare_long_different_encoding(Register result, Register str1, Register str2,
|
||||
bool isLU, Register cnt1, Register cnt2,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
const int STUB_THRESHOLD, Label *STUB, Label *DONE);
|
||||
|
||||
public:
|
||||
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
|
||||
void fast_lock(Register object, Register box,
|
||||
|
||||
@ -2431,23 +2431,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// code for comparing 16 bytes of strings with same encoding
//
// Emits two interleaved 8-byte compare steps using fixed registers.
// tmp1 and tmp2 are read before this code writes them, so they must already hold
// one loaded word of str1 and str2 on entry. On fall-through they hold the next
// pair of words, loaded but not yet compared, and str1/str2 have each advanced
// by 16 bytes.
//
//   DIFF1 - taken when the words that were in tmp1/tmp2 on entry differ.
//   DIFF2 - taken when the words loaded into tmp5 (from str1) and cnt1 (from str2) differ.
//
// Note: cnt1 (x12) is used here as a data register for a word of str2.
// NOTE(review): the local `result` is declared but not used in this body.
|
||||
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
|
||||
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
|
||||
// Load the next word of each string while the xor of the pending pair is computed.
__ ld(tmp5, Address(str1));
|
||||
__ addi(str1, str1, 8);
|
||||
__ xorr(tmp4, tmp1, tmp2);
|
||||
__ ld(cnt1, Address(str2));
|
||||
__ addi(str2, str2, 8);
|
||||
__ bnez(tmp4, DIFF1);
|
||||
// Same pattern for the pair just loaded (tmp5 / cnt1), refilling tmp1 / tmp2.
__ ld(tmp1, Address(str1));
|
||||
__ addi(str1, str1, 8);
|
||||
__ xorr(tmp4, tmp5, cnt1);
|
||||
__ ld(tmp2, Address(str2));
|
||||
__ addi(str2, str2, 8);
|
||||
__ bnez(tmp4, DIFF2);
|
||||
}
|
||||
|
||||
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
|
||||
void compare_string_8_x_LU(Register tmpL, Register tmpU,
|
||||
Register strL, Register strU, Label& DIFF) {
|
||||
@ -2702,7 +2685,20 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ push_reg(spilled_regs, sp);
|
||||
__ bltz(cnt2, TAIL);
|
||||
__ bind(SMALL_LOOP);
|
||||
compare_string_16_bytes_same(DIFF, DIFF2);
|
||||
// compare 16 bytes of strings with same encoding
|
||||
__ ld(tmp5, Address(str1));
|
||||
__ addi(str1, str1, 8);
|
||||
__ xorr(tmp4, tmp1, tmp2);
|
||||
__ ld(cnt1, Address(str2));
|
||||
__ addi(str2, str2, 8);
|
||||
__ bnez(tmp4, DIFF);
|
||||
__ ld(tmp1, Address(str1));
|
||||
__ addi(str1, str1, 8);
|
||||
__ xorr(tmp4, tmp5, cnt1);
|
||||
__ ld(tmp2, Address(str2));
|
||||
__ addi(str2, str2, 8);
|
||||
__ bnez(tmp4, DIFF2);
|
||||
|
||||
__ subi(cnt2, cnt2, isLL ? 16 : 8);
|
||||
__ bgez(cnt2, SMALL_LOOP);
|
||||
__ bind(TAIL);
|
||||
|
||||
@ -160,6 +160,7 @@ public class TestStringIntrinsics {
|
||||
|
||||
// Different lengths
|
||||
invokeAndCheck(m, 1, "ABCD", "ABC");
|
||||
invokeAndCheck(m, '\uff21' - 'A', "ABCEFGHIJKLMNOPQRSTUVWXY\uff21Z", "ABCEFGHIJKLMNOPQRSTUVWXYAZ");
|
||||
invokeAndCheck(m, -1, "\uff21\uff22\uff23", "\uff21\uff22\uff23\uff24");
|
||||
invokeAndCheck(m, 1, "ABC\uff24", "ABC");
|
||||
invokeAndCheck(m, 3, "ABC\uff24\uff25\uff26", "ABC");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user