mirror of
https://github.com/openjdk/jdk.git
synced 2026-04-26 06:41:24 +00:00
8153713: aarch64: improve short array clearing using store pair
Aarch64: generate store pair instruction to clear short arrays Reviewed-by: aph
This commit is contained in:
parent
63403a410e
commit
8215de72f5
@ -13321,6 +13321,20 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// ClearArray rule for a count supplied as an immL operand. The encoding
// reads the count through $cnt$$constant, so it is known when the code is
// emitted and is handed to the immediate-count overload of zero_words.
instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
|
||||
ins_cost(4 * INSN_COST);
|
||||
format %{ "ClearArray $cnt, $base" %}
|
||||
|
||||
ins_encode %{
|
||||
// $cnt is passed as an unsigned 64-bit value; zero_words documents it as a
// count in 8-byte units.
__ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
// Overflow Math Instructions
|
||||
|
||||
|
||||
@ -76,7 +76,8 @@ define_pd_global(bool, CompactStrings, false);
|
||||
// avoid biased locking while we are bootstrapping the aarch64 build
|
||||
define_pd_global(bool, UseBiasedLocking, false);
|
||||
|
||||
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
|
||||
// Clear short arrays bigger than one word in an arch-specific way
|
||||
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
|
||||
|
||||
#if defined(COMPILER1) || defined(COMPILER2)
|
||||
define_pd_global(intx, InlineSmallCode, 1000);
|
||||
|
||||
@ -4677,6 +4677,39 @@ void MacroAssembler::zero_words(Register base, Register cnt)
|
||||
fill_words(base, cnt, zr);
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
||||
// cnt: Immediate count in 8-byte unit.
|
||||
#define ShortArraySize (18 * BytesPerLong)
|
||||
void MacroAssembler::zero_words(Register base, u_int64_t cnt)
|
||||
{
|
||||
int i = cnt & 1; // store any odd word to start
|
||||
if (i) str(zr, Address(base));
|
||||
|
||||
if (cnt <= ShortArraySize / BytesPerLong) {
|
||||
for (; i < (int)cnt; i += 2)
|
||||
stp(zr, zr, Address(base, i * wordSize));
|
||||
} else {
|
||||
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
|
||||
int remainder = cnt % (2 * unroll);
|
||||
for (; i < remainder; i += 2)
|
||||
stp(zr, zr, Address(base, i * wordSize));
|
||||
|
||||
Label loop;
|
||||
Register cnt_reg = rscratch1;
|
||||
Register loop_base = rscratch2;
|
||||
cnt = cnt - remainder;
|
||||
mov(cnt_reg, cnt);
|
||||
// adjust base and prebias by -2 * wordSize so we can pre-increment
|
||||
add(loop_base, base, (remainder - 2) * wordSize);
|
||||
bind(loop);
|
||||
sub(cnt_reg, cnt_reg, 2 * unroll);
|
||||
for (i = 1; i < unroll; i++)
|
||||
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
|
||||
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
|
||||
cbnz(cnt_reg, loop);
|
||||
}
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be filled, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte units.
|
||||
// value: Value to be filled with.
|
||||
|
||||
@ -1186,6 +1186,7 @@ public:
|
||||
|
||||
void fill_words(Register base, Register cnt, Register value);
|
||||
void zero_words(Register base, Register cnt);
|
||||
void zero_words(Register base, u_int64_t cnt);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user