8153713: aarch64: improve short array clearing using store pair

Aarch64: generate store pair instruction to clear short arrays

Reviewed-by: aph
This commit is contained in:
Fei Yang 2016-04-12 11:53:44 +08:00
parent 63403a410e
commit 8215de72f5
4 changed files with 50 additions and 1 deletion

View File

@ -13321,6 +13321,20 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_pipe(pipe_class_memory);
%}
// ClearArray with the word count supplied as an immediate operand (immL cnt)
// instead of a register. The rule matches (ClearArray cnt base) and encodes it
// through the immediate-count overload MacroAssembler::zero_words(Register, u_int64_t).
// NOTE(review): no effect() clause is declared here, so base is treated as
// unmodified by the encoding — confirm against zero_words.
instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)
%{
match(Set dummy (ClearArray cnt base));
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode %{
// The constant is forwarded unchanged, reinterpreted as an unsigned 64-bit count.
__ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
%}
ins_pipe(pipe_class_memory);
%}
// ============================================================================
// Overflow Math Instructions

View File

@ -76,7 +76,8 @@ define_pd_global(bool, CompactStrings, false);
// avoid biased locking while we are bootstrapping the aarch64 build
define_pd_global(bool, UseBiasedLocking, false);
// NOTE(review): InitArrayShortSize is defined twice in this view, first as
// 18*BytesPerLong and then as BytesPerLong. This looks like the before/after
// pair of a single change rather than two live definitions — confirm that only
// one of them survives in the real header.
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
// Clear short arrays bigger than one word in an arch-specific way
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
#if defined(COMPILER1) || defined(COMPILER2)
define_pd_global(intx, InlineSmallCode, 1000);

View File

@ -4677,6 +4677,39 @@ void MacroAssembler::zero_words(Register base, Register cnt)
fill_words(base, cnt, zr);
}
// Emit an inline sequence that zeroes a buffer whose length is known when the
// code is generated.
// base: Register holding the address of the buffer to be zeroed, 8 bytes aligned.
// cnt:  Immediate count in 8-byte units.
// Counts above ShortArraySize / BytesPerLong words are cleared with a loop that
// writes rscratch1 and rscratch2; base itself is never written.
#define ShortArraySize (18 * BytesPerLong)
void MacroAssembler::zero_words(Register base, u_int64_t cnt)
{
  // When the count is odd, one single-word store goes first so that everything
  // after it can be cleared two words at a time.
  int word_idx = cnt & 1;
  if (word_idx != 0) {
    str(zr, Address(base));
  }

  if (cnt <= ShortArraySize / BytesPerLong) {
    // Short buffer: a straight-line run of stp instructions, no loop needed.
    while (word_idx < (int)cnt) {
      stp(zr, zr, Address(base, word_idx * wordSize));
      word_idx += 2;
    }
    return;
  }

  const int pairs_per_iter = 4;                   // stp(zr, zr) instructions per loop iteration
  const int words_per_iter = 2 * pairs_per_iter;  // words cleared per loop iteration

  // Peel off the words that do not fill a whole loop iteration.
  const int leading_words = cnt % words_per_iter;
  while (word_idx < leading_words) {
    stp(zr, zr, Address(base, word_idx * wordSize));
    word_idx += 2;
  }

  Register words_left = rscratch1;
  Register cursor = rscratch2;
  Label clear_more;

  // What remains is a non-zero multiple of words_per_iter.
  cnt -= leading_words;
  mov(words_left, cnt);
  // Start the cursor 2 * wordSize below the first word still to be cleared:
  // the first three pairs of an iteration are stored at positive offsets from
  // it, and the last pair advances it with a pre-indexed store.
  add(cursor, base, (leading_words - 2) * wordSize);
  bind(clear_more);
  sub(words_left, words_left, words_per_iter);
  for (int pair = 1; pair < pairs_per_iter; pair++) {
    stp(zr, zr, Address(cursor, 2 * pair * wordSize));
  }
  stp(zr, zr, Address(pre(cursor, words_per_iter * wordSize)));
  cbnz(words_left, clear_more);
}
// base: Address of a buffer to be filled, 8 bytes aligned.
// cnt: Count in 8-byte unit.
// value: Value to be filled with.

View File

@ -1186,6 +1186,7 @@ public:
// Fill a buffer with a register value.
// base: address of the buffer, 8 bytes aligned; cnt: count in 8-byte units;
// value: value to be filled with.
void fill_words(Register base, Register cnt, Register value);
// Zero a buffer whose word count is held in a register; implemented as
// fill_words(base, cnt, zr).
void zero_words(Register base, Register cnt);
// Zero a buffer whose word count is an immediate known at code-generation time.
// base: address of the buffer, 8 bytes aligned; cnt: count in 8-byte units.
void zero_words(Register base, u_int64_t cnt);
void encode_iso_array(Register src, Register dst,
Register len, Register result,