mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-23 16:55:09 +00:00
8257436: [aarch64] Regressions in ArrayCopyUnalignedDst.testByte/testChar for 65-78 bytes when UseSIMDForMemoryOps is on
Reviewed-by: simonis
This commit is contained in:
parent
b170c8376d
commit
e836396207
@ -1094,10 +1094,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
Register count, Register tmp, int step) {
|
||||
copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
|
||||
bool is_backwards = step < 0;
|
||||
int granularity = uabs(step);
|
||||
unsigned int granularity = uabs(step);
|
||||
const Register t0 = r3, t1 = r4;
|
||||
|
||||
// <= 96 bytes do inline. Direction doesn't matter because we always
|
||||
// <= 80 (or 96 for SIMD) bytes do inline. Direction doesn't matter because we always
|
||||
// load all the data before writing anything
|
||||
Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
|
||||
const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
|
||||
@ -1154,7 +1154,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ ldpq(v0, v1, Address(s, 0));
|
||||
__ ldpq(v2, v3, Address(s, 32));
|
||||
// Unaligned pointers can be an issue for copying.
|
||||
// The issue has more chances to happen when granularity of data is
|
||||
// less than 4(sizeof(jint)). Pointers for arrays of jint are at least
|
||||
// 4 byte aligned. Pointers for arrays of jlong are 8 byte aligned.
|
||||
// The most performance drop has been seen for the range 65-80 bytes.
|
||||
// For such cases using the pair of ldp/stp instead of the third pair of
|
||||
// ldpq/stpq fixes the performance issue.
|
||||
if (granularity < sizeof (jint)) {
|
||||
Label copy96;
|
||||
__ cmp(count, u1(80/granularity));
|
||||
__ br(Assembler::HI, copy96);
|
||||
__ ldp(t0, t1, Address(send, -16));
|
||||
|
||||
__ stpq(v0, v1, Address(d, 0));
|
||||
__ stpq(v2, v3, Address(d, 32));
|
||||
__ stp(t0, t1, Address(dend, -16));
|
||||
__ b(finish);
|
||||
|
||||
__ bind(copy96);
|
||||
}
|
||||
__ ldpq(v4, v5, Address(send, -32));
|
||||
|
||||
__ stpq(v0, v1, Address(d, 0));
|
||||
__ stpq(v2, v3, Address(d, 32));
|
||||
__ stpq(v4, v5, Address(dend, -32));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user