8354674: AArch64: Intrinsify Unsafe::setMemory

Reviewed-by: adinn
This commit is contained in:
Andrew Haley 2025-05-16 09:28:35 +00:00
parent a8d8ffa8ad
commit a6ebcf61eb
3 changed files with 216 additions and 1 deletions

View File

@ -2566,6 +2566,123 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Emits the shared exit sequence for the unsafe memory-access stubs and
// returns its entry address. The caller registers that address through
// UnsafeMemoryAccess::set_common_exit_stub_pc().
//
// The sequence is: leave(), load 0 into r0, return through lr.
// NOTE(review): presumably r0 carries the stub's return value and leave()
// undoes the enter() performed by the stub that was interrupted - confirm
// against the fault-handling code that jumps here.
address generate_unsafecopy_common_error_exit() {
  // Capture the entry point before any instruction is emitted.
  const address exit_entry = __ pc();
  __ leave();
  __ mov(r0, 0);
  __ ret(lr);
  return exit_entry;
}
//
// Generate 'unsafe' set memory stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t (# bytes) argument instead of an element count.
//
// This fill operation is atomicity preserving: as long as the
// address supplied is sufficiently aligned, all writes of up to 64
// bits in size are single-copy atomic.
//
// Shape of the generated code:
//   1. Replicate the fill byte into all 16 byte lanes of v0.
//   2. Only if AvoidUnalignedAccesses: for counts of at least 16, store
//      16 bytes at dest and then advance dest to the next 16-byte boundary.
//   3. Main loop: store 64 bytes per iteration while at least 64 remain.
//   4. Tail: test bits 5..0 of count from most to least significant and
//      emit one store of 32, 16, 8, 4, 2 and 1 bytes respectively for
//      each bit that is set.
//
// Input:
// c_rarg0 - destination array address
// c_rarg1 - byte count (size_t)
// c_rarg2 - byte value
//
// Returns the entry address of the generated stub.
//
// Registers written by the generated code: v0, dest (c_rarg0),
// count (c_rarg1), value (c_rarg2, only on the 2-byte tail path), and
// rscratch1/rscratch2 (only when AvoidUnalignedAccesses is set).
//
address generate_unsafe_setmemory() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id);
address start = __ pc();
Register dest = c_rarg0, count = c_rarg1, value = c_rarg2;
Label tail;
// NOTE(review): presumably this marks the code emitted while 'umam' is in
// scope so that a fault inside it is routed to the common error exit -
// confirm the meaning of the two boolean arguments against the
// UnsafeMemoryAccessMark declaration.
UnsafeMemoryAccessMark umam(this, true, false);
__ enter(); // required for proper stackwalking of RuntimeStub frame
// Broadcast the fill byte to every byte lane of v0; all vector stores
// below write (a prefix of) this register.
__ dup(v0, __ T16B, value);
if (AvoidUnalignedAccesses) {
// Fewer than 16 bytes: go straight to the tail. count has not been
// biased by -64 on this path, which is fine because the tail only
// looks at its low six bits.
// NOTE(review): dest is not aligned on this path, so the 8/4/2-byte
// tail stores can still be unaligned - confirm this is acceptable.
__ cmp(count, (u1)16);
__ br(__ LO, tail);
// rscratch1 = 16 - (dest & 15), a value in the range 1..16.
__ mov(rscratch1, 16);
__ andr(rscratch2, dest, 15);
__ sub(rscratch1, rscratch1, rscratch2); // Bytes needed to 16-align dest
// Fill the first 16 bytes at the original dest, then step dest forward
// to the next 16-byte boundary. Any bytes between the new dest and the
// end of this store are written again later with the same value.
// NOTE(review): this store is itself issued at the unaligned address.
__ strq(v0, Address(dest));
__ sub(count, count, rscratch1);
__ add(dest, dest, rscratch1);
}
// Bias count by -64. An unsigned borrow (LO) means fewer than 64 bytes
// remain, so the main loop is skipped.
__ subs(count, count, (u1)64);
__ br(__ LO, tail);
{
// Main loop: two 32-byte pair stores per iteration. The flags tested by
// the closing branch come from subs; the add in between does not set
// flags. HS (no borrow) means at least another 64 bytes were left
// before the subtraction, so go round again.
Label again;
__ bind(again);
__ stpq(v0, v0, Address(dest));
__ stpq(v0, v0, Address(dest, 32));
__ subs(count, count, 64);
__ add(dest, dest, 64);
__ br(__ HS, again);
}
__ bind(tail);
// The count of bytes is off by 64, but we don't need to correct
// it because we're only going to use the least-significant few
// count bits from here on.
// __ add(count, count, 64);
{
// Bit 5 of count set: 32 more bytes, dest post-incremented by 32.
Label dont;
__ tbz(count, exact_log2(32), dont);
__ stpq(v0, v0, __ post(dest, 32));
__ bind(dont);
}
{
// Bit 4 set: 16 more bytes.
Label dont;
__ tbz(count, exact_log2(16), dont);
__ strq(v0, __ post(dest, 16));
__ bind(dont);
}
{
// Bit 3 set: 8 more bytes, written as a single 64-bit store.
Label dont;
__ tbz(count, exact_log2(8), dont);
__ strd(v0, __ post(dest, 8));
__ bind(dont);
}
// If none of bits 2..0 is set there is nothing left to write, so skip
// the three sub-word checks.
Label finished;
__ tst(count, 7);
__ br(__ EQ, finished);
{
// Bit 2 set: 4 more bytes.
Label dont;
__ tbz(count, exact_log2(4), dont);
__ strs(v0, __ post(dest, 4));
__ bind(dont);
}
{
// Bit 1 set: 2 more bytes. The store comes from the general-purpose
// 'value' register, so first copy its low 8 bits into bits 8..15 to
// form a halfword holding the fill byte twice.
Label dont;
__ tbz(count, exact_log2(2), dont);
__ bfi(value, value, 8, 8);
__ strh(value, __ post(dest, 2));
__ bind(dont);
}
{
// Bit 0 set: the final single byte. No post-increment is needed since
// dest is not used afterwards.
Label dont;
__ tbz(count, exact_log2(1), dont);
__ strb(value, Address(dest));
__ bind(dont);
}
__ bind(finished);
__ leave();
__ ret(lr);
return start;
}
address generate_data_cache_writeback() {
const Register line = c_rarg0; // address of line to write back
@ -2615,6 +2732,9 @@ class StubGenerator: public StubCodeGenerator {
address entry_jlong_arraycopy;
address entry_checkcast_arraycopy;
address ucm_common_error_exit = generate_unsafecopy_common_error_exit();
UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit);
generate_copy_longs(StubGenStubId::copy_byte_f_id, IN_HEAP | IS_ARRAY, copy_f, r0, r1, r15);
generate_copy_longs(StubGenStubId::copy_byte_b_id, IN_HEAP | IS_ARRAY, copy_b, r0, r1, r15);
@ -11259,6 +11379,8 @@ class StubGenerator: public StubCodeGenerator {
}
#endif
StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory();
StubRoutines::aarch64::set_completed(); // Indicate that arraycopy and zero_blocks stubs are generated
}

View File

@ -53,7 +53,7 @@ public final class SegmentBulkOperations {
// All the threshold values below MUST be a power of two and should preferably be
// greater or equal to 2^3.
private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", Architecture.isAARCH64() ? 18 : 5);
private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", 5);
private static final int NATIVE_THRESHOLD_MISMATCH = powerOfPropertyOr("mismatch", 6);
private static final int NATIVE_THRESHOLD_COPY = powerOfPropertyOr("copy", 6);

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang.foreign;
import jdk.internal.misc.Unsafe;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Setup;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark that fills a small block of native memory with a constant
 * byte in two ways: through {@link MemorySegment#fill(byte)} and through
 * {@code Unsafe.setMemory}. Both operate on the same region, whose length
 * ({@link #size}) and start-address alignment ({@link #aligned}) are
 * benchmark parameters.
 */
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(org.openjdk.jmh.annotations.Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = {"--enable-native-access=ALL-UNNAMED", "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED"})
public class MemorySegmentFillUnsafe {

    static final Unsafe UNSAFE = Utils.unsafe;

    long src;

    /** Number of bytes filled by each benchmark invocation. */
    @Param({"1", "2", "3", "4", "5", "6", "7", "8", "15", "16", "63", "64", "255", "256"})
    public int size;

    /** Whether the filled region starts on the requested alignment boundary or one byte past it. */
    @Param({"true", "false"})
    public boolean aligned;

    // Region under test and its raw start address (the latter is what the Unsafe variant writes to).
    private MemorySegment target;
    private long targetAddress;

    /**
     * Allocates the region to fill from the global arena and records its address.
     */
    @Setup
    public void setup() throws Throwable {
        // The alignment is tied to the size so that, should writes ever be batched
        // differently depending on alignment, the difference would show up here.
        final long alignment = switch (size) {
            case 2, 3 -> 2L;
            case 4, 5, 6, 7 -> 4L;
            default -> 8L;
        };
        final Arena arena = Arena.global();
        // For the misaligned variant, over-allocate by one byte and drop the first
        // byte: the region keeps its length but starts one byte past the boundary.
        target = aligned
                ? arena.allocate(size, alignment)
                : arena.allocate(size + 1, alignment).asSlice(1);
        targetAddress = target.address();
    }

    /** Fills the region through the MemorySegment API. */
    @Benchmark
    public void panama() {
        target.fill((byte) 99);
    }

    /** Fills the same region through Unsafe.setMemory, using its raw address. */
    @Benchmark
    public void unsafe() {
        UNSAFE.setMemory(targetAddress, size, (byte) 99);
    }
}