diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index beaa2510423..1b6f6d489f3 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -2566,6 +2566,123 @@ class StubGenerator: public StubCodeGenerator {
     return start;
   }
 
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    __ leave();
+    __ mov(r0, 0);
+    __ ret(lr);
+    return start_pc;
+  }
+
+  //
+  // Generate 'unsafe' set memory stub
+  // Though just as safe as the other stubs, it takes an unscaled
+  // size_t (# bytes) argument instead of an element count.
+  //
+  // This fill operation is atomicity preserving: as long as the
+  // address supplied is sufficiently aligned, all writes of up to 64
+  // bits in size are single-copy atomic.
+  //
+  // Input:
+  //   c_rarg0 - destination array address
+  //   c_rarg1 - byte count (size_t)
+  //   c_rarg2 - byte value
+  //
+  address generate_unsafe_setmemory() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id);
+    address start = __ pc();
+
+    Register dest = c_rarg0, count = c_rarg1, value = c_rarg2;
+    Label tail;
+
+    UnsafeMemoryAccessMark umam(this, true, false);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ dup(v0, __ T16B, value);
+
+    if (AvoidUnalignedAccesses) {
+      __ cmp(count, (u1)16);
+      __ br(__ LO, tail);
+
+      __ mov(rscratch1, 16);
+      __ andr(rscratch2, dest, 15);
+      __ sub(rscratch1, rscratch1, rscratch2); // Bytes needed to 16-align dest
+      __ strq(v0, Address(dest));
+      __ sub(count, count, rscratch1);
+      __ add(dest, dest, rscratch1);
+    }
+
+    __ subs(count, count, (u1)64);
+    __ br(__ LO, tail);
+    {
+      Label again;
+      __ bind(again);
+      __ stpq(v0, v0, Address(dest));
+      __ stpq(v0, v0, Address(dest, 32));
+
+      __ subs(count, count, 64);
+      __ add(dest, dest, 64);
+      __ br(__ HS, again);
+    }
+
+    __ bind(tail);
+    // The count of bytes is off by 64, but we don't need to correct
+    // it because we're only going to use the least-significant few
+    // count bits from here on.
+    // __ add(count, count, 64);
+
+    {
+      Label dont;
+      __ tbz(count, exact_log2(32), dont);
+      __ stpq(v0, v0, __ post(dest, 32));
+      __ bind(dont);
+    }
+    {
+      Label dont;
+      __ tbz(count, exact_log2(16), dont);
+      __ strq(v0, __ post(dest, 16));
+      __ bind(dont);
+    }
+    {
+      Label dont;
+      __ tbz(count, exact_log2(8), dont);
+      __ strd(v0, __ post(dest, 8));
+      __ bind(dont);
+    }
+
+    Label finished;
+    __ tst(count, 7);
+    __ br(__ EQ, finished);
+
+    {
+      Label dont;
+      __ tbz(count, exact_log2(4), dont);
+      __ strs(v0, __ post(dest, 4));
+      __ bind(dont);
+    }
+    {
+      Label dont;
+      __ tbz(count, exact_log2(2), dont);
+      __ bfi(value, value, 8, 8);
+      __ strh(value, __ post(dest, 2));
+      __ bind(dont);
+    }
+    {
+      Label dont;
+      __ tbz(count, exact_log2(1), dont);
+      __ strb(value, Address(dest));
+      __ bind(dont);
+    }
+
+    __ bind(finished);
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
   address generate_data_cache_writeback() {
     const Register line = c_rarg0;  // address of line to write back
 
@@ -2615,6 +2732,9 @@ class StubGenerator: public StubCodeGenerator {
     address entry_jlong_arraycopy;
     address entry_checkcast_arraycopy;
 
+    address ucm_common_error_exit = generate_unsafecopy_common_error_exit();
+    UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit);
+
     generate_copy_longs(StubGenStubId::copy_byte_f_id, IN_HEAP | IS_ARRAY, copy_f, r0, r1, r15);
     generate_copy_longs(StubGenStubId::copy_byte_b_id, IN_HEAP | IS_ARRAY, copy_b, r0, r1, r15);
 
@@ -11259,6 +11379,8 @@ class StubGenerator: public StubCodeGenerator {
     }
 #endif
 
+    StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory();
+
     StubRoutines::aarch64::set_completed(); // Inidicate that arraycopy and zero_blocks stubs are generated
   }
 
diff --git a/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java b/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java
index 5f0794c81d9..76513c6772c 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java
@@ -53,7 +53,7 @@ public final class SegmentBulkOperations {
 
     // All the threshold values below MUST be a power of two and should preferably be
     // greater or equal to 2^3.
-    private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", Architecture.isAARCH64() ? 18 : 5);
+    private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", 5);
     private static final int NATIVE_THRESHOLD_MISMATCH = powerOfPropertyOr("mismatch", 6);
     private static final int NATIVE_THRESHOLD_COPY = powerOfPropertyOr("copy", 6);
 
diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentFillUnsafe.java b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentFillUnsafe.java
new file mode 100644
index 00000000000..18857d4657e
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentFillUnsafe.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.bench.java.lang.foreign;
+
+import jdk.internal.misc.Unsafe;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@State(org.openjdk.jmh.annotations.Scope.Thread)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value = 3, jvmArgs = {"--enable-native-access=ALL-UNNAMED", "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED"})
+public class MemorySegmentFillUnsafe {
+
+    static final Unsafe UNSAFE = Utils.unsafe;
+    long src;
+
+    @Param({"1", "2", "3", "4", "5", "6", "7", "8", "15", "16", "63", "64", "255", "256"})
+    public int size;
+
+    @Param({"true", "false"})
+    public boolean aligned;
+
+    private MemorySegment segment;
+    private long address;
+
+    @Setup
+    public void setup() throws Throwable {
+        Arena arena = Arena.global();
+        long alignment = 1;
+        // this complex logic is to ensure that if in the future we decide to batch writes with different
+        // batches based on alignment, we would spot it here
+        if (size == 2 || size == 3) {
+            alignment = 2;
+        } else if (size >= 4 && size <= 7) {
+            alignment = 4;
+        } else {
+            alignment = 8;
+        }
+        if (aligned) {
+            segment = arena.allocate(size, alignment);
+        } else {
+            // forcibly misaligned in both address AND size, given that would be the worst case
+            segment = arena.allocate(size + 1, alignment).asSlice(1);
+        }
+        address = segment.address();
+    }
+
+    @Benchmark
+    public void panama() {
+        segment.fill((byte) 99);
+    }
+
+    @Benchmark
+    public void unsafe() {
+        UNSAFE.setMemory(address, size, (byte) 99);
+    }
+}
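
Note (illustration only, not part of the patch): the stub added above follows the usual fill shape: broadcast the fill byte into a wide register (dup v0.16b), stream 64 bytes per iteration with paired 16-byte stores, then peel 32/16/8/4/2/1-byte stores off the low bits of the remaining count. Below is a minimal Java sketch of that same broadcast-then-wide-store idea; the class and method names are invented for illustration, and this is neither the SegmentBulkOperations code nor the stub touched by the patch.

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;

public class FillSketch {
    // Fill dst with value: broadcast the byte into a long, write 8 bytes at a
    // time, then finish the remaining 0-7 bytes one byte at a time.
    static void fill(MemorySegment dst, byte value) {
        long count = dst.byteSize();
        // Splat the byte across all 8 lanes of a long (stands in for the
        // stub's dup(v0, T16B, value) splat into a 16-byte NEON register).
        long wide = (value & 0xFFL) * 0x0101010101010101L;
        long i = 0;
        // Bulk loop: the widest store conveniently expressible here is 8 bytes;
        // the stub writes 64 bytes per iteration with two stp stores.
        for (; i + Long.BYTES <= count; i += Long.BYTES) {
            dst.set(ValueLayout.JAVA_LONG_UNALIGNED, i, wide);
        }
        // Tail: the stub branches on the low count bits (tbz) to issue one
        // store per remaining power of two; a byte loop is the simplest equivalent.
        for (; i < count; i++) {
            dst.set(ValueLayout.JAVA_BYTE, i, value);
        }
    }

    public static void main(String[] args) {
        try (Arena arena = Arena.ofConfined()) {
            MemorySegment seg = arena.allocate(37);
            fill(seg, (byte) 99);
            System.out.println(seg.get(ValueLayout.JAVA_BYTE, 36)); // prints 99
        }
    }
}

The new JMH benchmark can be run through the usual JDK micro runner, typically something like make test TEST="micro:java.lang.foreign.MemorySegmentFillUnsafe".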