diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 456cd622836..10a5624f056 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -8289,7 +8289,7 @@ instruct membar_acquire() %{ ins_cost(VOLATILE_REF_COST); format %{ "membar_acquire\n\t" - "dmb ish" %} + "dmb ishld" %} ins_encode %{ __ block_comment("membar_acquire"); @@ -8343,11 +8343,12 @@ instruct membar_release() %{ ins_cost(VOLATILE_REF_COST); format %{ "membar_release\n\t" - "dmb ish" %} + "dmb ishst\n\tdmb ishld" %} ins_encode %{ __ block_comment("membar_release"); - __ membar(Assembler::LoadStore|Assembler::StoreStore); + __ membar(Assembler::StoreStore); + __ membar(Assembler::LoadStore); %} ins_pipe(pipe_serial); %} diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index b26eaa4bfcd..73fdbb387e5 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -127,6 +127,8 @@ define_pd_global(intx, InlineSmallCode, 1000); range(1, 99) \ product(ccstr, UseBranchProtection, "none", \ "Branch Protection to use: none, standard, pac-ret") \ + product(bool, AlwaysMergeDMB, false, DIAGNOSTIC, \ + "Always merge DMB instructions in code emission") \ // end of ARCH_FLAGS diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index a3c560b28d3..1c1e2e28942 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -2066,14 +2066,21 @@ void MacroAssembler::membar(Membar_mask_bits order_constraint) { address last = code()->last_insn(); if (last != nullptr && nativeInstruction_at(last)->is_Membar() && prev == last) { NativeMembar *bar = NativeMembar_at(prev); - // We are merging two memory barrier instructions. On AArch64 we - // can do this simply by ORing them together. - bar->set_kind(bar->get_kind() | order_constraint); - BLOCK_COMMENT("merged membar"); - } else { - code()->set_last_insn(pc()); - dmb(Assembler::barrier(order_constraint)); + // Don't promote DMB ST|DMB LD to DMB (a full barrier) because + // doing so would introduce a StoreLoad which the caller did not + // intend + if (AlwaysMergeDMB || bar->get_kind() == order_constraint + || bar->get_kind() == AnyAny + || order_constraint == AnyAny) { + // We are merging two memory barrier instructions. On AArch64 we + // can do this simply by ORing them together. + bar->set_kind(bar->get_kind() | order_constraint); + BLOCK_COMMENT("merged membar"); + return; + } } + code()->set_last_insn(pc()); + dmb(Assembler::barrier(order_constraint)); } bool MacroAssembler::try_merge_ldst(Register rt, const Address &adr, size_t size_in_bytes, bool is_store) { diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index f7fe2f7dec8..baf8ba59476 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -183,6 +183,9 @@ void VM_Version::initialize() { if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true); } + if (FLAG_IS_DEFAULT(AlwaysMergeDMB)) { + FLAG_SET_DEFAULT(AlwaysMergeDMB, true); + } } // Cortex A53 diff --git a/test/micro/org/openjdk/bench/vm/compiler/FinalFieldInitialize.java b/test/micro/org/openjdk/bench/vm/compiler/FinalFieldInitialize.java new file mode 100644 index 00000000000..ee0779faecf --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/FinalFieldInitialize.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024, Alibaba Group Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.*; + +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.infra.Blackhole; + +/* test allocation speed of object with final field */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 5, time = 3, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS) +@Fork(value = 3) +public class FinalFieldInitialize { + final static int LEN = 100_000; + Object arr[] = null; + @Setup + public void setup(){ + arr = new Object[LEN]; + } + + @Benchmark + public void testAlloc(Blackhole bh) { + for (int i=0; i