From aefdbdc7e54ae92b5c2113504ce17abf00681e62 Mon Sep 17 00:00:00 2001 From: Robbin Ehn Date: Tue, 27 Aug 2024 08:42:06 +0000 Subject: [PATCH] 8338727: RISC-V: Avoid synthetic data dependency in nmethod barrier on Ztso Reviewed-by: mli, fyang --- .../gc/shared/barrierSetAssembler_riscv.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index efdf3765965..d96d405aa22 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -265,7 +265,7 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo } case NMethodPatchingType::conc_instruction_and_data_patch: { - // If we patch code we need both a code patching and a loadload + // If we patch code we need both a cmodx fence and a loadload // fence. It's not super cheap, so we use a global epoch mechanism // to hide them in a slow path. // The high level idea of the global epoch mechanism is to detect @@ -273,11 +273,19 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo // last nmethod was disarmed. This implies that the required // fencing has been performed for all preceding nmethod disarms // as well. Therefore, we do not need any further fencing. + __ la(t1, ExternalAddress((address)&_patching_epoch)); - // Embed an artificial data dependency to order the guard load - // before the epoch load. - __ srli(ra, t0, 32); - __ orr(t1, t1, ra); + if (!UseZtso) { + // Embed a synthetic data dependency between the load of the guard and + // the load of the epoch. This guarantees that these loads occur in + // order, while allowing other independent instructions to be reordered. + // Note: This may be slower than using a membar(load|load) (fence r,r). 
+ // The reason is that processors will not start the second load until the first comes back. + // As a result, the two loads cannot overlap, + // which is a stronger constraint than ordering requires (stronger than TSO). + __ srli(ra, t0, 32); + __ orr(t1, t1, ra); + } // Read the global epoch value. __ lwu(t1, t1); // Combine the guard value (low order) with the epoch value (high order).