From 43787890291d71de61b28b8a4e3bf9aaba46757a Mon Sep 17 00:00:00 2001 From: Albert Mingkun Yang Date: Fri, 5 Dec 2025 19:17:45 +0000 Subject: [PATCH] 8373145: [BACKOUT] Remove ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch Reviewed-by: mdoerr, kvn --- src/hotspot/cpu/ppc/ppc.ad | 29 +++++++++++++++++++ .../gc/shared/threadLocalAllocBuffer.cpp | 28 +++++++++++++++++- .../gc/shared/threadLocalAllocBuffer.hpp | 1 + src/hotspot/share/opto/macro.cpp | 3 +- src/hotspot/share/runtime/vmStructs.cpp | 1 + .../runtime/ThreadLocalAllocBuffer.java | 3 +- .../classes/sun/jvm/hotspot/runtime/VM.java | 7 +++++ 7 files changed, 69 insertions(+), 3 deletions(-) diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index aa00609094e..2a0a9149bb3 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -6335,8 +6335,36 @@ instruct loadConD_Ex(regD dst, immD src) %{ // Prefetch instructions. // Must be safe to execute with invalid address (cannot fault). +// Special prefetch versions which use the dcbz instruction. +instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{ + match(PrefetchAllocation (AddP mem src)); + predicate(AllocatePrefetchStyle == 3); + ins_cost(MEMORY_REF_COST); + + format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %} + size(4); + ins_encode %{ + __ dcbz($src$$Register, $mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{ + match(PrefetchAllocation mem); + predicate(AllocatePrefetchStyle == 3); + ins_cost(MEMORY_REF_COST); + + format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %} + size(4); + ins_encode %{ + __ dcbz($mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); +%} + instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{ match(PrefetchAllocation (AddP mem src)); + predicate(AllocatePrefetchStyle != 3); ins_cost(MEMORY_REF_COST); format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %} @@ -6349,6 +6377,7 @@ instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{ instruct prefetch_alloc_no_offset(indirectMemory mem) %{ match(PrefetchAllocation mem); + predicate(AllocatePrefetchStyle != 3); ins_cost(MEMORY_REF_COST); format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %} diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp index 2181e089663..9635ed4d0cb 100644 --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp @@ -37,6 +37,7 @@ #include "utilities/copy.hpp" size_t ThreadLocalAllocBuffer::_max_size = 0; +int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0; unsigned int ThreadLocalAllocBuffer::_target_refills = 0; ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() : @@ -224,6 +225,30 @@ void ThreadLocalAllocBuffer::startup_initialization() { // abort during VM initialization. _target_refills = MAX2(_target_refills, 2U); +#ifdef COMPILER2 + // If the C2 compiler is present, extra space is needed at the end of + // TLABs, otherwise prefetching instructions generated by the C2 + // compiler will fault (due to accessing memory outside of heap). + // The amount of space is the max of the number of lines to + // prefetch for array and for instance allocations. (Extra space must be + // reserved to accommodate both types of allocations.) + // + // Only SPARC-specific BIS instructions are known to fault. (Those + // instructions are generated if AllocatePrefetchStyle==3 and + // AllocatePrefetchInstr==1). To be on the safe side, however, + // extra space is reserved for all combinations of + // AllocatePrefetchStyle and AllocatePrefetchInstr. + // + // If the C2 compiler is not present, no space is reserved. + + // +1 for rounding up to next cache line, +1 to be safe + if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) { + int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2; + _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) / + (int)HeapWordSize; + } +#endif + // During jvm startup, the main thread is initialized // before the heap is initialized. So reinitialize it now. guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread"); @@ -429,7 +454,8 @@ void ThreadLocalAllocStats::publish() { } size_t ThreadLocalAllocBuffer::end_reserve() { - return CollectedHeap::lab_alignment_reserve(); + size_t reserve_size = CollectedHeap::lab_alignment_reserve(); + return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch); } const HeapWord* ThreadLocalAllocBuffer::start_relaxed() const { diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp index b64fa8d6ad1..59979646395 100644 --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp @@ -58,6 +58,7 @@ private: size_t _allocated_before_last_gc; // total bytes allocated up until the last gc static size_t _max_size; // maximum size of any TLAB + static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB static unsigned _target_refills; // expected number of refills between GCs unsigned _number_of_refills; diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp index 6f2171bbd75..90602bc2b35 100644 --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -1914,7 +1914,8 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false, transform_later(cache_adr); cache_adr = new CastP2XNode(needgc_false, cache_adr); transform_later(cache_adr); - // Address is aligned to execute prefetch to the beginning of cache line size. + // Address is aligned to execute prefetch to the beginning of cache line size + // (it is important when BIS instruction is used on SPARC as prefetch). Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1)); cache_adr = new AndXNode(cache_adr, mask); transform_later(cache_adr); diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 50748fd7e45..4ecc8f9ca01 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -353,6 +353,7 @@ nonstatic_field(ThreadLocalAllocBuffer, _pf_top, HeapWord*) \ nonstatic_field(ThreadLocalAllocBuffer, _desired_size, size_t) \ nonstatic_field(ThreadLocalAllocBuffer, _refill_waste_limit, size_t) \ + static_field(ThreadLocalAllocBuffer, _reserve_for_allocation_prefetch, int) \ static_field(ThreadLocalAllocBuffer, _target_refills, unsigned) \ nonstatic_field(ThreadLocalAllocBuffer, _number_of_refills, unsigned) \ nonstatic_field(ThreadLocalAllocBuffer, _refill_waste, unsigned) \ diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java index e23e63806bd..1dc67330d3d 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java @@ -76,9 +76,10 @@ public class ThreadLocalAllocBuffer extends VMObject { private long endReserve() { long labAlignmentReserve = VM.getVM().getLabAlignmentReserve(); + long reserveForAllocationPrefetch = VM.getVM().getReserveForAllocationPrefetch(); long heapWordSize = VM.getVM().getHeapWordSize(); - return labAlignmentReserve * heapWordSize; + return Math.max(labAlignmentReserve, reserveForAllocationPrefetch) * heapWordSize; } /** Support for iteration over heap -- not sure how this will diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java index 1607563150a..dc27a4fc59e 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java @@ -123,6 +123,7 @@ public class VM { private int invocationEntryBCI; private ReversePtrs revPtrs; private VMRegImpl vmregImpl; + private int reserveForAllocationPrefetch; private int labAlignmentReserve; // System.getProperties from debuggee VM @@ -446,6 +447,8 @@ public class VM { boolType = (CIntegerType) db.lookupType("bool"); Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer"); + CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch"); + reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType); Type collectedHeap = db.lookupType("CollectedHeap"); CIntegerField labAlignmentReserveField = collectedHeap.getCIntegerField("_lab_alignment_reserve"); @@ -912,6 +915,10 @@ public class VM { return vmInternalInfo; } + public int getReserveForAllocationPrefetch() { + return reserveForAllocationPrefetch; + } + public int getLabAlignmentReserve() { return labAlignmentReserve; }