fix performance problem on bfs-dfs mixed mode

This commit is contained in:
tstuefe 2026-01-23 11:16:22 +01:00
parent 599bab2953
commit 73c77ba145
4 changed files with 37 additions and 42 deletions

View File

@ -41,17 +41,6 @@
#include "utilities/stack.inline.hpp"
#ifdef ASSERT
void DFSClosure::log(const char* msg, ...) {
LogTarget(Debug, jfr, system, dfs) lt;
if (lt.is_enabled()) {
LogStream ls(lt);
va_list args;
va_start(args, msg);
ls.vprint(msg, args);
va_end(args);
ls.cr();
}
}
void DFSClosure::log_reference_stack() {
LogTarget(Trace, jfr, system, dfs) lt;
if (lt.is_enabled()) {
@ -87,7 +76,9 @@ void DFSClosure::find_leaks_from_edge(EdgeStore* edge_store,
// Depth-first search, starting from a BFS edge
DFSClosure dfs(edge_store, mark_bits, start_edge);
log_debug(jfr, system, dfs)("DFS: scanning from edge");
start_edge->pointee()->oop_iterate(&dfs);
const UnifiedOopRef ref = start_edge->reference();
const oop obj = ref.dereference();
dfs.probe_stack_push(ref, obj, 0);
dfs.drain_probe_stack();
log_debug(jfr, system, dfs)("DFS: done");
}
@ -114,7 +105,6 @@ void DFSClosure::find_leaks_from_root_set(EdgeStore* edge_store,
rs.process();
dfs.drain_probe_stack();
log_debug(jfr, system, dfs)("DFS: done");
}
// Memory usage of DFS search is dominated by probe stack usage. That
@ -133,10 +123,10 @@ void DFSClosure::find_leaks_from_root_set(EdgeStore* edge_store,
// need to be put on the probe stack (the backward reference points to an
// already marked object).
//
// Hence, with a max_dfs_depth of 3200, we get a typically max probe stack
// depth of <10000, which costs <~160KB - not a problem at all.
// Hence, with a max_dfs_depth of 4000, we get a typical probe stack at
// max depth of ~10000, which costs <~160KB - not a problem at all.
//
// But To be very sure we also limit the probe stack size itself. The max size
// But to be very sure we also limit the probe stack size itself. The max size
// is generous enough to make it very unlikely we ever hit it before hitting
// max_depth. When we hit it, we treat it the same way we treat max_depth -
// we just stop following that particular graph edge.
@ -153,7 +143,9 @@ DFSClosure::DFSClosure(EdgeStore* edge_store, JFRBitSet* mark_bits, const Edge*
_current_depth(0),
_current_chunkindex(0),
_num_objects_processed(0),
_num_sampled_objects_found(0)
_num_sampled_objects_found(0),
_times_max_depth_reached(0),
_times_probe_stack_full(0)
{
}
@ -161,14 +153,23 @@ DFSClosure::~DFSClosure() {
if (!GranularTimer::is_finished()) {
assert(_probe_stack.is_empty(), "We should have drained the probe stack?");
}
log_info(jfr, system, dfs)("DFS: " UINT64_FORMAT " objects processed, " UINT64_FORMAT " sample objects found",
_num_objects_processed, _num_sampled_objects_found);
log_info(jfr, system, dfs)("DFS: objects processed: " UINT64_FORMAT ","
" sampled objects found: " UINT64_FORMAT ","
" reached max graph depth: " UINT64_FORMAT ","
" reached max probe stack depth: " UINT64_FORMAT,
_num_objects_processed, _num_sampled_objects_found,
_times_max_depth_reached, _times_probe_stack_full);
}
void DFSClosure::probe_stack_push(UnifiedOopRef ref, oop pointee, size_t depth) {
assert(!ref.is_null(), "invariant");
if (_probe_stack.is_full()) {
_times_probe_stack_full ++;
return;
}
if (pointee == nullptr) {
return;
}
@ -178,12 +179,6 @@ void DFSClosure::probe_stack_push(UnifiedOopRef ref, oop pointee, size_t depth)
return;
}
if (_probe_stack.is_full()) {
log_debug(jfr, system, dfs)("Probe stack full (graph depth: %zu, probe stack size: %zu)",
_current_depth, _probe_stack.size());
return;
}
ProbeStackItem item { ref, checked_cast<unsigned>(depth), 0 };
_probe_stack.push(item);
}
@ -195,8 +190,7 @@ void DFSClosure::probe_stack_push_followup_chunk(UnifiedOopRef ref, oop pointee,
assert(chunkindex > 0, "invariant");
if (_probe_stack.is_full()) {
log_debug(jfr, system, dfs)("Probe stack full (graph depth: %zu, probe stack size: %zu)",
_current_depth, _probe_stack.size());
_times_probe_stack_full ++;
return;
}
@ -248,8 +242,7 @@ void DFSClosure::handle_oop() {
// trace children if needed
if (_current_depth == _max_depth - 1) {
log_debug(jfr, system, dfs)("max depth reached (%zu, probe stack size: %zu)",
_current_depth, _probe_stack.size());
_times_max_depth_reached ++;
return; // stop following this chain
}
@ -283,8 +276,7 @@ void DFSClosure::handle_objarrayoop() {
// trace children if needed
if (_current_depth == _max_depth - 1) {
log_debug(jfr, system, dfs)("max depth reached (%zu, probe stack size: %zu)",
_current_depth, _probe_stack.size());
_times_max_depth_reached ++;
return; // stop following this chain
}

View File

@ -39,7 +39,7 @@ class EdgeQueue;
// Class responsible for iterating the heap depth-first
class DFSClosure : public BasicOopIterateClosure {
private:
static const size_t max_dfs_depth = 3200;
static const size_t max_dfs_depth = 4000;
static UnifiedOopRef _reference_stack[max_dfs_depth];
EdgeStore* _edge_store;
@ -67,6 +67,8 @@ class DFSClosure : public BasicOopIterateClosure {
int _current_chunkindex;
uint64_t _num_objects_processed;
uint64_t _num_sampled_objects_found;
uint64_t _times_max_depth_reached;
uint64_t _times_probe_stack_full;
bool pointee_was_visited(const oop pointee) const { return _mark_bits->is_marked(pointee); }
void mark_pointee_as_visited(const oop pointee) { _mark_bits->mark_obj(pointee); }
@ -79,7 +81,6 @@ class DFSClosure : public BasicOopIterateClosure {
void handle_oop();
void handle_objarrayoop();
void log(const char* msg, ...) DEBUG_ONLY(;) NOT_DEBUG({})
void log_reference_stack() DEBUG_ONLY(;) NOT_DEBUG({})
public:

View File

@ -712,7 +712,6 @@ jdk/incubator/vector/LoadJsvmlTest.java 8305390 windows-
jdk/jfr/event/compiler/TestCodeSweeper.java 8338127 generic-all
jdk/jfr/event/oldobject/TestShenandoah.java 8342951 generic-all
jdk/jfr/event/runtime/TestResidentSetSizeEvent.java 8309846 aix-ppc64
jdk/jfr/jvm/TestWaste.java 8371630 generic-all
############################################################################

View File

@ -47,7 +47,7 @@ import jdk.test.lib.jfr.EventNames;
* @modules jdk.jfr/jdk.jfr.internal.test
* @library /test/lib /test/jdk
*
* @run main/othervm -XX:TLABSize=2k -Xmx256m jdk.jfr.jcmd.TestJcmdDumpPathToGCRootsBFSDFS dfs-only
* @run main/othervm -XX:TLABSize=2k -Xmx256m -Xlog:jfr+system+dfs jdk.jfr.jcmd.TestJcmdDumpPathToGCRootsBFSDFS dfs-only
*/
/**
@ -154,13 +154,14 @@ public class TestJcmdDumpPathToGCRootsBFSDFS {
System.out.println("No events found in recording. Retrying.");
continue;
}
boolean chains = hasChains(events);
if (expectedChains && !chains) {
int foundChains = numChainsFoundInEvents(events);
final int minChainsRequired = 30; // very conservative, should be in the low 100s normally
if (expectedChains && foundChains < minChainsRequired) {
System.out.println(events);
System.out.println("Expected chains but found none. Retrying.");
System.out.println("Expected chains but found not enough (" + foundChains + "). Retrying.");
continue;
}
if (!expectedChains && chains) {
if (!expectedChains && foundChains > 0) {
System.out.println(events);
System.out.println("Didn't expect chains but found some. Retrying.");
continue;
@ -175,14 +176,16 @@ public class TestJcmdDumpPathToGCRootsBFSDFS {
System.gc();
}
private static boolean hasChains(List<RecordedEvent> events) throws IOException {
private static int numChainsFoundInEvents(List<RecordedEvent> events) throws IOException {
int found = 0;
for (RecordedEvent e : events) {
RecordedObject ro = e.getValue("object");
if (ro.getValue("referrer") != null) {
return true;
found++;
}
}
return false;
return found;
}
private static void buildLeak(int objectCount) {