8371702: ZGC: NUMA-Affinity for Worker Threads in the Relocation Phase

Co-authored-by: Axel Boldt-Christmas <aboldtch@openjdk.org>
Reviewed-by: aboldtch, eosterlund
2026-01-28 12:09:14 +00:00 · 2025-11-27 09:02:18 +00:00 · 2025-11-27 09:02:18 +00:00 · de546d0e03
commit de546d0e03
parent 7cd3d7f157
1 changed files with 27 additions and 24 deletions
--- a/src/hotspot/share/gc/z/zRelocate.cpp
+++ b/src/hotspot/share/gc/z/zRelocate.cpp
@@ -1087,7 +1087,6 @@ private:
  ZRelocateSmallAllocator                   _small_allocator;
  ZRelocateMediumAllocator                  _medium_allocator;
  const size_t                              _total_forwardings;
-  volatile size_t                           _numa_local_forwardings;

 public:
  ZRelocateTask(ZRelocationSet* relocation_set,
@@ -1104,8 +1103,7 @@ public:
      _medium_targets(medium_targets),
      _small_allocator(_generation),
      _medium_allocator(_generation, shared_medium_targets),
-      _total_forwardings(relocation_set->nforwardings()),
-      _numa_local_forwardings(0) {
+      _total_forwardings(relocation_set->nforwardings()) {

    for (uint32_t i = 0; i < ZNUMA::count(); i++) {
      ZRelocationSetParallelIterator* const iter = _iters->addr(i);
@@ -1124,18 +1122,17 @@ public:

    // Signal that we're not using the queue anymore. Used mostly for asserts.
    _queue->deactivate();
-
-    if (ZNUMA::is_enabled()) {
-      log_debug(gc, reloc, numa)("Forwardings relocated NUMA-locally: %zu / %zu (%.0f%%)",
-                                 _numa_local_forwardings, _total_forwardings, percent_of(_numa_local_forwardings, _total_forwardings));
-    }
  }

  virtual void work() {
    ZRelocateWork<ZRelocateSmallAllocator> small(&_small_allocator, _small_targets->addr(), _generation);
    ZRelocateWork<ZRelocateMediumAllocator> medium(&_medium_allocator, _medium_targets->addr(), _generation);
+
    const uint32_t num_nodes = ZNUMA::count();
-    uint32_t numa_local_forwardings_worker = 0;
+    const uint32_t start_node = ZNUMA::id();
+    uint32_t current_node = start_node;
+    bool has_affinity = false;
+    bool has_affinity_current_node = false;

    const auto do_forwarding = [&](ZForwarding* forwarding) {
      ZPage* const page = forwarding->page();
@@ -1167,26 +1164,30 @@ public:

    const auto do_forwarding_one_from_iter = [&]() {
      ZForwarding* forwarding;
-      const uint32_t start_node = ZNUMA::id();
-      uint32_t current_node = start_node;

-      for (uint32_t i = 0; i < num_nodes; i++) {
+      for (;;) {
        if (_iters->get(current_node).next_if(&forwarding, check_numa_local, current_node)) {
-          claim_and_do_forwarding(forwarding);
-
-          if (current_node == start_node) {
-            // Track if this forwarding was relocated on the local NUMA node
-            numa_local_forwardings_worker++;
+          // Set thread affinity for NUMA-local processing (if needed)
+          if (UseNUMA && !has_affinity_current_node) {
+            os::numa_set_thread_affinity(Thread::current(), ZNUMA::numa_id_to_node(current_node));
+            has_affinity = true;
+            has_affinity_current_node = true;
          }

+          // Perform the forwarding task
+          claim_and_do_forwarding(forwarding);
          return true;
        }

-        // Check next node.
+        // No work found on the current node, move to the next node
        current_node = (current_node + 1) % num_nodes;
-      }
+        has_affinity_current_node = false;

-      return false;
+        // If we've looped back to the starting node there's no more work to do
+        if (current_node == start_node) {
+          return false;
+        }
+      }
    };

    for (;;) {
@@ -1209,11 +1210,13 @@ public:
      }
    }

-    if (ZNUMA::is_enabled()) {
-      AtomicAccess::add(&_numa_local_forwardings, numa_local_forwardings_worker, memory_order_relaxed);
-    }
-
    _queue->leave();
+
+    if (UseNUMA && has_affinity) {
+      // Restore the affinity of the thread so that it isn't bound to a specific
+      // node any more
+      os::numa_set_thread_affinity(Thread::current(), -1);
+    }
  }

  virtual void resize_workers(uint nworkers) {