8371701: Add ability to set NUMA-affinity for threads

Reviewed-by: aboldtch, ayang
This commit is contained in:
Joel Sikström 2025-11-24 13:40:55 +00:00
parent 8ae4ea8114
commit 0a963b612d
6 changed files with 103 additions and 0 deletions

View File

@ -1747,6 +1747,9 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
// Set the NUMA-node affinity of 'thread'. No-op in this port: the empty
// body means requests to bind a thread to a NUMA node are silently ignored.
void os::numa_set_thread_affinity(Thread *thread, int node) {
}
// No-op implementation of os::numa_make_global for this port; the memory
// range [addr, addr+bytes) is left untouched.
void os::numa_make_global(char *addr, size_t bytes) {
}

View File

@ -1581,6 +1581,9 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
// Set the NUMA-node affinity of 'thread'. No-op in this port: the empty
// body means requests to bind a thread to a NUMA node are silently ignored.
void os::numa_set_thread_affinity(Thread *thread, int node) {
}
// No-op implementation of os::numa_make_global for this port; the memory
// range [addr, addr+bytes) is left untouched.
void os::numa_make_global(char *addr, size_t bytes) {
}

View File

@ -2995,6 +2995,10 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
// Bind 'thread' to NUMA node 'node' by delegating to the Linux-specific
// implementation, identified by the OS thread id.
// NOTE(review): presumably node == -1 restores the original affinity —
// confirm against Linux::numa_set_thread_affinity.
void os::numa_set_thread_affinity(Thread* thread, int node) {
Linux::numa_set_thread_affinity(thread->osthread()->thread_id(), node);
}
// Make [addr, addr+bytes) usable from all NUMA nodes by delegating to
// libnuma's interleave support via Linux::numa_interleave_memory.
void os::numa_make_global(char *addr, size_t bytes) {
Linux::numa_interleave_memory(addr, bytes);
}
@ -3177,6 +3181,8 @@ bool os::Linux::libnuma_init() {
libnuma_dlsym(handle, "numa_set_bind_policy")));
set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t,
libnuma_dlsym(handle, "numa_bitmask_isbitset")));
set_numa_bitmask_clearbit(CAST_TO_FN_PTR(numa_bitmask_clearbit_func_t,
libnuma_dlsym(handle, "numa_bitmask_clearbit")));
set_numa_bitmask_equal(CAST_TO_FN_PTR(numa_bitmask_equal_func_t,
libnuma_dlsym(handle, "numa_bitmask_equal")));
set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
@ -3191,20 +3197,32 @@ bool os::Linux::libnuma_init() {
libnuma_dlsym(handle, "numa_set_preferred")));
set_numa_get_run_node_mask(CAST_TO_FN_PTR(numa_get_run_node_mask_func_t,
libnuma_v2_dlsym(handle, "numa_get_run_node_mask")));
set_numa_sched_setaffinity(CAST_TO_FN_PTR(numa_sched_setaffinity_func_t,
libnuma_v2_dlsym(handle, "numa_sched_setaffinity")));
set_numa_allocate_cpumask(CAST_TO_FN_PTR(numa_allocate_cpumask_func_t,
libnuma_v2_dlsym(handle, "numa_allocate_cpumask")));
if (numa_available() != -1) {
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
set_numa_all_cpus_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_cpus_ptr"));
set_numa_interleave_bitmask(_numa_get_interleave_mask());
set_numa_membind_bitmask(_numa_get_membind());
set_numa_cpunodebind_bitmask(_numa_get_run_node_mask());
// Create an index -> node mapping, since nodes are not always consecutive
_nindex_to_node = new (mtInternal) GrowableArray<int>(0, mtInternal);
rebuild_nindex_to_node_map();
// Create a cpu -> node mapping
_cpu_to_node = new (mtInternal) GrowableArray<int>(0, mtInternal);
rebuild_cpu_to_node_map();
// Create a node -> CPUs mapping
_numa_affinity_masks = new (mtInternal) GrowableArray<struct bitmask*>(0, mtInternal);
build_numa_affinity_masks();
return true;
}
}
@ -3240,6 +3258,42 @@ size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
return ((thr_type == java_thread || thr_type == compiler_thread) ? 0 : os::vm_page_size());
}
// Build one CPU affinity mask per NUMA node (stored in
// _numa_affinity_masks, indexed in node-iteration order) for use when
// binding threads to NUMA nodes.
//
// We only build the affinity masks if running libnuma v2 and every
// libnuma entry point used below was resolved, and we have the affinity
// mask of the process when it started (_numa_all_cpus_ptr).
//
// It's important that we respect any user configuration by removing the
// CPUs we're not allowed to run on from the affinity mask. For example,
// if the user runs the JVM with "numactl -C 0-1,4-5" on a machine with
// the following NUMA setup:
//   NUMA 0: CPUs 0-3, NUMA 1: CPUs 4-7
// We expect to get the following affinity masks:
//   Affinity masks: idx 0 = (0, 1), idx 1 = (4, 5)
void os::Linux::build_numa_affinity_masks() {
  // Each of these function pointers is resolved independently via dlsym
  // and may individually be null on older libnuma versions, so guard all
  // of them — not just _numa_node_to_cpus_v2 — before use.
  if (_numa_node_to_cpus_v2 == nullptr ||
      _numa_allocate_cpumask == nullptr ||
      _numa_bitmask_isbitset == nullptr ||
      _numa_bitmask_clearbit == nullptr ||
      _numa_all_cpus_ptr == nullptr) {
    return;
  }

  const int num_nodes = get_existing_num_nodes();
  const unsigned num_cpus = (unsigned)os::processor_count();

  for (int i = 0; i < num_nodes; i++) {
    struct bitmask* affinity_mask = _numa_allocate_cpumask();

    // Fill the affinity mask with all CPUs belonging to NUMA node i.
    _numa_node_to_cpus_v2(i, affinity_mask);

    // Clear the bits of all CPUs that the process is not allowed to
    // execute tasks on.
    for (unsigned j = 0; j < num_cpus; j++) {
      if (!_numa_bitmask_isbitset(_numa_all_cpus_ptr, j)) {
        _numa_bitmask_clearbit(affinity_mask, j);
      }
    }

    // NOTE(review): masks are pushed in loop-index order; if node ids are
    // not consecutive (the reason _nindex_to_node exists), index and node
    // id may diverge — confirm callers pass matching values.
    _numa_affinity_masks->push(affinity_mask);
  }
}
void os::Linux::rebuild_nindex_to_node_map() {
int highest_node_number = Linux::numa_max_node();
@ -3355,6 +3409,25 @@ int os::Linux::numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen)
return -1;
}
// Pin thread 'tid' to the CPUs of NUMA node 'node', or — when 'node' is
// -1 — restore the CPU affinity the process had when the VM started.
//
// Silently does nothing unless libnuma v2's numa_sched_setaffinity was
// resolved, the process' initial CPU mask is known, and the per-node
// affinity masks have been built.
void os::Linux::numa_set_thread_affinity(pid_t tid, int node) {
  const bool can_set_affinity = _numa_sched_setaffinity != nullptr &&
                                _numa_all_cpus_ptr != nullptr &&
                                !_numa_affinity_masks->is_empty();
  if (!can_set_affinity) {
    return;
  }

  // node == -1 reverts to the start-up affinity; otherwise use the
  // prebuilt mask for that node.
  // NOTE(review): at(node) assumes 0 <= node < number of built masks —
  // confirm callers never pass a larger node id.
  struct bitmask* mask = (node == -1) ? _numa_all_cpus_ptr
                                      : _numa_affinity_masks->at(node);
  _numa_sched_setaffinity(tid, mask);
}
int os::Linux::get_node_by_cpu(int cpu_id) {
if (cpu_to_node() != nullptr && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
return cpu_to_node()->at(cpu_id);
@ -3364,6 +3437,7 @@ int os::Linux::get_node_by_cpu(int cpu_id) {
GrowableArray<int>* os::Linux::_cpu_to_node;
GrowableArray<int>* os::Linux::_nindex_to_node;
GrowableArray<struct bitmask*>* os::Linux::_numa_affinity_masks;
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_node_to_cpus_v2_func_t os::Linux::_numa_node_to_cpus_v2;
@ -3375,17 +3449,21 @@ os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
os::Linux::numa_bitmask_clearbit_func_t os::Linux::_numa_bitmask_clearbit;
os::Linux::numa_bitmask_equal_func_t os::Linux::_numa_bitmask_equal;
os::Linux::numa_distance_func_t os::Linux::_numa_distance;
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
os::Linux::numa_get_run_node_mask_func_t os::Linux::_numa_get_run_node_mask;
os::Linux::numa_sched_setaffinity_func_t os::Linux::_numa_sched_setaffinity;
os::Linux::numa_allocate_cpumask_func_t os::Linux::_numa_allocate_cpumask;
os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages;
os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred;
os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
unsigned long* os::Linux::_numa_all_nodes;
struct bitmask* os::Linux::_numa_all_nodes_ptr;
struct bitmask* os::Linux::_numa_nodes_ptr;
struct bitmask* os::Linux::_numa_all_cpus_ptr;
struct bitmask* os::Linux::_numa_interleave_bitmask;
struct bitmask* os::Linux::_numa_membind_bitmask;
struct bitmask* os::Linux::_numa_cpunodebind_bitmask;

View File

@ -45,6 +45,10 @@ class os::Linux {
static GrowableArray<int>* _cpu_to_node;
static GrowableArray<int>* _nindex_to_node;
static GrowableArray<struct bitmask*>* _numa_affinity_masks;
static void build_numa_affinity_masks();
protected:
static physical_memory_size_type _physical_memory;
@ -230,8 +234,11 @@ class os::Linux {
typedef void (*numa_set_preferred_func_t)(int node);
typedef void (*numa_set_bind_policy_func_t)(int policy);
typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
typedef int (*numa_bitmask_clearbit_func_t)(struct bitmask *bmp, unsigned int n);
typedef int (*numa_bitmask_equal_func_t)(struct bitmask *bmp1, struct bitmask *bmp2);
typedef int (*numa_distance_func_t)(int node1, int node2);
typedef int (*numa_sched_setaffinity_func_t)(pid_t pid, struct bitmask* mask);
typedef struct bitmask* (*numa_allocate_cpumask_func_t)(void);
static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus;
@ -244,6 +251,7 @@ class os::Linux {
static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
static numa_set_bind_policy_func_t _numa_set_bind_policy;
static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
static numa_bitmask_clearbit_func_t _numa_bitmask_clearbit;
static numa_bitmask_equal_func_t _numa_bitmask_equal;
static numa_distance_func_t _numa_distance;
static numa_get_membind_func_t _numa_get_membind;
@ -251,9 +259,12 @@ class os::Linux {
static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
static numa_move_pages_func_t _numa_move_pages;
static numa_set_preferred_func_t _numa_set_preferred;
static numa_sched_setaffinity_func_t _numa_sched_setaffinity;
static numa_allocate_cpumask_func_t _numa_allocate_cpumask;
static unsigned long* _numa_all_nodes;
static struct bitmask* _numa_all_nodes_ptr;
static struct bitmask* _numa_nodes_ptr;
static struct bitmask* _numa_all_cpus_ptr;
static struct bitmask* _numa_interleave_bitmask;
static struct bitmask* _numa_membind_bitmask;
static struct bitmask* _numa_cpunodebind_bitmask;
@ -269,6 +280,7 @@ class os::Linux {
static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
static void set_numa_bitmask_clearbit(numa_bitmask_clearbit_func_t func) { _numa_bitmask_clearbit = func; }
static void set_numa_bitmask_equal(numa_bitmask_equal_func_t func) { _numa_bitmask_equal = func; }
static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
@ -279,9 +291,12 @@ class os::Linux {
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
static void set_numa_all_cpus_ptr(struct bitmask **ptr) { _numa_all_cpus_ptr = (ptr == nullptr ? nullptr : *ptr); }
static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; }
static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; }
static void set_numa_cpunodebind_bitmask(struct bitmask* ptr) { _numa_cpunodebind_bitmask = ptr ; }
static void set_numa_sched_setaffinity(numa_sched_setaffinity_func_t func) { _numa_sched_setaffinity = func; }
static void set_numa_allocate_cpumask(numa_allocate_cpumask_func_t func) { _numa_allocate_cpumask = func; }
static int sched_getcpu_syscall(void);
enum NumaAllocationPolicy{
@ -292,6 +307,8 @@ class os::Linux {
static NumaAllocationPolicy _current_numa_policy;
public:
static void numa_set_thread_affinity(pid_t tid, int node);
static int sched_getcpu() { return _sched_getcpu != nullptr ? _sched_getcpu() : -1; }
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
static int numa_max_node() { return _numa_max_node != nullptr ? _numa_max_node() : -1; }

View File

@ -3752,6 +3752,7 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
// Per-thread NUMA binding is not implemented on this platform (no-op).
void os::numa_set_thread_affinity(Thread *thread, int node) { }
// No-op: the memory range is left untouched on this platform.
void os::numa_make_global(char *addr, size_t bytes) { }
// No-op: the NUMA-local placement hint is ignored on this platform.
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { }
// Number of NUMA groups; clamped to at least 1 even if the node list is empty.
size_t os::numa_get_groups_num() { return MAX2(numa_node_list_holder.get_count(), 1); }

View File

@ -534,6 +534,7 @@ class os: AllStatic {
static void realign_memory(char *addr, size_t bytes, size_t alignment_hint);
// NUMA-specific interface
static void numa_set_thread_affinity(Thread* thread, int node);
static void numa_make_local(char *addr, size_t bytes, int lgrp_hint);
static void numa_make_global(char *addr, size_t bytes);
static size_t numa_get_groups_num();