From 700ff1b5612a67bf4d6c6df254f8f019eeaff259 Mon Sep 17 00:00:00 2001 From: Robert Toyonaga Date: Thu, 19 Feb 2026 16:19:22 -0500 Subject: [PATCH] Use VirtualAlloc2 to fix races test working. need to focus on map refactor and one more test new test, comments, fix aix split memory accounting, use unified logging instead of warning() remove map API code. Rename to PlacholderRegion. Fix os::release remove map_memory API code fix race and simplify AOTMetaspace::reserve_address_space_for_archives clean up comments test: map_memory_to_file_aligned_larger remove old test and modify tests clean up fix aix and comments cleanup gate fix anothe rgate fix fix windows test account for wish address fix missing NMT accounting comments small cleanup trailing whitespaces --- src/hotspot/os/aix/os_aix.cpp | 43 +++ src/hotspot/os/bsd/os_bsd.cpp | 32 +++ src/hotspot/os/linux/os_linux.cpp | 32 +++ .../os/windows/gc/z/zSyscall_windows.cpp | 29 +- src/hotspot/os/windows/os_windows.cpp | 270 +++++++++++++++++- src/hotspot/os/windows/os_windows.hpp | 17 ++ src/hotspot/share/cds/aotMetaspace.cpp | 69 ++--- src/hotspot/share/runtime/os.cpp | 57 ++++ src/hotspot/share/runtime/os.hpp | 54 ++++ test/hotspot/gtest/runtime/test_os.cpp | 121 +++++++- .../hotspot/gtest/runtime/test_os_windows.cpp | 83 ++++++ 11 files changed, 731 insertions(+), 76 deletions(-) diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp index 7c08d6de2db..2fcd3ecc4f3 100644 --- a/src/hotspot/os/aix/os_aix.cpp +++ b/src/hotspot/os/aix/os_aix.cpp @@ -1798,6 +1798,49 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { } } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + // Always round to os::vm_page_size(), which may be larger than 4K. 
+ bytes = align_up(bytes, os::vm_page_size()); + + // shmated memory cannot be split after allocation + if (os::vm_page_size() == 4*K || g_multipage_support.can_use_64K_mmap_pages) { + char* base = reserve_mmaped_memory(bytes, addr); + return PlaceholderRegion(base, base != nullptr ? bytes : 0); + } + return PlaceholderRegion(); +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + // On AIX, mmap regions are inherently splittable. Just do bookkeeping. + // pd_reserve_placeholder_memory guarantees mmaped (not shmated) memory. + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(offset > 0, "Offset must be positive"); + assert(offset < region_size, "Offset must be less than region size"); + + // update vmembk to reflect the split + vmembk_t* const vmi = vmembk_find(base); + guarantee(vmi != nullptr, "vmembk not found for splittable region at " PTR_FORMAT, p2i(base)); + guarantee(vmi->type != VMEM_SHMATED, "Cannot split shmated memory at " PTR_FORMAT, p2i(base)); + + vmembk_add(base, offset, vmi->pagesize, vmi->type); + vmi->addr = base + offset; + vmi->size = region_size - offset; + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. + return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + return region.base(); +} + bool os::pd_release_memory(char* addr, size_t size) { // Dynamically do different things for mmap/shmat. 
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp index 29ebe65e0db..e0ec03560f4 100644 --- a/src/hotspot/os/bsd/os_bsd.cpp +++ b/src/hotspot/os/bsd/os_bsd.cpp @@ -1824,6 +1824,38 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { return anon_mmap(nullptr /* addr */, bytes, exec); } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + // mmap returns memory that is splittable by default. + char* base; + if (addr != nullptr) { + base = pd_attempt_reserve_memory_at(addr, bytes, exec); + } else { + base = pd_reserve_memory(bytes, exec); + } + return PlaceholderRegion(base, base != nullptr ? bytes : 0); +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + // On BSD, mmap regions are inherently splittable. Just do bookkeeping. + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(offset > 0, "Offset must be positive"); + assert(offset < region_size, "Offset must be less than region size"); + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. + return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + return region.base(); +} + bool os::pd_release_memory(char* addr, size_t size) { return anon_munmap(addr, size); } diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index 9c2fbab7535..84ad1a42e8f 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -3753,6 +3753,38 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { return anon_mmap(nullptr, bytes); } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + // mmap returns memory that is splittable by default. 
+ char* base; + if (addr != nullptr) { + base = pd_attempt_reserve_memory_at(addr, bytes, exec); + } else { + base = pd_reserve_memory(bytes, exec); + } + return PlaceholderRegion(base, base != nullptr ? bytes : 0); +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + // On Linux, mmap regions are inherently splittable. Just do bookkeeping. + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(offset > 0, "Offset must be positive"); + assert(offset < region_size, "Offset must be less than region size"); + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. + return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + return region.base(); +} + bool os::pd_release_memory(char* addr, size_t size) { return anon_munmap(addr, size); } diff --git a/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp b/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp index 527958770c0..5b947739276 100644 --- a/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp +++ b/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp @@ -23,6 +23,7 @@ #include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zSyscall_windows.hpp" +#include "os_windows.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" @@ -33,31 +34,9 @@ ZSyscall::VirtualFreeExFn ZSyscall::VirtualFreeEx; ZSyscall::MapViewOfFile3Fn ZSyscall::MapViewOfFile3; ZSyscall::UnmapViewOfFile2Fn ZSyscall::UnmapViewOfFile2; -static void* lookup_kernelbase_library() { - const char* const name = "KernelBase"; - char ebuf[1024]; - void* const handle = os::dll_load(name, ebuf, sizeof(ebuf)); - if (handle == nullptr) { - log_error_p(gc)("Failed to load library: %s", name); - } - return handle; -} - -static void* lookup_kernelbase_symbol(const char* 
name) { - static void* const handle = lookup_kernelbase_library(); - if (handle == nullptr) { - return nullptr; - } - return os::dll_lookup(handle, name); -} - -static bool has_kernelbase_symbol(const char* name) { - return lookup_kernelbase_symbol(name) != nullptr; -} - template static void install_kernelbase_symbol(Fn*& fn, const char* name) { - fn = reinterpret_cast(lookup_kernelbase_symbol(name)); + fn = reinterpret_cast(os::win32::lookup_kernelbase_symbol(name)); } template @@ -83,10 +62,10 @@ void ZSyscall::initialize() { bool ZSyscall::is_supported() { // Available in Windows version 1803 and later - return has_kernelbase_symbol("VirtualAlloc2"); + return os::win32::lookup_kernelbase_symbol("VirtualAlloc2") != nullptr; } bool ZSyscall::is_large_pages_supported() { // Available in Windows version 1809 and later - return has_kernelbase_symbol("CreateFileMapping2"); + return os::win32::lookup_kernelbase_symbol("CreateFileMapping2") != nullptr; } diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp index 76f47640e5a..c882cf16160 100644 --- a/src/hotspot/os/windows/os_windows.cpp +++ b/src/hotspot/os/windows/os_windows.cpp @@ -243,6 +243,46 @@ static LPVOID virtualAllocExNuma(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSiz return result; } +// VirtualAlloc2 (since Windows version 1803). loaded from KernelBase in os::init_2() +os::win32::VirtualAlloc2Fn os::win32::VirtualAlloc2 = nullptr; + +// MapViewOfFile3 (since Windows version 1803). 
loaded from KernelBase in os::init_2() +os::win32::MapViewOfFile3Fn os::win32::MapViewOfFile3 = nullptr; + +static void* lookup_kernelbase_library() { + const char* const name = "KernelBase"; + char ebuf[1024]; + void* const handle = os::dll_load(name, ebuf, sizeof(ebuf)); + if (handle == nullptr) { + log_trace(os)("Failed to load library: %s", name); + } + return handle; +} + +void* os::win32::lookup_kernelbase_symbol(const char* name) { + static void* const handle = lookup_kernelbase_library(); + if (handle == nullptr) { + return nullptr; + } + return os::dll_lookup(handle, name); +} + +template +static void install_kernelbase_symbol(Fn*& fn, const char* name) { + fn = reinterpret_cast(os::win32::lookup_kernelbase_symbol(name)); +} + +static void initialize_kernelbase_apis() { + install_kernelbase_symbol(os::win32::VirtualAlloc2, "VirtualAlloc2"); + log_info(os)("VirtualAlloc2 is%s available.", os::win32::VirtualAlloc2 == nullptr ? " not" : ""); + install_kernelbase_symbol(os::win32::MapViewOfFile3, "MapViewOfFile3"); + log_info(os)("MapViewOfFile3 is%s available.", os::win32::MapViewOfFile3 == nullptr ? 
" not" : ""); +} + +static bool is_VirtualAlloc2_supported() { + return os::win32::VirtualAlloc2 != nullptr; +} + // Logging wrapper for MapViewOfFileEx static LPVOID mapViewOfFileEx(HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) { @@ -2896,6 +2936,18 @@ LONG WINAPI topLevelUnhandledExceptionFilter(struct _EXCEPTION_POINTERS* excepti #define MEM_LARGE_PAGES 0x20000000 #endif +#ifndef MEM_PRESERVE_PLACEHOLDER + #define MEM_PRESERVE_PLACEHOLDER 0x00000002 +#endif + +#ifndef MEM_REPLACE_PLACEHOLDER + #define MEM_REPLACE_PLACEHOLDER 0x00004000 +#endif + +#ifndef MEM_RESERVE_PLACEHOLDER + #define MEM_RESERVE_PLACEHOLDER 0x00040000 +#endif + // Container for NUMA node list info class NUMANodeListHolder { private: @@ -3257,7 +3309,7 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in return map_memory_to_file(base, size, fd); } -// Multiple threads can race in this code but it's not possible to unmap small sections of +// Multiple threads can race in this code (if VirtualAlloc2 is not supported) but it's not possible to unmap small sections of // virtual space to get requested alignment, like posix-like os's. // Windows prevents multiple thread from remapping over each other so this loop is thread-safe. static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc, MemTag mem_tag) { @@ -3266,6 +3318,59 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi assert(is_aligned(size, os::vm_allocation_granularity()), "Size must be a multiple of allocation granularity (page size)"); + // VirtualAlloc2 and MapViewOfFile3 support alignment natively. + // This avoids the race prone retry loop below. 
+ if (is_VirtualAlloc2_supported() && is_power_of_2(alignment) && + alignment >= os::vm_allocation_granularity()) { + + MEM_ADDRESS_REQUIREMENTS requirements = {0}; + requirements.Alignment = alignment; + + MEM_EXTENDED_PARAMETER param = {0}; + param.Type = MemExtendedParameterAddressRequirements; + param.Pointer = &requirements; + + char* aligned_base = nullptr; + + if (file_desc != -1 && os::win32::MapViewOfFile3 != nullptr) { + // File-backed aligned mapping. + HANDLE fh = (HANDLE)_get_osfhandle(file_desc); + HANDLE fileMapping = CreateFileMapping(fh, nullptr, PAGE_READWRITE,(DWORD)(size >> 32), (DWORD)(size & 0xFFFFFFFF), nullptr); + if (fileMapping != nullptr) { + aligned_base = (char*)os::win32::MapViewOfFile3( + fileMapping, + GetCurrentProcess(), + nullptr, // let the system choose an aligned address + 0, // offset + size, + 0, // no special allocation type flags + PAGE_READWRITE, + ¶m, 1); + CloseHandle(fileMapping); + } + } else if (file_desc == -1) { + // Anonymous aligned reservation. 
+ aligned_base = (char*)os::win32::VirtualAlloc2( + GetCurrentProcess(), + nullptr, // let the system choose an aligned address + size, + MEM_RESERVE, + PAGE_READWRITE, + ¶m, 1); + } + + if (aligned_base != nullptr) { + assert(is_aligned(aligned_base, alignment), "Result must be aligned"); + if (file_desc == -1) { + MemTracker::record_virtual_memory_reserve(aligned_base, size, CALLER_PC, mem_tag); + } else { + MemTracker::record_virtual_memory_reserve_and_commit(aligned_base, size, CALLER_PC, mem_tag); + } + return aligned_base; + } + log_trace(os)("Aligned allocation via VirtualAlloc2/MapViewOfFile3 failed, falling back to retry loop."); + } + size_t extra_size = size + alignment; assert(extra_size >= size, "overflow, size is too large to allow alignment"); @@ -3357,6 +3462,147 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { return pd_attempt_reserve_memory_at(nullptr /* addr */, bytes, exec); } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + if (!is_VirtualAlloc2_supported()) { + return PlaceholderRegion(); + } + + char* res = (char*)os::win32::VirtualAlloc2( + GetCurrentProcess(), + addr, + bytes, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, + nullptr, 0); + + if (res != nullptr) { + if (addr != nullptr && res != addr) { + // Got a different address than requested; release and fail. 
+ virtualFree(res, 0, MEM_RELEASE); + log_warning(os)("VirtualAlloc2 placeholder at requested " PTR_FORMAT " returned different address " PTR_FORMAT ", released.", p2i(addr), p2i(res)); + return PlaceholderRegion(); + } + log_trace(os)("VirtualAlloc2 placeholder of size (%zu) returned " PTR_FORMAT ".", bytes, p2i(res)); + return PlaceholderRegion(res, bytes); + } else { + PreserveLastError ple; + log_warning(os)("VirtualAlloc2 placeholder reservation of size (%zu) at " PTR_FORMAT " failed (%u).", bytes, p2i(addr), ple.v); + return PlaceholderRegion(); + } +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + guarantee(is_VirtualAlloc2_supported(), "pd_split_memory requires VirtualAlloc2 on Windows."); + + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null."); + assert(offset > 0, "Offset must be positive (nothing to split at 0)."); + assert(offset < region_size, "Offset must be less than region size."); + + // VirtualFree with MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER splits the + // placeholder [base, base+region_size) into two placeholders: + // [base, base+offset) and [base+offset, base+region_size) + // + // With correct inputs, this should not fail. + // A failure indicates either a programming error (e.g., bad alignment, + // region not actually a placeholder) or a catastrophic system problem. + // Crashing with a diagnostic is more useful than attempting recovery. + BOOL result = virtualFree(base, offset, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER); + guarantee(result != FALSE, + "Failed to split placeholder at " PTR_FORMAT " (offset %zu): error %lu.", + p2i(base), offset, GetLastError()); + + log_trace(os)("Split placeholder " RANGE_FORMAT " at offset %zu.", + RANGE_FORMAT_ARGS(base, region_size), offset); + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. 
+ return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + return os::win32::convert_placeholder_to_reserved(region); +} + +// This function is for convenience to help with reserve_with_numa_placeholder. +char* os::win32::convert_placeholder_to_reserved(PlaceholderRegion region, int numa_node) { + guarantee(is_VirtualAlloc2_supported(), "convert_placeholder_to_reserved requires VirtualAlloc2"); + + char* base = region.base(); + size_t size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(size > 0, "Region size must be positive"); + + MEM_EXTENDED_PARAMETER param = { 0 }; + MEM_EXTENDED_PARAMETER* param_ptr = nullptr; + ULONG param_count = 0; + + if (numa_node >= 0) { + param.Type = MemExtendedParameterNumaNode; + param.ULong = (DWORD)numa_node; + param_ptr = ¶m; + param_count = 1; + } + + char* reserved = (char*)os::win32::VirtualAlloc2( + GetCurrentProcess(), + base, + size, + MEM_RESERVE | MEM_REPLACE_PLACEHOLDER, + PAGE_READWRITE, + param_ptr, param_count); + guarantee(reserved != nullptr, + "Failed to convert placeholder to reservation at " PTR_FORMAT " (%zu, numa node %d): error %lu.", + p2i(base), size, numa_node, GetLastError()); + + if (numa_node >= 0) { + log_trace(os)("Converted placeholder " RANGE_FORMAT " to reservation on NUMA node %d.", RANGE_FORMAT_ARGS(reserved, size), numa_node); + } else { + log_trace(os)("Converted placeholder " RANGE_FORMAT " to reservation.", RANGE_FORMAT_ARGS(reserved, size)); + } + + return reserved; +} + +// Reserve a region split across NUMA nodes. +// This uses VirtualAlloc2 placeholders in order to avoid races when splitting up the initial reservation into chunks assigned to different nodes. +// Returns the base address of the reserved range, or nullptr on failure. 
+char* os::win32::reserve_with_numa_placeholder(char* addr, size_t bytes) { + assert(is_VirtualAlloc2_supported(), "requires VirtualAlloc2"); + + const size_t chunk_size = NUMAInterleaveGranularity; + + // Reserve the full range as a placeholder. + // If we requested an address, pd_reserve_placeholder_memory will obtain it or fail. + PlaceholderRegion remaining = os::pd_reserve_placeholder_memory(bytes, false, addr); + if (remaining.is_empty()) { + log_warning(os)("Failed to reserve placeholder for NUMA interleaving (" PTR_FORMAT ", %zu).", p2i(addr), bytes); + return nullptr; + } + + char* const base = remaining.base(); + log_trace(os)("Created VirtualAlloc2 NUMA placeholder at " RANGE_FORMAT " (%zu bytes).", RANGE_FORMAT_ARGS(base, bytes), bytes); + + int count = 0; + const int node_count = numa_node_list_holder.get_count(); + + while (!remaining.is_empty()) { + size_t bytes_to_rq = MIN2(remaining.size(), chunk_size - ((size_t)remaining.base() % chunk_size)); + PlaceholderRegion chunk = os::split_memory(remaining, bytes_to_rq); + + DWORD node = node_count > 0 ? numa_node_list_holder.get_node_list_entry(count % node_count) : 0; // Assign 0 for testing on UMA systems + convert_placeholder_to_reserved(chunk, (int)node); + count++; + } + + return base; +} + // Reserve memory at an arbitrary address, only if that area is // available (and not reserved for something else). char* os::pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool exec) { @@ -3366,23 +3612,33 @@ char* os::pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool exec) { char* res; // note that if UseLargePages is on, all the areas that require interleaving // will go thru reserve_memory_special rather than thru here. 
- bool use_individual = (UseNUMAInterleaving && !UseLargePages); - if (!use_individual) { - res = (char*)virtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE); - } else { + bool use_numa_interleaving = (UseNUMAInterleaving && !UseLargePages); + + if (use_numa_interleaving && is_VirtualAlloc2_supported()) { + // Splittable NUMA interleaving with VirtualAlloc2 placeholders. + res = win32::reserve_with_numa_placeholder(addr, bytes); + if (res == nullptr) { + log_warning(os)("NUMA allocation using placeholders failed"); + } + } else if (use_numa_interleaving) { + // Non-splittable NUMA interleaving: allocate_pages_individually (possible races). elapsedTimer reserveTimer; if (Verbose && PrintMiscellaneous) reserveTimer.start(); // in numa interleaving, we have to allocate pages individually // (well really chunks of NUMAInterleaveGranularity size) res = allocate_pages_individually(bytes, addr, MEM_RESERVE, PAGE_READWRITE); if (res == nullptr) { - warning("NUMA page allocation failed"); + log_warning(os)("NUMA page allocation failed"); } if (Verbose && PrintMiscellaneous) { reserveTimer.stop(); tty->print_cr("reserve_memory of %zx bytes took " JLONG_FORMAT " ms (" JLONG_FORMAT " ticks)", bytes, reserveTimer.milliseconds(), reserveTimer.ticks()); } + } else { + // Standard reservation. Callers who need splittable placeholders should use + // pd_reserve_placeholder_memory instead. + res = (char*)virtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE); } assert(res == nullptr || addr == nullptr || addr == res, "Unexpected address from reserve."); @@ -4582,6 +4838,8 @@ jint os::init_2(void) { } log_info(os, thread)("The SetThreadDescription API is%s available.", _SetThreadDescription == nullptr ? " not" : ""); + // Prepare KernelBase APIs (VirtualAlloc2, MapViewOfFile3) if available (Windows version 1803). 
+ initialize_kernelbase_apis(); return JNI_OK; } diff --git a/src/hotspot/os/windows/os_windows.hpp b/src/hotspot/os/windows/os_windows.hpp index d4a7d51c59b..c384247bf8d 100644 --- a/src/hotspot/os/windows/os_windows.hpp +++ b/src/hotspot/os/windows/os_windows.hpp @@ -109,11 +109,28 @@ class os::win32 { // load dll from Windows system directory or Windows directory static HINSTANCE load_Windows_dll(const char* name, char *ebuf, int ebuflen); + // Resolve a symbol from KernelBase.dll, returns nullptr if not found. + static void* lookup_kernelbase_symbol(const char* name); + + // VirtualAlloc2 (since Windows version 1803) + // Resolved from KernelBase during os::init_2() or nullptr if unavailable. + typedef PVOID (WINAPI *VirtualAlloc2Fn)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); + static VirtualAlloc2Fn VirtualAlloc2; + + // MapViewOfFile3 (since Windows version 1803) + // Resolved from KernelBase during os::init_2() or nullptr if unavailable. + typedef PVOID (WINAPI *MapViewOfFile3Fn)(HANDLE, HANDLE, PVOID, ULONG64, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); + static MapViewOfFile3Fn MapViewOfFile3; + private: static void initialize_performance_counter(); static void initialize_windows_version(); static DWORD active_processors_in_job_object(DWORD* active_processor_groups = nullptr); + static char* reserve_with_numa_placeholder(char* addr, size_t bytes); + // Replaces a placeholder with a reserved region via VirtualAlloc2(MEM_REPLACE_PLACEHOLDER). + // If numa_node >= 0, binds the reservation to that NUMA node. 
+ static char* convert_placeholder_to_reserved(PlaceholderRegion region, int numa_node = -1); public: // Generic interface: diff --git a/src/hotspot/share/cds/aotMetaspace.cpp b/src/hotspot/share/cds/aotMetaspace.cpp index b75d7628aa9..03cd24fabc9 100644 --- a/src/hotspot/share/cds/aotMetaspace.cpp +++ b/src/hotspot/share/cds/aotMetaspace.cpp @@ -1688,7 +1688,7 @@ MapArchiveResult AOTMetaspace::map_archives(FileMapInfo* static_mapinfo, FileMap // this with use_requested_addr, since we're going to patch all the // pointers anyway so there's no benefit to mmap. if (use_requested_addr) { - assert(!total_space_rs.is_reserved(), "Should not be reserved for Windows"); + assert(!total_space_rs.is_reserved(), "Should not be reserved when use_requested_addr is true"); aot_log_info(aot)("Windows mmap workaround: releasing archive space."); MemoryReserver::release(archive_space_rs); // Mark as not reserved @@ -1879,10 +1879,10 @@ MapArchiveResult AOTMetaspace::map_archives(FileMapInfo* static_mapinfo, FileMap // Return: // // - On success: -// - total_space_rs will be reserved as whole for archive_space_rs and -// class_space_rs if UseCompressedClassPointers is true. -// On Windows, try reserve archive_space_rs and class_space_rs -// separately first if use_archive_base_addr is true. +// - If UseCompressedClassPointers and use_archive_base_addr are both true, +// archive_space_rs and class_space_rs are reserved as independent regions (placeholder split). +// total_space_rs is not set in this case. But if use_archive_base_addr is false, +// total_space_rs is reserved as one block for archive_space_rs and class_space_rs. // - archive_space_rs will be reserved and large enough to host static and // if needed dynamic archive: [Base, A). 
// archive_space_rs.base and size will be aligned to CDS reserve @@ -1978,46 +1978,37 @@ char* AOTMetaspace::reserve_address_space_for_archives(FileMapInfo* static_mapin } assert(total_range_size > ccs_begin_offset, "must be"); - if (use_windows_memory_mapping() && use_archive_base_addr) { - if (base_address != nullptr) { - // On Windows, we cannot safely split a reserved memory space into two (see JDK-8255917). - // Hence, we optimistically reserve archive space and class space side-by-side. We only - // do this for use_archive_base_addr=true since for use_archive_base_addr=false case - // caller will not split the combined space for mapping, instead read the archive data - // via sequential file IO. - address ccs_base = base_address + archive_space_size + gap_size; - archive_space_rs = MemoryReserver::reserve((char*)base_address, - archive_space_size, - archive_space_alignment, - os::vm_page_size(), - mtNone); - class_space_rs = MemoryReserver::reserve((char*)ccs_base, - class_space_size, - class_space_alignment, - os::vm_page_size(), - mtNone); + if (use_archive_base_addr && base_address != nullptr) { + os::PlaceholderRegion placeholder = os::reserve_placeholder_memory(total_range_size, mtNone, false /* exec */, (char*)base_address); + + if (!placeholder.is_empty()) { + os::PlaceholderRegion archive_placeholder = os::split_memory(placeholder, ccs_begin_offset); + // placeholder has been shrunk to [base+ccs_begin_offset, end) = class space + + char* archive_base = os::convert_to_reserved(archive_placeholder); + char* class_base = os::convert_to_reserved(placeholder); + + archive_space_rs = ReservedSpace(archive_base, ccs_begin_offset, + archive_space_alignment, os::vm_page_size(), + false /* exec */, false /* special */); + class_space_rs = ReservedSpace(class_base, class_space_size, + class_space_alignment, os::vm_page_size(), + false /* exec */, false /* special */); + MemTracker::record_virtual_memory_split_reserved(archive_base, total_range_size, + 
ccs_begin_offset, mtClassShared, mtClass); } + if (!archive_space_rs.is_reserved() || !class_space_rs.is_reserved()) { release_reserved_spaces(total_space_rs, archive_space_rs, class_space_rs); return nullptr; } - MemTracker::record_virtual_memory_tag(archive_space_rs, mtClassShared); - MemTracker::record_virtual_memory_tag(class_space_rs, mtClass); } else { - if (use_archive_base_addr && base_address != nullptr) { - total_space_rs = MemoryReserver::reserve((char*) base_address, - total_range_size, - base_address_alignment, - os::vm_page_size(), - mtNone); - } else { - // We did not manage to reserve at the preferred address, or were instructed to relocate. In that - // case we reserve wherever possible, but the start address needs to be encodable as narrow Klass - // encoding base since the archived heap objects contain narrow Klass IDs pre-calculated toward the start - // of the shared Metaspace. That prevents us from using zero-based encoding and therefore we won't - // try allocating in low-address regions. - total_space_rs = Metaspace::reserve_address_space_for_compressed_classes(total_range_size, false /* optimize_for_zero_base */); - } + // We did not manage to reserve at the preferred address, or were instructed to relocate. In that + // case we reserve wherever possible, but the start address needs to be encodable as narrow Klass + // encoding base since the archived heap objects contain narrow Klass IDs pre-calculated toward the start + // of the shared Metaspace. That prevents us from using zero-based encoding and therefore we won't + // try allocating in low-address regions. 
+ total_space_rs = Metaspace::reserve_address_space_for_compressed_classes(total_range_size, false /* optimize_for_zero_base */); if (!total_space_rs.is_reserved()) { return nullptr; diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index 16335f97fdb..c060cd5cedc 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -1971,6 +1971,63 @@ char* os::reserve_memory(size_t bytes, MemTag mem_tag, bool executable) { return result; } +os::PlaceholderRegion os::reserve_placeholder_memory(size_t bytes, MemTag mem_tag, bool executable, char* addr) { + assert(bytes > 0, "Size must be a value greater than 0"); + PlaceholderRegion result = pd_reserve_placeholder_memory(bytes, executable, addr); + if (!result.is_empty()) { + MemTracker::record_virtual_memory_reserve(result.base(), result.size(), CALLER_PC, mem_tag); + log_debug(os, map)("Reserved placeholder memory " RANGEFMT, RANGEFMTARGS(result.base(), result.size())); + } else { + log_info(os, map)("Reserve placeholder memory failed (%zu bytes)", bytes); + } + return result; +} + +os::PlaceholderRegion os::split_memory(PlaceholderRegion& region, size_t offset) { + assert(!region.is_empty(), "Region cannot be empty"); + assert(offset > 0, "Offset must be a value greater than 0"); + assert(offset <= region.size(), "Offset must be less than or equal to region size"); + assert(is_aligned(region.base(), os::vm_page_size()), "Region base should be page-aligned"); + assert(is_aligned(offset, os::vm_page_size()), "Offset should be page-aligned"); + + char* original_base = region.base(); + size_t original_size = region.size(); + + if (offset == original_size) { + // No split needed. Return the original region. + PlaceholderRegion result = region; + // The trailing piece is empty now. Nothing left. 
+ region = PlaceholderRegion(); + log_debug(os, map)("Split memory consumed the whole region: " RANGEFMT, RANGEFMTARGS(original_base, original_size)); + return result; + } + + PlaceholderRegion leading = pd_split_memory(region, offset); + + if (leading.is_empty()) { + fatal("Split memory at offset %zu failed. Region: " RANGEFMT, offset, RANGEFMTARGS(original_base, original_size)); + } + log_debug(os, map)("Split memory at offset %zu: " RANGEFMT " -> " RANGEFMT " + " RANGEFMT, + offset, + RANGEFMTARGS(original_base, original_size), + RANGEFMTARGS(leading.base(), leading.size()), + RANGEFMTARGS(region.base(), region.size())); + return leading; +} + +char* os::convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + assert(is_aligned(region.base(), os::vm_page_size()), "Region base should be page-aligned"); + assert(is_aligned(region.size(), os::vm_page_size()), "Region size should be page-aligned"); + + char* result = pd_convert_to_reserved(region); + if (result == nullptr) { + fatal("Convert placeholder region " RANGEFMT " to reserved region failed", RANGEFMTARGS(region.base(), region.size())); + } + log_debug(os, map)("Converted placeholder region " RANGEFMT " to reserved region at " PTR_FORMAT, RANGEFMTARGS(region.base(), region.size()), p2i(result)); + return result; +} + char* os::attempt_reserve_memory_at(char* addr, size_t bytes, MemTag mem_tag, bool executable) { char* result = SimulateFullAddressSpace ? nullptr : pd_attempt_reserve_memory_at(addr, bytes, executable); if (result != nullptr) { diff --git a/src/hotspot/share/runtime/os.hpp b/src/hotspot/share/runtime/os.hpp index e185188384f..6a282f9439e 100644 --- a/src/hotspot/share/runtime/os.hpp +++ b/src/hotspot/share/runtime/os.hpp @@ -205,6 +205,26 @@ class os: AllStatic { void print_on(outputStream* st) const; }; + // A "reserved" region of address space that can be split or converted to a + // normal reservation. 
Conceptually distinct from a reserved region: + // callers must NOT call commit_memory, map_memory, or other operations + // directly on the raw address. They must first convert it via + // convert_to_reserved(). + // + // On Windows, this wraps a placeholder allocation (VirtualAlloc2 with + // MEM_RESERVE_PLACEHOLDER). On POSIX platforms, any mmap'd region is + // inherently splittable, so this is a thin wrapper. + class PlaceholderRegion { + char* _base; + size_t _size; + public: + PlaceholderRegion() : _base(nullptr), _size(0) {} + PlaceholderRegion(char* base, size_t size) : _base(base), _size(size) {} + char* base() const { return _base; } + size_t size() const { return _size; } + bool is_empty() const { return _base == nullptr; } + }; + private: static OSThread* _starting_thread; static PageSizes _page_sizes; @@ -216,6 +236,22 @@ class os: AllStatic { static char* pd_reserve_memory(size_t bytes, bool executable); + // On Windows, this allocates a placeholder via VirtualAlloc2(MEM_RESERVE_PLACEHOLDER). + // On POSIX, this is a normal mmap(PROT_NONE) allocation (inherently splittable). + // If addr is non-null, attempts to place the reservation at that address. + // If the returned PlaceholderRegion is empty, the reservation failed. + static PlaceholderRegion pd_reserve_placeholder_memory(size_t bytes, bool executable, char* addr = nullptr); + + // On Windows, splits the placeholder with VirtualFree(MEM_PRESERVE_PLACEHOLDER). + // On POSIX, this just does bookkeeping (updates fields of PlaceholderRegion). + // Returns the leading piece [base, base+offset). Shrinks 'region' to become the + // trailing piece [base+offset, base+original_size). + static PlaceholderRegion pd_split_memory(PlaceholderRegion& region, size_t offset); + + // On Windows, replaces the placeholder via VirtualAlloc2(MEM_REPLACE_PLACEHOLDER). + // On POSIX, this is just a no-op. 
+ static char* pd_convert_to_reserved(PlaceholderRegion region); + static char* pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool executable); static bool pd_commit_memory(char* addr, size_t bytes, bool executable); @@ -513,6 +549,24 @@ class os: AllStatic { // Reserves virtual memory. static char* reserve_memory(size_t bytes, MemTag mem_tag, bool executable = false); + // Reserves a virtual memory region that can be split after allocation. + // The returned region must be converted via convert_to_reserved() before committing. + // This can fail recoverably if this is a Windows system that does not support VirtualAlloc2 + // (an empty PlaceholderRegion is returned). + // If the returned PlaceholderRegion is empty, the reservation failed. + // If addr is non-null, attempts to place the reservation at that address. + static PlaceholderRegion reserve_placeholder_memory(size_t bytes, MemTag mem_tag, bool executable = false, char* addr = nullptr); + + // Split 'region' at 'offset'. Returns the leading piece [base, base+offset), + // shrinks 'region' to the trailing piece [base+offset, base+original_size). + // Offset must be page-aligned. + // If offset == region.size(), returns the entire region and sets region to empty. + static PlaceholderRegion split_memory(PlaceholderRegion& region, size_t offset); + + // Convert a placeholder region into a regular reserved region. + // After conversion the Placeholder region should no longer be used. + static char* convert_to_reserved(PlaceholderRegion region); + // Reserves virtual memory that starts at an address that is aligned to 'alignment'. 
static char* reserve_memory_aligned(size_t size, size_t alignment, MemTag mem_tag, bool executable = false); diff --git a/test/hotspot/gtest/runtime/test_os.cpp b/test/hotspot/gtest/runtime/test_os.cpp index 094f16a4262..f4e97c13cb2 100644 --- a/test/hotspot/gtest/runtime/test_os.cpp +++ b/test/hotspot/gtest/runtime/test_os.cpp @@ -1200,16 +1200,22 @@ TEST_VM(os, map_unmap_memory) { TEST_VM(os, map_memory_to_file_aligned) { const char* letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - const size_t size = strlen(letters) + 1; + const size_t content_size = strlen(letters) + 1; + const size_t granularity = os::vm_allocation_granularity(); + const size_t alignments[] = { granularity, 2 * granularity, 4 * granularity, 16 * granularity, 1 * M }; int fd = os::open("map_memory_to_file.txt", O_RDWR | O_CREAT, 0666); EXPECT_TRUE(fd > 0); - EXPECT_TRUE(os::write(fd, letters, size)); + ASSERT_TRUE(os::write(fd, letters, content_size)); - char* result = os::map_memory_to_file_aligned(os::vm_allocation_granularity(), os::vm_allocation_granularity(), fd, mtTest); - ASSERT_NOT_NULL(result); - EXPECT_EQ(strcmp(letters, result), 0); - os::unmap_memory(result, os::vm_allocation_granularity()); + const size_t size = granularity; + for (size_t alignment : alignments) { + char* result = os::map_memory_to_file_aligned(size, alignment, fd, mtTest); + ASSERT_NOT_NULL(result) << "Mapping failed for alignment=" << alignment; + EXPECT_TRUE(is_aligned(result, alignment)) << "Failed to aligned to " << alignment; + EXPECT_EQ(strcmp(letters, result), 0) << "Text mismatch at alignment=" << alignment; + os::unmap_memory(result, size); + } ::close(fd); } @@ -1220,3 +1226,106 @@ TEST_VM(os, dll_load_null_error_buf) { void* lib = os::dll_load("NoSuchLib", nullptr, 0); ASSERT_NULL(lib); } + +// --- Splittable Memory API tests --- + +#define SKIP_IF_SPLITTABLE_NOT_SUPPORTED() \ + WINDOWS_ONLY(if (os::win32::VirtualAlloc2 == nullptr) GTEST_SKIP() << "VirtualAlloc2 not available";) + +TEST_VM(os, 
splittable_reserve_and_convert) { + SKIP_IF_SPLITTABLE_NOT_SUPPORTED(); + + const size_t size = 4 * os::vm_allocation_granularity(); + + os::PlaceholderRegion region = os::reserve_placeholder_memory(size, mtTest); + ASSERT_FALSE(region.is_empty()); + ASSERT_EQ(region.size(), size); + ASSERT_NE(region.base(), (char*)nullptr); + + char* reserved = os::convert_to_reserved(region); + ASSERT_EQ(reserved, region.base()); + + ASSERT_TRUE(os::commit_memory(reserved, size, false)); + // Touch the memory to confirm it's usable. + memset(reserved, 0xAB, size); + EXPECT_EQ((unsigned char)reserved[0], 0xAB); + EXPECT_EQ((unsigned char)reserved[size - 1], 0xAB); + + os::release_memory(reserved, size); +} + +TEST_VM(os, splittable_split_two_way) { + SKIP_IF_SPLITTABLE_NOT_SUPPORTED(); + + const size_t granularity = os::vm_allocation_granularity(); + const size_t total = 4 * granularity; + const size_t split_offset = 3 * granularity; + + os::PlaceholderRegion region = os::reserve_placeholder_memory(total, mtTest); + ASSERT_FALSE(region.is_empty()); + + char* original_base = region.base(); + os::PlaceholderRegion leading = os::split_memory(region, split_offset); + + // Leading piece: [base, base+split_offset) + ASSERT_EQ(leading.base(), original_base); + ASSERT_EQ(leading.size(), split_offset); + + // Trailing piece (region): [base+split_offset, base+total) + ASSERT_EQ(region.base(), original_base + split_offset); + ASSERT_EQ(region.size(), total - split_offset); + + // Convert both and commit. + char* addr1 = os::convert_to_reserved(leading); + char* addr2 = os::convert_to_reserved(region); + ASSERT_EQ(addr1, original_base); + ASSERT_EQ(addr2, original_base + split_offset); + + ASSERT_TRUE(os::commit_memory(addr1, split_offset, false)); + ASSERT_TRUE(os::commit_memory(addr2, total - split_offset, false)); + + // Touch the memory to confirm it's usable. 
+ memset(addr1, 0x11, split_offset); + memset(addr2, 0x22, total - split_offset); + EXPECT_EQ((unsigned char)addr1[0], 0x11); + EXPECT_EQ((unsigned char)addr2[0], 0x22); + + // Verify we can release the parts separately. + os::release_memory(addr1, split_offset); + os::release_memory(addr2, total - split_offset); +} + +// --- Aligned allocation tests --- + +TEST_VM(os, reserve_memory_aligned_basic) { + const size_t granularity = os::vm_allocation_granularity(); + const size_t alignments[] = { granularity, 2 * granularity, 4 * granularity, 16 * granularity }; + + for (size_t alignment : alignments) { + const size_t size = alignment; + char* result = os::reserve_memory_aligned(size, alignment, mtTest); + ASSERT_NE(result, (char*)nullptr) << "reserve_memory_aligned failed for alignment=" << alignment; + EXPECT_TRUE(is_aligned(result, alignment)) << "Result " << result << " not aligned to " << alignment; + + ASSERT_TRUE(os::commit_memory(result, size, false)); + memset(result, 0xCD, size); + EXPECT_EQ((unsigned char)result[0], 0xCD); + + os::release_memory(result, size); + } +} + +TEST_VM(os, reserve_memory_aligned_large) { + const size_t alignment = 1 * M; + const size_t size = alignment; + + char* result = os::reserve_memory_aligned(size, alignment, mtTest); + ASSERT_NE(result, (char*)nullptr); + EXPECT_TRUE(is_aligned(result, alignment)); + + ASSERT_TRUE(os::commit_memory(result, size, false)); + memset(result, 0xEF, size); + EXPECT_EQ((unsigned char)result[size - 1], 0xEF); + + os::release_memory(result, size); +} diff --git a/test/hotspot/gtest/runtime/test_os_windows.cpp b/test/hotspot/gtest/runtime/test_os_windows.cpp index 13574dcbdb3..14a7a527b8e 100644 --- a/test/hotspot/gtest/runtime/test_os_windows.cpp +++ b/test/hotspot/gtest/runtime/test_os_windows.cpp @@ -28,9 +28,12 @@ #include "runtime/flags/flagSetting.hpp" #include "runtime/globals_extension.hpp" #include "runtime/os.inline.hpp" +#include "os_windows.hpp" #include "concurrentTestRunner.inline.hpp" 
 #include "unittest.hpp"
+#include <psapi.h>
+
 namespace {
 class MemoryReleaser {
   char* const _ptr;
@@ -840,4 +843,84 @@ TEST_VM(os_windows, reserve_memory_special_concurrent) {
   testRunner.run();
 }
 
+// Test that reserve_with_numa_placeholder works correctly when
+// UseNUMAInterleaving is enabled and VirtualAlloc2 is available.
+// On UMA systems with a single NUMA node, the interleaving is trivial
+// (all chunks go to node 0) but the placeholder split/replace path
+// is still exercised.
+TEST_VM(os_windows, numa_placeholder_reserve_commit) {
+  if (!os::win32::VirtualAlloc2) {
+    GTEST_SKIP() << "VirtualAlloc2 not available pre-Windows version 1803";
+  }
+
+  const size_t num_nodes = os::numa_get_groups_num();
+
+  // Enable NUMA interleaving for this test.
+  AutoSaveRestore<bool> FLAG_GUARD(UseNUMAInterleaving);
+  AutoSaveRestore<bool> FLAG_GUARD(UseLargePages);
+  FLAG_SET_CMDLINE(UseNUMAInterleaving, true);
+  FLAG_SET_CMDLINE(UseLargePages, false);
+
+  // Allocate a region large enough to span multiple NUMA interleave chunks.
+  // NUMAInterleaveGranularity defaults to 2MB
+  const size_t chunk_size = NUMAInterleaveGranularity;
+  const size_t num_chunks = 4;
+  const size_t size = num_chunks * chunk_size;
+
+  char* result = os::attempt_reserve_memory_at(nullptr, size, mtTest);
+  ASSERT_TRUE(result != nullptr) << "Failed to reserve memory";
+
+
+  ASSERT_TRUE(is_aligned(result, os::vm_allocation_granularity()));
+  ASSERT_TRUE(os::commit_memory(result, size, false));
+
+  // Walk (and touch) the chunks using the same alignment logic as reserve_with_numa_placeholder:
+  // the first chunk may be shorter (up to the next chunk_size boundary),
+  // then full chunk_size pieces, with a possible shorter trailing chunk.
+ PSAPI_WORKING_SET_EX_INFORMATION wsi[num_chunks + 1]; + memset(wsi, 0, sizeof(wsi)); + size_t bytes_remaining = size; + char* addr = result; + size_t actual_chunks = 0; + + while (bytes_remaining > 0) { + size_t this_chunk_size = MIN2(bytes_remaining, chunk_size - ((size_t)addr % chunk_size)); + + memset(addr, 0xDA, this_chunk_size); + + wsi[actual_chunks] = {0}; + wsi[actual_chunks].VirtualAddress = addr; + actual_chunks++; + + bytes_remaining -= this_chunk_size; + addr += this_chunk_size; + } + + BOOL query_ok = QueryWorkingSetEx(GetCurrentProcess(), wsi, sizeof(wsi)); + ASSERT_TRUE(query_ok) << "QueryWorkingSetEx failed: " << GetLastError(); + + // Verify all pages are valid (in the working set). + for (size_t i = 0; i < actual_chunks; i++) { + EXPECT_TRUE(wsi[i].VirtualAttributes.Valid) << "Chunk " << i << " page not valid in working set"; + } + + if (num_nodes > 1) { + // On a multi-NUMA system, verify that not all chunks landed on the same node. + ULONG first_node = (ULONG)wsi[0].VirtualAttributes.Node; + bool found_different_node = false; + for (size_t i = 1; i < actual_chunks; i++) { + if (wsi[i].VirtualAttributes.Valid && + (ULONG)wsi[i].VirtualAttributes.Node != first_node) { + found_different_node = true; + break; + } + } + EXPECT_TRUE(found_different_node) + << "All " << actual_chunks << " chunks landed on NUMA node " << first_node + << "; expected interleaving across " << num_nodes << " nodes"; + } + + os::release_memory(result, size); +} + #endif