diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp index 7c08d6de2db..2fcd3ecc4f3 100644 --- a/src/hotspot/os/aix/os_aix.cpp +++ b/src/hotspot/os/aix/os_aix.cpp @@ -1798,6 +1798,49 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { } } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + // Always round to os::vm_page_size(), which may be larger than 4K. + bytes = align_up(bytes, os::vm_page_size()); + + // shmated memory cannot be split after allocation + if (os::vm_page_size() == 4*K || g_multipage_support.can_use_64K_mmap_pages) { + char* base = reserve_mmaped_memory(bytes, addr); + return PlaceholderRegion(base, base != nullptr ? bytes : 0); + } + return PlaceholderRegion(); +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + // On AIX, mmap regions are inherently splittable. Just do bookkeeping. + // pd_reserve_placeholder_memory guarantees mmaped (not shmated) memory. + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(offset > 0, "Offset must be positive"); + assert(offset < region_size, "Offset must be less than region size"); + + // update vmembk to reflect the split + vmembk_t* const vmi = vmembk_find(base); + guarantee(vmi != nullptr, "vmembk not found for splittable region at " PTR_FORMAT, p2i(base)); + guarantee(vmi->type != VMEM_SHMATED, "Cannot split shmated memory at " PTR_FORMAT, p2i(base)); + + vmembk_add(base, offset, vmi->pagesize, vmi->type); + vmi->addr = base + offset; + vmi->size = region_size - offset; + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. 
+ return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + return region.base(); +} + bool os::pd_release_memory(char* addr, size_t size) { // Dynamically do different things for mmap/shmat. diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp index 29ebe65e0db..e0ec03560f4 100644 --- a/src/hotspot/os/bsd/os_bsd.cpp +++ b/src/hotspot/os/bsd/os_bsd.cpp @@ -1824,6 +1824,38 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { return anon_mmap(nullptr /* addr */, bytes, exec); } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + // mmap returns memory that is splittable by default. + char* base; + if (addr != nullptr) { + base = pd_attempt_reserve_memory_at(addr, bytes, exec); + } else { + base = pd_reserve_memory(bytes, exec); + } + return PlaceholderRegion(base, base != nullptr ? bytes : 0); +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + // On BSD, mmap regions are inherently splittable. Just do bookkeeping. + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(offset > 0, "Offset must be positive"); + assert(offset < region_size, "Offset must be less than region size"); + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. 
+ return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + return region.base(); +} + bool os::pd_release_memory(char* addr, size_t size) { return anon_munmap(addr, size); } diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index 9c2fbab7535..84ad1a42e8f 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -3753,6 +3753,38 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { return anon_mmap(nullptr, bytes); } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + // mmap returns memory that is splittable by default. + char* base; + if (addr != nullptr) { + base = pd_attempt_reserve_memory_at(addr, bytes, exec); + } else { + base = pd_reserve_memory(bytes, exec); + } + return PlaceholderRegion(base, base != nullptr ? bytes : 0); +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + // On Linux, mmap regions are inherently splittable. Just do bookkeeping. + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(offset > 0, "Offset must be positive"); + assert(offset < region_size, "Offset must be less than region size"); + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. 
+ return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + return region.base(); +} + bool os::pd_release_memory(char* addr, size_t size) { return anon_munmap(addr, size); } diff --git a/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp b/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp index 527958770c0..5b947739276 100644 --- a/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp +++ b/src/hotspot/os/windows/gc/z/zSyscall_windows.cpp @@ -23,6 +23,7 @@ #include "gc/shared/gcLogPrecious.hpp" #include "gc/z/zSyscall_windows.hpp" +#include "os_windows.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" @@ -33,31 +34,9 @@ ZSyscall::VirtualFreeExFn ZSyscall::VirtualFreeEx; ZSyscall::MapViewOfFile3Fn ZSyscall::MapViewOfFile3; ZSyscall::UnmapViewOfFile2Fn ZSyscall::UnmapViewOfFile2; -static void* lookup_kernelbase_library() { - const char* const name = "KernelBase"; - char ebuf[1024]; - void* const handle = os::dll_load(name, ebuf, sizeof(ebuf)); - if (handle == nullptr) { - log_error_p(gc)("Failed to load library: %s", name); - } - return handle; -} - -static void* lookup_kernelbase_symbol(const char* name) { - static void* const handle = lookup_kernelbase_library(); - if (handle == nullptr) { - return nullptr; - } - return os::dll_lookup(handle, name); -} - -static bool has_kernelbase_symbol(const char* name) { - return lookup_kernelbase_symbol(name) != nullptr; -} - template static void install_kernelbase_symbol(Fn*& fn, const char* name) { - fn = reinterpret_cast(lookup_kernelbase_symbol(name)); + fn = reinterpret_cast(os::win32::lookup_kernelbase_symbol(name)); } template @@ -83,10 +62,10 @@ void ZSyscall::initialize() { bool ZSyscall::is_supported() { // Available in Windows version 1803 and later - return has_kernelbase_symbol("VirtualAlloc2"); + return os::win32::lookup_kernelbase_symbol("VirtualAlloc2") != nullptr; } bool 
ZSyscall::is_large_pages_supported() { // Available in Windows version 1809 and later - return has_kernelbase_symbol("CreateFileMapping2"); + return os::win32::lookup_kernelbase_symbol("CreateFileMapping2") != nullptr; } diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp index 76f47640e5a..c882cf16160 100644 --- a/src/hotspot/os/windows/os_windows.cpp +++ b/src/hotspot/os/windows/os_windows.cpp @@ -243,6 +243,46 @@ static LPVOID virtualAllocExNuma(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSiz return result; } +// VirtualAlloc2 (since Windows version 1803). loaded from KernelBase in os::init_2() +os::win32::VirtualAlloc2Fn os::win32::VirtualAlloc2 = nullptr; + +// MapViewOfFile3 (since Windows version 1803). loaded from KernelBase in os::init_2() +os::win32::MapViewOfFile3Fn os::win32::MapViewOfFile3 = nullptr; + +static void* lookup_kernelbase_library() { + const char* const name = "KernelBase"; + char ebuf[1024]; + void* const handle = os::dll_load(name, ebuf, sizeof(ebuf)); + if (handle == nullptr) { + log_trace(os)("Failed to load library: %s", name); + } + return handle; +} + +void* os::win32::lookup_kernelbase_symbol(const char* name) { + static void* const handle = lookup_kernelbase_library(); + if (handle == nullptr) { + return nullptr; + } + return os::dll_lookup(handle, name); +} + +template +static void install_kernelbase_symbol(Fn*& fn, const char* name) { + fn = reinterpret_cast(os::win32::lookup_kernelbase_symbol(name)); +} + +static void initialize_kernelbase_apis() { + install_kernelbase_symbol(os::win32::VirtualAlloc2, "VirtualAlloc2"); + log_info(os)("VirtualAlloc2 is%s available.", os::win32::VirtualAlloc2 == nullptr ? " not" : ""); + install_kernelbase_symbol(os::win32::MapViewOfFile3, "MapViewOfFile3"); + log_info(os)("MapViewOfFile3 is%s available.", os::win32::MapViewOfFile3 == nullptr ? 
" not" : ""); +} + +static bool is_VirtualAlloc2_supported() { + return os::win32::VirtualAlloc2 != nullptr; +} + // Logging wrapper for MapViewOfFileEx static LPVOID mapViewOfFileEx(HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) { @@ -2896,6 +2936,18 @@ LONG WINAPI topLevelUnhandledExceptionFilter(struct _EXCEPTION_POINTERS* excepti #define MEM_LARGE_PAGES 0x20000000 #endif +#ifndef MEM_PRESERVE_PLACEHOLDER + #define MEM_PRESERVE_PLACEHOLDER 0x00000002 +#endif + +#ifndef MEM_REPLACE_PLACEHOLDER + #define MEM_REPLACE_PLACEHOLDER 0x00004000 +#endif + +#ifndef MEM_RESERVE_PLACEHOLDER + #define MEM_RESERVE_PLACEHOLDER 0x00040000 +#endif + // Container for NUMA node list info class NUMANodeListHolder { private: @@ -3257,7 +3309,7 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in return map_memory_to_file(base, size, fd); } -// Multiple threads can race in this code but it's not possible to unmap small sections of +// Multiple threads can race in this code (if VirtualAlloc2 is not supported) but it's not possible to unmap small sections of // virtual space to get requested alignment, like posix-like os's. // Windows prevents multiple thread from remapping over each other so this loop is thread-safe. static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc, MemTag mem_tag) { @@ -3266,6 +3318,59 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi assert(is_aligned(size, os::vm_allocation_granularity()), "Size must be a multiple of allocation granularity (page size)"); + // VirtualAlloc2 and MapViewOfFile3 support alignment natively. + // This avoids the race prone retry loop below. 
+ if (is_VirtualAlloc2_supported() && is_power_of_2(alignment) && + alignment >= os::vm_allocation_granularity()) { + + MEM_ADDRESS_REQUIREMENTS requirements = {0}; + requirements.Alignment = alignment; + + MEM_EXTENDED_PARAMETER param = {0}; + param.Type = MemExtendedParameterAddressRequirements; + param.Pointer = &requirements; + + char* aligned_base = nullptr; + + if (file_desc != -1 && os::win32::MapViewOfFile3 != nullptr) { + // File-backed aligned mapping. + HANDLE fh = (HANDLE)_get_osfhandle(file_desc); + HANDLE fileMapping = CreateFileMapping(fh, nullptr, PAGE_READWRITE,(DWORD)(size >> 32), (DWORD)(size & 0xFFFFFFFF), nullptr); + if (fileMapping != nullptr) { + aligned_base = (char*)os::win32::MapViewOfFile3( + fileMapping, + GetCurrentProcess(), + nullptr, // let the system choose an aligned address + 0, // offset + size, + 0, // no special allocation type flags + PAGE_READWRITE, + ¶m, 1); + CloseHandle(fileMapping); + } + } else if (file_desc == -1) { + // Anonymous aligned reservation. 
+ aligned_base = (char*)os::win32::VirtualAlloc2( + GetCurrentProcess(), + nullptr, // let the system choose an aligned address + size, + MEM_RESERVE, + PAGE_READWRITE, + ¶m, 1); + } + + if (aligned_base != nullptr) { + assert(is_aligned(aligned_base, alignment), "Result must be aligned"); + if (file_desc == -1) { + MemTracker::record_virtual_memory_reserve(aligned_base, size, CALLER_PC, mem_tag); + } else { + MemTracker::record_virtual_memory_reserve_and_commit(aligned_base, size, CALLER_PC, mem_tag); + } + return aligned_base; + } + log_trace(os)("Aligned allocation via VirtualAlloc2/MapViewOfFile3 failed, falling back to retry loop."); + } + size_t extra_size = size + alignment; assert(extra_size >= size, "overflow, size is too large to allow alignment"); @@ -3357,6 +3462,147 @@ char* os::pd_reserve_memory(size_t bytes, bool exec) { return pd_attempt_reserve_memory_at(nullptr /* addr */, bytes, exec); } +os::PlaceholderRegion os::pd_reserve_placeholder_memory(size_t bytes, bool exec, char* addr) { + if (!is_VirtualAlloc2_supported()) { + return PlaceholderRegion(); + } + + char* res = (char*)os::win32::VirtualAlloc2( + GetCurrentProcess(), + addr, + bytes, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, + nullptr, 0); + + if (res != nullptr) { + if (addr != nullptr && res != addr) { + // Got a different address than requested; release and fail. 
+ virtualFree(res, 0, MEM_RELEASE); + log_warning(os)("VirtualAlloc2 placeholder at requested " PTR_FORMAT " returned different address " PTR_FORMAT ", released.", p2i(addr), p2i(res)); + return PlaceholderRegion(); + } + log_trace(os)("VirtualAlloc2 placeholder of size (%zu) returned " PTR_FORMAT ".", bytes, p2i(res)); + return PlaceholderRegion(res, bytes); + } else { + PreserveLastError ple; + log_warning(os)("VirtualAlloc2 placeholder reservation of size (%zu) at " PTR_FORMAT " failed (%u).", bytes, p2i(addr), ple.v); + return PlaceholderRegion(); + } +} + +os::PlaceholderRegion os::pd_split_memory(PlaceholderRegion& region, size_t offset) { + guarantee(is_VirtualAlloc2_supported(), "pd_split_memory requires VirtualAlloc2 on Windows."); + + char* base = region.base(); + size_t region_size = region.size(); + + assert(base != nullptr, "Region base cannot be null."); + assert(offset > 0, "Offset must be positive (nothing to split at 0)."); + assert(offset < region_size, "Offset must be less than region size."); + + // VirtualFree with MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER splits the + // placeholder [base, base+region_size) into two placeholders: + // [base, base+offset) and [base+offset, base+region_size) + // + // With correct inputs, this should not fail. + // A failure indicates either a programming error (e.g., bad alignment, + // region not actually a placeholder) or a catastrophic system problem. + // Crashing with a diagnostic is more useful than attempting recovery. + BOOL result = virtualFree(base, offset, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER); + guarantee(result != FALSE, + "Failed to split placeholder at " PTR_FORMAT " (offset %zu): error %lu.", + p2i(base), offset, GetLastError()); + + log_trace(os)("Split placeholder " RANGE_FORMAT " at offset %zu.", + RANGE_FORMAT_ARGS(base, region_size), offset); + + // Shrink region to the trailing piece. + region = PlaceholderRegion(base + offset, region_size - offset); + + // Return the leading piece. 
+ return PlaceholderRegion(base, offset); +} + +char* os::pd_convert_to_reserved(PlaceholderRegion region) { + return os::win32::convert_placeholder_to_reserved(region); +} + +// This function is for convenience to help with reserve_with_numa_placeholder. +char* os::win32::convert_placeholder_to_reserved(PlaceholderRegion region, int numa_node) { + guarantee(is_VirtualAlloc2_supported(), "convert_placeholder_to_reserved requires VirtualAlloc2"); + + char* base = region.base(); + size_t size = region.size(); + + assert(base != nullptr, "Region base cannot be null"); + assert(size > 0, "Region size must be positive"); + + MEM_EXTENDED_PARAMETER param = { 0 }; + MEM_EXTENDED_PARAMETER* param_ptr = nullptr; + ULONG param_count = 0; + + if (numa_node >= 0) { + param.Type = MemExtendedParameterNumaNode; + param.ULong = (DWORD)numa_node; + param_ptr = ¶m; + param_count = 1; + } + + char* reserved = (char*)os::win32::VirtualAlloc2( + GetCurrentProcess(), + base, + size, + MEM_RESERVE | MEM_REPLACE_PLACEHOLDER, + PAGE_READWRITE, + param_ptr, param_count); + guarantee(reserved != nullptr, + "Failed to convert placeholder to reservation at " PTR_FORMAT " (%zu, numa node %d): error %lu.", + p2i(base), size, numa_node, GetLastError()); + + if (numa_node >= 0) { + log_trace(os)("Converted placeholder " RANGE_FORMAT " to reservation on NUMA node %d.", RANGE_FORMAT_ARGS(reserved, size), numa_node); + } else { + log_trace(os)("Converted placeholder " RANGE_FORMAT " to reservation.", RANGE_FORMAT_ARGS(reserved, size)); + } + + return reserved; +} + +// Reserve a region split across NUMA nodes. +// This uses VirtualAlloc2 placeholders in order to avoid races when splitting up the initial reservation into chunks assigned to different nodes. +// Returns the base address of the reserved range, or nullptr on failure. 
+char* os::win32::reserve_with_numa_placeholder(char* addr, size_t bytes) { + assert(is_VirtualAlloc2_supported(), "requires VirtualAlloc2"); + + const size_t chunk_size = NUMAInterleaveGranularity; + + // Reserve the full range as a placeholder. + // If we requested an address, pd_reserve_placeholder_memory will obtain it or fail. + PlaceholderRegion remaining = os::pd_reserve_placeholder_memory(bytes, false, addr); + if (remaining.is_empty()) { + log_warning(os)("Failed to reserve placeholder for NUMA interleaving (" PTR_FORMAT ", %zu).", p2i(addr), bytes); + return nullptr; + } + + char* const base = remaining.base(); + log_trace(os)("Created VirtualAlloc2 NUMA placeholder at " RANGE_FORMAT " (%zu bytes).", RANGE_FORMAT_ARGS(base, bytes), bytes); + + int count = 0; + const int node_count = numa_node_list_holder.get_count(); + + while (!remaining.is_empty()) { + size_t bytes_to_rq = MIN2(remaining.size(), chunk_size - ((size_t)remaining.base() % chunk_size)); + PlaceholderRegion chunk = os::split_memory(remaining, bytes_to_rq); + + DWORD node = node_count > 0 ? numa_node_list_holder.get_node_list_entry(count % node_count) : 0; // Assign 0 for testing on UMA systems + convert_placeholder_to_reserved(chunk, (int)node); + count++; + } + + return base; +} + // Reserve memory at an arbitrary address, only if that area is // available (and not reserved for something else). char* os::pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool exec) { @@ -3366,23 +3612,33 @@ char* os::pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool exec) { char* res; // note that if UseLargePages is on, all the areas that require interleaving // will go thru reserve_memory_special rather than thru here. 
- bool use_individual = (UseNUMAInterleaving && !UseLargePages); - if (!use_individual) { - res = (char*)virtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE); - } else { + bool use_numa_interleaving = (UseNUMAInterleaving && !UseLargePages); + + if (use_numa_interleaving && is_VirtualAlloc2_supported()) { + // Splittable NUMA interleaving with VirtualAlloc2 placeholders. + res = win32::reserve_with_numa_placeholder(addr, bytes); + if (res == nullptr) { + log_warning(os)("NUMA allocation using placeholders failed"); + } + } else if (use_numa_interleaving) { + // Non-splittable NUMA interleaving: allocate_pages_individually (possible races). elapsedTimer reserveTimer; if (Verbose && PrintMiscellaneous) reserveTimer.start(); // in numa interleaving, we have to allocate pages individually // (well really chunks of NUMAInterleaveGranularity size) res = allocate_pages_individually(bytes, addr, MEM_RESERVE, PAGE_READWRITE); if (res == nullptr) { - warning("NUMA page allocation failed"); + log_warning(os)("NUMA page allocation failed"); } if (Verbose && PrintMiscellaneous) { reserveTimer.stop(); tty->print_cr("reserve_memory of %zx bytes took " JLONG_FORMAT " ms (" JLONG_FORMAT " ticks)", bytes, reserveTimer.milliseconds(), reserveTimer.ticks()); } + } else { + // Standard reservation. Callers who need splittable placeholders should use + // pd_reserve_placeholder_memory instead. + res = (char*)virtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE); } assert(res == nullptr || addr == nullptr || addr == res, "Unexpected address from reserve."); @@ -4582,6 +4838,8 @@ jint os::init_2(void) { } log_info(os, thread)("The SetThreadDescription API is%s available.", _SetThreadDescription == nullptr ? " not" : ""); + // Prepare KernelBase APIs (VirtualAlloc2, MapViewOfFile3) if available (Windows version 1803). 
+ initialize_kernelbase_apis(); return JNI_OK; } diff --git a/src/hotspot/os/windows/os_windows.hpp b/src/hotspot/os/windows/os_windows.hpp index d4a7d51c59b..c384247bf8d 100644 --- a/src/hotspot/os/windows/os_windows.hpp +++ b/src/hotspot/os/windows/os_windows.hpp @@ -109,11 +109,28 @@ class os::win32 { // load dll from Windows system directory or Windows directory static HINSTANCE load_Windows_dll(const char* name, char *ebuf, int ebuflen); + // Resolve a symbol from KernelBase.dll, returns nullptr if not found. + static void* lookup_kernelbase_symbol(const char* name); + + // VirtualAlloc2 (since Windows version 1803) + // Resolved from KernelBase during os::init_2() or nullptr if unavailable. + typedef PVOID (WINAPI *VirtualAlloc2Fn)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); + static VirtualAlloc2Fn VirtualAlloc2; + + // MapViewOfFile3 (since Windows version 1803) + // Resolved from KernelBase during os::init_2() or nullptr if unavailable. + typedef PVOID (WINAPI *MapViewOfFile3Fn)(HANDLE, HANDLE, PVOID, ULONG64, SIZE_T, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); + static MapViewOfFile3Fn MapViewOfFile3; + private: static void initialize_performance_counter(); static void initialize_windows_version(); static DWORD active_processors_in_job_object(DWORD* active_processor_groups = nullptr); + static char* reserve_with_numa_placeholder(char* addr, size_t bytes); + // Replaces a placeholder with a reserved region via VirtualAlloc2(MEM_REPLACE_PLACEHOLDER). + // If numa_node >= 0, binds the reservation to that NUMA node. 
+ static char* convert_placeholder_to_reserved(PlaceholderRegion region, int numa_node = -1); public: // Generic interface: diff --git a/src/hotspot/share/cds/aotMetaspace.cpp b/src/hotspot/share/cds/aotMetaspace.cpp index b75d7628aa9..03cd24fabc9 100644 --- a/src/hotspot/share/cds/aotMetaspace.cpp +++ b/src/hotspot/share/cds/aotMetaspace.cpp @@ -1688,7 +1688,7 @@ MapArchiveResult AOTMetaspace::map_archives(FileMapInfo* static_mapinfo, FileMap // this with use_requested_addr, since we're going to patch all the // pointers anyway so there's no benefit to mmap. if (use_requested_addr) { - assert(!total_space_rs.is_reserved(), "Should not be reserved for Windows"); + assert(!total_space_rs.is_reserved(), "Should not be reserved when use_requested_addr is true"); aot_log_info(aot)("Windows mmap workaround: releasing archive space."); MemoryReserver::release(archive_space_rs); // Mark as not reserved @@ -1879,10 +1879,10 @@ MapArchiveResult AOTMetaspace::map_archives(FileMapInfo* static_mapinfo, FileMap // Return: // // - On success: -// - total_space_rs will be reserved as whole for archive_space_rs and -// class_space_rs if UseCompressedClassPointers is true. -// On Windows, try reserve archive_space_rs and class_space_rs -// separately first if use_archive_base_addr is true. +// - If UseCompressedClassPointers and use_archive_base_addr are both true, +// archive_space_rs and class_space_rs are reserved as independent regions (placeholder split). +// total_space_rs is not set in this case. But if use_archive_base_addr is false, +// total_space_rs is reserved as one block for archive_space_rs and class_space_rs. // - archive_space_rs will be reserved and large enough to host static and // if needed dynamic archive: [Base, A). 
// archive_space_rs.base and size will be aligned to CDS reserve @@ -1978,46 +1978,37 @@ char* AOTMetaspace::reserve_address_space_for_archives(FileMapInfo* static_mapin } assert(total_range_size > ccs_begin_offset, "must be"); - if (use_windows_memory_mapping() && use_archive_base_addr) { - if (base_address != nullptr) { - // On Windows, we cannot safely split a reserved memory space into two (see JDK-8255917). - // Hence, we optimistically reserve archive space and class space side-by-side. We only - // do this for use_archive_base_addr=true since for use_archive_base_addr=false case - // caller will not split the combined space for mapping, instead read the archive data - // via sequential file IO. - address ccs_base = base_address + archive_space_size + gap_size; - archive_space_rs = MemoryReserver::reserve((char*)base_address, - archive_space_size, - archive_space_alignment, - os::vm_page_size(), - mtNone); - class_space_rs = MemoryReserver::reserve((char*)ccs_base, - class_space_size, - class_space_alignment, - os::vm_page_size(), - mtNone); + if (use_archive_base_addr && base_address != nullptr) { + os::PlaceholderRegion placeholder = os::reserve_placeholder_memory(total_range_size, mtNone, false /* exec */, (char*)base_address); + + if (!placeholder.is_empty()) { + os::PlaceholderRegion archive_placeholder = os::split_memory(placeholder, ccs_begin_offset); + // placeholder has been shrunk to [base+ccs_begin_offset, end) = class space + + char* archive_base = os::convert_to_reserved(archive_placeholder); + char* class_base = os::convert_to_reserved(placeholder); + + archive_space_rs = ReservedSpace(archive_base, ccs_begin_offset, + archive_space_alignment, os::vm_page_size(), + false /* exec */, false /* special */); + class_space_rs = ReservedSpace(class_base, class_space_size, + class_space_alignment, os::vm_page_size(), + false /* exec */, false /* special */); + MemTracker::record_virtual_memory_split_reserved(archive_base, total_range_size, + 
ccs_begin_offset, mtClassShared, mtClass); } + if (!archive_space_rs.is_reserved() || !class_space_rs.is_reserved()) { release_reserved_spaces(total_space_rs, archive_space_rs, class_space_rs); return nullptr; } - MemTracker::record_virtual_memory_tag(archive_space_rs, mtClassShared); - MemTracker::record_virtual_memory_tag(class_space_rs, mtClass); } else { - if (use_archive_base_addr && base_address != nullptr) { - total_space_rs = MemoryReserver::reserve((char*) base_address, - total_range_size, - base_address_alignment, - os::vm_page_size(), - mtNone); - } else { - // We did not manage to reserve at the preferred address, or were instructed to relocate. In that - // case we reserve wherever possible, but the start address needs to be encodable as narrow Klass - // encoding base since the archived heap objects contain narrow Klass IDs pre-calculated toward the start - // of the shared Metaspace. That prevents us from using zero-based encoding and therefore we won't - // try allocating in low-address regions. - total_space_rs = Metaspace::reserve_address_space_for_compressed_classes(total_range_size, false /* optimize_for_zero_base */); - } + // We did not manage to reserve at the preferred address, or were instructed to relocate. In that + // case we reserve wherever possible, but the start address needs to be encodable as narrow Klass + // encoding base since the archived heap objects contain narrow Klass IDs pre-calculated toward the start + // of the shared Metaspace. That prevents us from using zero-based encoding and therefore we won't + // try allocating in low-address regions. 
+ total_space_rs = Metaspace::reserve_address_space_for_compressed_classes(total_range_size, false /* optimize_for_zero_base */); if (!total_space_rs.is_reserved()) { return nullptr; diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index 16335f97fdb..c060cd5cedc 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -1971,6 +1971,63 @@ char* os::reserve_memory(size_t bytes, MemTag mem_tag, bool executable) { return result; } +os::PlaceholderRegion os::reserve_placeholder_memory(size_t bytes, MemTag mem_tag, bool executable, char* addr) { + assert(bytes > 0, "Size must be a value greater than 0"); + PlaceholderRegion result = pd_reserve_placeholder_memory(bytes, executable, addr); + if (!result.is_empty()) { + MemTracker::record_virtual_memory_reserve(result.base(), result.size(), CALLER_PC, mem_tag); + log_debug(os, map)("Reserved placeholder memory " RANGEFMT, RANGEFMTARGS(result.base(), result.size())); + } else { + log_info(os, map)("Reserve placeholder memory failed (%zu bytes)", bytes); + } + return result; +} + +os::PlaceholderRegion os::split_memory(PlaceholderRegion& region, size_t offset) { + assert(!region.is_empty(), "Region cannot be empty"); + assert(offset > 0, "Offset must be a value greater than 0"); + assert(offset <= region.size(), "Offset must be less than or equal to region size"); + assert(is_aligned(region.base(), os::vm_page_size()), "Region base should be page-aligned"); + assert(is_aligned(offset, os::vm_page_size()), "Offset should be page-aligned"); + + char* original_base = region.base(); + size_t original_size = region.size(); + + if (offset == original_size) { + // No split needed. Return the original region. + PlaceholderRegion result = region; + // The trailing piece is empty now. Nothing left. 
+ region = PlaceholderRegion(); + log_debug(os, map)("Split memory consumed the whole region: " RANGEFMT, RANGEFMTARGS(original_base, original_size)); + return result; + } + + PlaceholderRegion leading = pd_split_memory(region, offset); + + if (leading.is_empty()) { + fatal("Split memory at offset %zu failed. Region: " RANGEFMT, offset, RANGEFMTARGS(original_base, original_size)); + } + log_debug(os, map)("Split memory at offset %zu: " RANGEFMT " -> " RANGEFMT " + " RANGEFMT, + offset, + RANGEFMTARGS(original_base, original_size), + RANGEFMTARGS(leading.base(), leading.size()), + RANGEFMTARGS(region.base(), region.size())); + return leading; +} + +char* os::convert_to_reserved(PlaceholderRegion region) { + assert(!region.is_empty(), "Region cannot be empty"); + assert(is_aligned(region.base(), os::vm_page_size()), "Region base should be page-aligned"); + assert(is_aligned(region.size(), os::vm_page_size()), "Region size should be page-aligned"); + + char* result = pd_convert_to_reserved(region); + if (result == nullptr) { + fatal("Convert placeholder region " RANGEFMT " to reserved region failed", RANGEFMTARGS(region.base(), region.size())); + } + log_debug(os, map)("Converted placeholder region " RANGEFMT " to reserved region at " PTR_FORMAT, RANGEFMTARGS(region.base(), region.size()), p2i(result)); + return result; +} + char* os::attempt_reserve_memory_at(char* addr, size_t bytes, MemTag mem_tag, bool executable) { char* result = SimulateFullAddressSpace ? nullptr : pd_attempt_reserve_memory_at(addr, bytes, executable); if (result != nullptr) { diff --git a/src/hotspot/share/runtime/os.hpp b/src/hotspot/share/runtime/os.hpp index e185188384f..6a282f9439e 100644 --- a/src/hotspot/share/runtime/os.hpp +++ b/src/hotspot/share/runtime/os.hpp @@ -205,6 +205,26 @@ class os: AllStatic { void print_on(outputStream* st) const; }; + // A "reserved" region of address space that can be split or converted to a + // normal reservation. 
Conceptually distinct from a reserved region: + // callers must NOT call commit_memory, map_memory, or other operations + // directly on the raw address. They must first convert it via + // convert_to_reserved(). + // + // On Windows, this wraps a placeholder allocation (VirtualAlloc2 with + // MEM_RESERVE_PLACEHOLDER). On POSIX platforms, any mmap'd region is + // inherently splittable, so this is a thin wrapper. + class PlaceholderRegion { + char* _base; + size_t _size; + public: + PlaceholderRegion() : _base(nullptr), _size(0) {} + PlaceholderRegion(char* base, size_t size) : _base(base), _size(size) {} + char* base() const { return _base; } + size_t size() const { return _size; } + bool is_empty() const { return _base == nullptr; } + }; + private: static OSThread* _starting_thread; static PageSizes _page_sizes; @@ -216,6 +236,22 @@ class os: AllStatic { static char* pd_reserve_memory(size_t bytes, bool executable); + // On Windows, this allocates a placeholder via VirtualAlloc2(MEM_RESERVE_PLACEHOLDER). + // On POSIX, this is a normal mmap(PROT_NONE) allocation (inherently splittable). + // If addr is non-null, attempts to place the reservation at that address. + // If the returned PlaceholderRegion is empty, the reservation failed. + static PlaceholderRegion pd_reserve_placeholder_memory(size_t bytes, bool executable, char* addr = nullptr); + + // On Windows, splits the placeholder with VirtualFree(MEM_PRESERVE_PLACEHOLDER). + // On POSIX, this just does bookkeeping (updates fields of PlaceholderRegion). + // Returns the leading piece [base, base+offset). Shrinks 'region' to become the + // trailing piece [base+offset, base+original_size). + static PlaceholderRegion pd_split_memory(PlaceholderRegion& region, size_t offset); + + // On Windows, replaces the placeholder via VirtualAlloc2(MEM_REPLACE_PLACEHOLDER). + // On POSIX, this is just a no-op. 
+ static char* pd_convert_to_reserved(PlaceholderRegion region); + static char* pd_attempt_reserve_memory_at(char* addr, size_t bytes, bool executable); static bool pd_commit_memory(char* addr, size_t bytes, bool executable); @@ -513,6 +549,24 @@ class os: AllStatic { // Reserves virtual memory. static char* reserve_memory(size_t bytes, MemTag mem_tag, bool executable = false); + // Reserves a virtual memory region that can be split after allocation. + // The returned region must be converted via convert_to_reserved() before committing. + // This can fail recoverably if this is a Windows system that does not support VirtualAlloc2 + // (an empty PlaceholderRegion is returned). + // If the returned PlaceholderRegion is empty, the reservation failed. + // If addr is non-null, attempts to place the reservation at that address. + static PlaceholderRegion reserve_placeholder_memory(size_t bytes, MemTag mem_tag, bool executable = false, char* addr = nullptr); + + // Split 'region' at 'offset'. Returns the leading piece [base, base+offset), + // shrinks 'region' to the trailing piece [base+offset, base+original_size). + // Offset must be page-aligned. + // If offset == region.size(), returns the entire region and sets region to empty. + static PlaceholderRegion split_memory(PlaceholderRegion& region, size_t offset); + + // Convert a placeholder region into a regular reserved region. + // After conversion the Placeholder region should no longer be used. + static char* convert_to_reserved(PlaceholderRegion region); + // Reserves virtual memory that starts at an address that is aligned to 'alignment'. 
static char* reserve_memory_aligned(size_t size, size_t alignment, MemTag mem_tag, bool executable = false); diff --git a/test/hotspot/gtest/runtime/test_os.cpp b/test/hotspot/gtest/runtime/test_os.cpp index 094f16a4262..f4e97c13cb2 100644 --- a/test/hotspot/gtest/runtime/test_os.cpp +++ b/test/hotspot/gtest/runtime/test_os.cpp @@ -1200,16 +1200,22 @@ TEST_VM(os, map_unmap_memory) { TEST_VM(os, map_memory_to_file_aligned) { const char* letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - const size_t size = strlen(letters) + 1; + const size_t content_size = strlen(letters) + 1; + const size_t granularity = os::vm_allocation_granularity(); + const size_t alignments[] = { granularity, 2 * granularity, 4 * granularity, 16 * granularity, 1 * M }; int fd = os::open("map_memory_to_file.txt", O_RDWR | O_CREAT, 0666); EXPECT_TRUE(fd > 0); - EXPECT_TRUE(os::write(fd, letters, size)); + ASSERT_TRUE(os::write(fd, letters, content_size)); - char* result = os::map_memory_to_file_aligned(os::vm_allocation_granularity(), os::vm_allocation_granularity(), fd, mtTest); - ASSERT_NOT_NULL(result); - EXPECT_EQ(strcmp(letters, result), 0); - os::unmap_memory(result, os::vm_allocation_granularity()); + const size_t size = granularity; + for (size_t alignment : alignments) { + char* result = os::map_memory_to_file_aligned(size, alignment, fd, mtTest); + ASSERT_NOT_NULL(result) << "Mapping failed for alignment=" << alignment; + EXPECT_TRUE(is_aligned(result, alignment)) << "Failed to aligned to " << alignment; + EXPECT_EQ(strcmp(letters, result), 0) << "Text mismatch at alignment=" << alignment; + os::unmap_memory(result, size); + } ::close(fd); } @@ -1220,3 +1226,106 @@ TEST_VM(os, dll_load_null_error_buf) { void* lib = os::dll_load("NoSuchLib", nullptr, 0); ASSERT_NULL(lib); } + +// --- Splittable Memory API tests --- + +#define SKIP_IF_SPLITTABLE_NOT_SUPPORTED() \ + WINDOWS_ONLY(if (os::win32::VirtualAlloc2 == nullptr) GTEST_SKIP() << "VirtualAlloc2 not available";) + +TEST_VM(os, 
splittable_reserve_and_convert) { + SKIP_IF_SPLITTABLE_NOT_SUPPORTED(); + + const size_t size = 4 * os::vm_allocation_granularity(); + + os::PlaceholderRegion region = os::reserve_placeholder_memory(size, mtTest); + ASSERT_FALSE(region.is_empty()); + ASSERT_EQ(region.size(), size); + ASSERT_NE(region.base(), (char*)nullptr); + + char* reserved = os::convert_to_reserved(region); + ASSERT_EQ(reserved, region.base()); + + ASSERT_TRUE(os::commit_memory(reserved, size, false)); + // Touch the memory to confirm it's usable. + memset(reserved, 0xAB, size); + EXPECT_EQ((unsigned char)reserved[0], 0xAB); + EXPECT_EQ((unsigned char)reserved[size - 1], 0xAB); + + os::release_memory(reserved, size); +} + +TEST_VM(os, splittable_split_two_way) { + SKIP_IF_SPLITTABLE_NOT_SUPPORTED(); + + const size_t granularity = os::vm_allocation_granularity(); + const size_t total = 4 * granularity; + const size_t split_offset = 3 * granularity; + + os::PlaceholderRegion region = os::reserve_placeholder_memory(total, mtTest); + ASSERT_FALSE(region.is_empty()); + + char* original_base = region.base(); + os::PlaceholderRegion leading = os::split_memory(region, split_offset); + + // Leading piece: [base, base+split_offset) + ASSERT_EQ(leading.base(), original_base); + ASSERT_EQ(leading.size(), split_offset); + + // Trailing piece (region): [base+split_offset, base+total) + ASSERT_EQ(region.base(), original_base + split_offset); + ASSERT_EQ(region.size(), total - split_offset); + + // Convert both and commit. + char* addr1 = os::convert_to_reserved(leading); + char* addr2 = os::convert_to_reserved(region); + ASSERT_EQ(addr1, original_base); + ASSERT_EQ(addr2, original_base + split_offset); + + ASSERT_TRUE(os::commit_memory(addr1, split_offset, false)); + ASSERT_TRUE(os::commit_memory(addr2, total - split_offset, false)); + + // Touch the memory to confirm it's usable. 
+  memset(addr1, 0x11, split_offset);
+  memset(addr2, 0x22, total - split_offset);
+  EXPECT_EQ((unsigned char)addr1[0], 0x11);
+  EXPECT_EQ((unsigned char)addr2[0], 0x22);
+
+  // Verify we can release the parts separately.
+  os::release_memory(addr1, split_offset);
+  os::release_memory(addr2, total - split_offset);
+}
+
+// --- Aligned allocation tests ---
+
+// Reserve at several alignments and verify placement plus commit/touch.
+TEST_VM(os, reserve_memory_aligned_basic) {
+  const size_t granularity = os::vm_allocation_granularity();
+  const size_t alignments[] = { granularity, 2 * granularity, 4 * granularity, 16 * granularity };
+
+  for (size_t alignment : alignments) {
+    const size_t size = alignment;
+    char* result = os::reserve_memory_aligned(size, alignment, mtTest);
+    ASSERT_NE(result, (char*)nullptr) << "reserve_memory_aligned failed for alignment=" << alignment;
+    EXPECT_TRUE(is_aligned(result, alignment)) << "Result " << result << " not aligned to " << alignment;
+
+    // Commit and touch to confirm the reservation is actually usable.
+    ASSERT_TRUE(os::commit_memory(result, size, false));
+    memset(result, 0xCD, size);
+    EXPECT_EQ((unsigned char)result[0], 0xCD);
+
+    os::release_memory(result, size);
+  }
+}
+
+// Same as above but with a single large (1M) alignment request.
+TEST_VM(os, reserve_memory_aligned_large) {
+  const size_t alignment = 1 * M;
+  const size_t size = alignment;
+
+  char* result = os::reserve_memory_aligned(size, alignment, mtTest);
+  ASSERT_NE(result, (char*)nullptr);
+  EXPECT_TRUE(is_aligned(result, alignment));
+
+  ASSERT_TRUE(os::commit_memory(result, size, false));
+  memset(result, 0xEF, size);
+  EXPECT_EQ((unsigned char)result[size - 1], 0xEF);
+
+  os::release_memory(result, size);
+}
diff --git a/test/hotspot/gtest/runtime/test_os_windows.cpp b/test/hotspot/gtest/runtime/test_os_windows.cpp
index 13574dcbdb3..14a7a527b8e 100644
--- a/test/hotspot/gtest/runtime/test_os_windows.cpp
+++ b/test/hotspot/gtest/runtime/test_os_windows.cpp
@@ -28,9 +28,12 @@
 #include "runtime/flags/flagSetting.hpp"
 #include "runtime/globals_extension.hpp"
 #include "runtime/os.inline.hpp"
+#include "os_windows.hpp"
 #include "concurrentTestRunner.inline.hpp"
#include "unittest.hpp" +#include + namespace { class MemoryReleaser { char* const _ptr; @@ -840,4 +843,84 @@ TEST_VM(os_windows, reserve_memory_special_concurrent) { testRunner.run(); } +// Test that reserve_with_numa_placeholder works correctly when +// UseNUMAInterleaving is enabled and VirtualAlloc2 is available. +// On UMA systems with a single NUMA node, the interleaving is trivial +// (all chunks go to node 0) but the placeholder split/replace path +// is still exercised. +TEST_VM(os_windows, numa_placeholder_reserve_commit) { + if (!os::win32::VirtualAlloc2) { + GTEST_SKIP() << "VirtualAlloc2 not available pre-Windows version 1803"; + } + + const size_t num_nodes = os::numa_get_groups_num(); + + // Enable NUMA interleaving for this test. + AutoSaveRestore FLAG_GUARD(UseNUMAInterleaving); + AutoSaveRestore FLAG_GUARD(UseLargePages); + FLAG_SET_CMDLINE(UseNUMAInterleaving, true); + FLAG_SET_CMDLINE(UseLargePages, false); + + // Allocate a region large enough to span multiple NUMA interleave chunks. + // NUMAInterleaveGranularity defaults to 2MB + const size_t chunk_size = NUMAInterleaveGranularity; + const size_t num_chunks = 4; + const size_t size = num_chunks * chunk_size; + + char* result = os::attempt_reserve_memory_at(nullptr, size, mtTest); + ASSERT_TRUE(result != nullptr) << "Failed to reserve memory"; + + + ASSERT_TRUE(is_aligned(result, os::vm_allocation_granularity())); + ASSERT_TRUE(os::commit_memory(result, size, false)); + + // Walk (and touch) the chunks using the same alignment logic as reserve_with_numa_placeholder: + // the first chunk may be shorter (up to the next chunk_size boundary), + // then full chunk_size pieces, with a possible shorter trailing chunk. 
+ PSAPI_WORKING_SET_EX_INFORMATION wsi[num_chunks + 1]; + memset(wsi, 0, sizeof(wsi)); + size_t bytes_remaining = size; + char* addr = result; + size_t actual_chunks = 0; + + while (bytes_remaining > 0) { + size_t this_chunk_size = MIN2(bytes_remaining, chunk_size - ((size_t)addr % chunk_size)); + + memset(addr, 0xDA, this_chunk_size); + + wsi[actual_chunks] = {0}; + wsi[actual_chunks].VirtualAddress = addr; + actual_chunks++; + + bytes_remaining -= this_chunk_size; + addr += this_chunk_size; + } + + BOOL query_ok = QueryWorkingSetEx(GetCurrentProcess(), wsi, sizeof(wsi)); + ASSERT_TRUE(query_ok) << "QueryWorkingSetEx failed: " << GetLastError(); + + // Verify all pages are valid (in the working set). + for (size_t i = 0; i < actual_chunks; i++) { + EXPECT_TRUE(wsi[i].VirtualAttributes.Valid) << "Chunk " << i << " page not valid in working set"; + } + + if (num_nodes > 1) { + // On a multi-NUMA system, verify that not all chunks landed on the same node. + ULONG first_node = (ULONG)wsi[0].VirtualAttributes.Node; + bool found_different_node = false; + for (size_t i = 1; i < actual_chunks; i++) { + if (wsi[i].VirtualAttributes.Valid && + (ULONG)wsi[i].VirtualAttributes.Node != first_node) { + found_different_node = true; + break; + } + } + EXPECT_TRUE(found_different_node) + << "All " << actual_chunks << " chunks landed on NUMA node " << first_node + << "; expected interleaving across " << num_nodes << " nodes"; + } + + os::release_memory(result, size); +} + #endif