8386448: Enable dumping of AVX registers (YMM/ZMM and K registers) in JVM fatal error logs

Reviewed-by: kvn, drwhite, sviswanathan
This commit is contained in:
Srinivas Vamsi Parasa 2026-06-29 20:15:49 +00:00
parent 58f118dd1c
commit c2348e6452
4 changed files with 299 additions and 5 deletions

View File

@ -489,6 +489,25 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ jmp(wrapup);
__ bind(start_simd_check);
// Query CPUID 0xD sub-leaf 5, 6, and 7 offsets for AVX-512 XSAVE components
// Each query loads leaf 0xD into rax and the sub-leaf index into rcx, executes
// cpuid, and stores the returned rbx value (the component's offset) into the
// matching CpuidInfo field. The fields are addressed relative to rbp, which is
// presumably the base of the CpuidInfo structure - set up outside this excerpt.
// rax is reloaded before every query because cpuid overwrites it.
// Sub-leaf 5: Opmask component -> opmask_xstate_offset
__ movl(rax, 0xD);
__ movl(rcx, 5);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::opmask_xstate_offset_offset())));
__ movl(Address(rsi, 0), rbx);
// Sub-leaf 6: ZMM_Hi256 component -> zmm0to15_hi256_xstate_offset
__ movl(rax, 0xD);
__ movl(rcx, 6);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm0to15_hi256_xstate_offset_offset())));
__ movl(Address(rsi, 0), rbx);
// Sub-leaf 7: Hi16_ZMM component -> zmm16to31_xstate_offset
__ movl(rax, 0xD);
__ movl(rcx, 7);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm16to31_xstate_offset_offset())));
__ movl(Address(rsi, 0), rbx);
//
// Some OSs have a bug when upper 128/256bits of YMM/ZMM
// registers are not restored after a signal processing.

View File

@ -683,6 +683,11 @@ protected:
uint32_t apx_xstate_size; // EAX: size of APX state (128)
uint32_t apx_xstate_offset; // EBX: offset in standard XSAVE area
// cpuid function 0xD, subleaf 5, 6 and 7 (AVX-512 extended state)
uint32_t opmask_xstate_offset; // EBX: offset of Opmask component
uint32_t zmm0to15_hi256_xstate_offset; // EBX: offset of ZMM_Hi256 component
uint32_t zmm16to31_xstate_offset; // EBX: offset of Hi16_ZMM component
VM_Features feature_flags() const;
// Asserts
@ -748,9 +753,15 @@ public:
// Byte offsets of individual CpuidInfo fields within the CpuidInfo structure.
// The *_offset_offset() helpers return where a field lives inside CpuidInfo
// (not the XSAVE offset value itself), so generated code can store into it.
static ByteSize apx_save_offset() { return byte_offset_of(CpuidInfo, apx_save); }
static ByteSize apx_xstate_offset_offset() { return byte_offset_of(CpuidInfo, apx_xstate_offset); }
static ByteSize apx_xstate_size_offset() { return byte_offset_of(CpuidInfo, apx_xstate_size); }
// AVX-512 components (cpuid function 0xD, subleaf 5, 6 and 7).
static ByteSize opmask_xstate_offset_offset() { return byte_offset_of(CpuidInfo, opmask_xstate_offset); }
static ByteSize zmm0to15_hi256_xstate_offset_offset() { return byte_offset_of(CpuidInfo, zmm0to15_hi256_xstate_offset); }
static ByteSize zmm16to31_xstate_offset_offset() { return byte_offset_of(CpuidInfo, zmm16to31_xstate_offset); }
// Values recorded in _cpuid_info: the XSAVE-area offset (and, for APX, size)
// of each extended state component.
static uint32_t apx_xstate_offset() { return _cpuid_info.apx_xstate_offset; }
static uint32_t apx_xstate_size() { return _cpuid_info.apx_xstate_size; }
// Offset of the Opmask (K register) component.
static uint32_t opmask_xstate_offset() { return _cpuid_info.opmask_xstate_offset; }
// Offset of the ZMM_Hi256 component (upper 256 bits of ZMM0-15).
static uint32_t zmm0to15_hi256_xstate_offset() { return _cpuid_info.zmm0to15_hi256_xstate_offset; }
// Offset of the Hi16_ZMM component (full ZMM16-31).
static uint32_t zmm16to31_xstate_offset() { return _cpuid_info.zmm16to31_xstate_offset; }
// The value used to check ymm register after signal handle
static int ymm_test_value() { return 0xCAFEBABE; }

View File

@ -381,9 +381,22 @@ size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
/////////////////////////////////////////////////////////////////////////////
// helper functions for fatal error handler
// XSAVE Buffer Layout (Intel SDM Vol. 1, Section 13.4.1)
// Bytes 0-511: Legacy x87/FPU and SSE state (includes XMM0-15)
// Bytes 512-575: XSAVE Header (64 bytes)
// Bytes 576-831: YMMH state (upper 128 bits of YMM0-15)
// YMMH[i] at: buffer + 576 + (i * 16)
// Bytes 832+: Extended state components (e.g., AVX-512, APX, etc.).
// Component offsets and sizes are
// enumerated by CPUID.(EAX=0xD, ECX=n).
// XSAVE constants - from Intel SDM Vol. 1, Chapter 13
// Location and size of the XSAVE header, which follows the 512-byte legacy
// x87/SSE region of the XSAVE buffer.
#define XSAVE_HDR_OFFSET 512
#define XSAVE_HDR_SIZE 64
// State-component bit masks. The AVX/AVX-512 masks are tested against the
// first 64-bit word of the XSAVE header to learn which components were saved.
// APX extended state (bit 19).
#define XFEATURE_APX (1ULL << 19)
// Upper 128 bits of YMM0-15 (bit 2).
#define XFEATURE_YMM (1ULL << 2)
// Opmask registers K0-K7 (bit 5).
#define XFEATURE_OPMASK (1ULL << 5)
// Upper 256 bits of ZMM0-15 (bit 6).
#define XFEATURE_ZMM_HI256 (1ULL << 6)
// Full ZMM16-31 (bit 7).
#define XFEATURE_HI16_ZMM (1ULL << 7)
// XSAVE header structure
// See: Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header"
@ -417,6 +430,118 @@ static apx_state* get_apx_state(const ucontext_t* uc) {
return (apx_state*)(xsave + offset);
}
// Prints XMM0-15 from the FP state referenced by the signal context, one
// register per line, with the upper 64 bits shown before the lower 64 bits.
static void print_xmm_registers(outputStream* st, const ucontext_t* uc) {
  int reg = 0;
  while (reg < 16) {
    const uint64_t* halves = (const uint64_t*)&uc->uc_mcontext.fpregs->_xmm[reg];
    const uint64_t upper = halves[1];
    const uint64_t lower = halves[0];
    st->print_cr("XMM[%d]=" INTPTR_FORMAT " " INTPTR_FORMAT, reg, upper, lower);
    ++reg;
  }
}
// Prints YMM0-15, one register per line, most significant 64-bit word first.
// The low 128 bits come from the legacy XMM slots; the high 128 bits are read
// from the YMMH area right after the XSAVE header when has_ymm_hi128 is set,
// and are shown as zero otherwise.
static void print_ymm_registers(outputStream* st, const ucontext_t* uc, bool has_ymm_hi128) {
  const char* fp_area = (const char*)uc->uc_mcontext.fpregs;
  for (int reg = 0; reg < 16; ++reg) {
    const uint64_t* low128 = (const uint64_t*)&uc->uc_mcontext.fpregs->_xmm[reg];
    uint64_t words[4];
    words[0] = low128[0];
    words[1] = low128[1];
    words[2] = 0;
    words[3] = 0;
    if (has_ymm_hi128) {
      // YMMH[reg] lives at header end + reg * 16 bytes.
      const uint64_t* high128 = (const uint64_t*)(fp_area + XSAVE_HDR_OFFSET + XSAVE_HDR_SIZE + (reg * 16));
      words[2] = high128[0];
      words[3] = high128[1];
    }
    st->print("YMM[%d]=", reg);
    const char* separator = "";
    for (int w = 3; w >= 0; --w) {
      st->print("%s" INTPTR_FORMAT, separator, words[w]);
      separator = " ";
    }
    st->cr();
  }
}
// Prints ZMM0-31, one register per line, most significant 64-bit word first.
//
// ZMM0-15 are assembled from three pieces of the XSAVE area: the legacy XMM
// slot (bits 0-127), the YMMH area after the XSAVE header (bits 128-255, only
// when has_ymm_hi128) and the ZMM_Hi256 component (bits 256-511, only when
// has_zmm_hi256). ZMM16-31 are read whole from the Hi16_ZMM component when
// has_hi16_zmm is set. Any piece that is not available is printed as zero.
//
// The ZMM_Hi256 and Hi16_ZMM offsets come from VM_Version. An offset of 0
// cannot be a real location for these components, because the start of the
// buffer holds the legacy x87/SSE state. It is therefore treated as "offset
// unknown", as print_kmask_registers does for the opmask offset, instead of
// printing legacy-area bytes as if they were ZMM contents.
static void print_zmm_registers(outputStream* st, const ucontext_t* uc, bool has_ymm_hi128,
                                bool has_zmm_hi256, bool has_hi16_zmm) {
  const char* xsave = (const char*)uc->uc_mcontext.fpregs;

  // Loop-invariant: resolve the component offsets and their usability once.
  const uint32_t zmm_hi256_offset = VM_Version::zmm0to15_hi256_xstate_offset();
  const uint32_t hi16_zmm_offset = VM_Version::zmm16to31_xstate_offset();
  const bool read_zmm_hi256 = has_zmm_hi256 && (zmm_hi256_offset != 0);
  const bool read_hi16_zmm = has_hi16_zmm && (hi16_zmm_offset != 0);

  for (int i = 0; i < 32; ++i) {
    uint64_t values[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    if (i < 16) {
      const uint64_t* xmm = (const uint64_t*)&uc->uc_mcontext.fpregs->_xmm[i];
      values[0] = xmm[0];
      values[1] = xmm[1];
      if (has_ymm_hi128) {
        const uint64_t* ymmh = (const uint64_t*)(xsave + XSAVE_HDR_OFFSET + XSAVE_HDR_SIZE + (i * 16));
        values[2] = ymmh[0];
        values[3] = ymmh[1];
      }
      if (read_zmm_hi256) {
        // 32 bytes (upper 256 bits) per register.
        const uint64_t* zmm_hi256 = (const uint64_t*)(xsave + zmm_hi256_offset + (i * 32));
        for (int w = 0; w < 4; ++w) {
          values[4 + w] = zmm_hi256[w];
        }
      }
    } else if (read_hi16_zmm) {
      // 64 bytes (full register) per register, indexed from ZMM16.
      const uint64_t* zmm = (const uint64_t*)(xsave + hi16_zmm_offset + ((i - 16) * 64));
      for (int w = 0; w < 8; ++w) {
        values[w] = zmm[w];
      }
    }
    st->print("ZMM[%d]=", i);
    for (int j = 7; j >= 0; --j) {
      st->print("%s" INTPTR_FORMAT, (j == 7) ? "" : " ", values[j]);
    }
    st->cr();
  }
}
// Prints the eight opmask registers K0-K7, one per line, followed by a blank
// line. Nothing is printed when the opmask component was not saved
// (has_opmask is false) or when VM_Version reports an opmask offset of 0.
static void print_kmask_registers(outputStream* st, const ucontext_t* uc, bool has_opmask) {
  const uint32_t opmask_offset = VM_Version::opmask_xstate_offset();
  const bool opmask_available = has_opmask && (opmask_offset != 0);
  if (opmask_available) {
    const char* fp_area = (const char*)uc->uc_mcontext.fpregs;
    const uint64_t* k_regs = (const uint64_t*)(fp_area + opmask_offset);
    int idx = 0;
    while (idx < 8) {
      st->print_cr("K[%d]=" INTPTR_FORMAT, idx, k_regs[idx]);
      ++idx;
    }
    st->cr();
  }
}
// Chooses which vector register dump to emit for the given signal context:
//   - nothing when the context has no FP state pointer;
//   - XMM0-15 when UseAVX < 2;
//   - K0-K7 followed by ZMM0-31 when UseAVX > 2 and the XSAVE header reports
//     that ZMM_Hi256 or Hi16_ZMM state was saved;
//   - YMM0-15 otherwise.
static void print_vector_registers(outputStream* st, const ucontext_t* uc) {
  if (uc->uc_mcontext.fpregs == nullptr) {
    return;
  }
  if (UseAVX < 2) {
    print_xmm_registers(st, uc);
    return;
  }

  // The first 64-bit word of the XSAVE header is the bitmap of saved components.
  const char* fp_area = (const char*)uc->uc_mcontext.fpregs;
  const uint64_t saved_components = *(const uint64_t*)(fp_area + XSAVE_HDR_OFFSET);

  const bool ymm_hi128_saved = (saved_components & XFEATURE_YMM) != 0;
  const bool opmask_saved = (saved_components & XFEATURE_OPMASK) != 0;
  const bool zmm_hi256_saved = (saved_components & XFEATURE_ZMM_HI256) != 0;
  const bool hi16_zmm_saved = (saved_components & XFEATURE_HI16_ZMM) != 0;

  if ((UseAVX > 2) && (zmm_hi256_saved || hi16_zmm_saved)) {
    print_kmask_registers(st, uc, opmask_saved);
    print_zmm_registers(st, uc, ymm_hi128_saved, zmm_hi256_saved, hi16_zmm_saved);
  } else {
    print_ymm_registers(st, uc, ymm_hi128_saved);
  }
}
void os::print_context(outputStream *st, const void *context) {
if (context == nullptr) return;
@ -458,7 +583,7 @@ void os::print_context(outputStream *st, const void *context) {
st->print(", ERR=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[REG_ERR]);
st->cr();
st->print(" TRAPNO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[REG_TRAPNO]);
// Add XMM registers + MXCSR. Note that C2 uses XMM to spill GPR values including pointers.
// Add vector registers + MXCSR. Note that C2 uses XMM to spill GPR values including pointers.
st->cr();
st->cr();
// Sanity check: fpregs should point into the context.
@ -467,10 +592,7 @@ void os::print_context(outputStream *st, const void *context) {
st->print_cr("bad uc->uc_mcontext.fpregs: " INTPTR_FORMAT " (uc: " INTPTR_FORMAT ")",
p2i(uc->uc_mcontext.fpregs), p2i(uc));
} else {
for (int i = 0; i < 16; ++i) {
const int64_t* xmm_val_addr = (int64_t*)&(uc->uc_mcontext.fpregs->_xmm[i]);
st->print_cr("XMM[%d]=" INTPTR_FORMAT " " INTPTR_FORMAT, i, xmm_val_addr[1], xmm_val_addr[0]);
}
print_vector_registers(st, uc);
st->print(" MXCSR=" UINT32_FORMAT_X_0, uc->uc_mcontext.fpregs->mxcsr);
}
st->cr();

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @summary Test that YMM and ZMM registers are correctly dumped in hs_err for different UseAVX settings
* @library /test/lib
* @requires os.family == "linux" & os.arch == "amd64"
* @requires vm.debug == true
* @modules java.base/jdk.internal.misc
* @build jdk.test.whitebox.WhiteBox
* @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
* @run driver TestAVXRegisterDump
*/
// Note: this test can only run on debug since it relies on VMError::controlled_crash() which
// only exists in debug builds.
import java.io.File;
import java.util.regex.Pattern;
import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
import jdk.test.whitebox.WhiteBox;
/**
 * Crashes a child VM under each UseAVX setting (1, 2, 3) and checks that the
 * resulting hs_err file contains the expected vector register dump
 * (XMM, YMM, or ZMM plus K registers) and none of the unexpected ones.
 */
public class TestAVXRegisterDump {

    /**
     * With the single argument "crash" this is the child process and triggers
     * a controlled VM crash; otherwise it drives one child per UseAVX level.
     */
    public static void main(String[] args) throws Exception {
        if (args.length > 0 && args[0].equals("crash")) {
            WhiteBox.getWhiteBox().controlledCrash(2);
            throw new RuntimeException("Still alive?");
        }

        // Test UseAVX=1 (XMM only)
        testWithUseAVX(1);
        // Test UseAVX=2 (YMM)
        testWithUseAVX(2);
        // Test UseAVX=3 (ZMM + K masks if available)
        testWithUseAVX(3);
    }

    /**
     * Launches a child VM with -XX:UseAVX=useAVX that crashes on purpose,
     * then validates the register section of its hs_err file.
     */
    static void testWithUseAVX(int useAVX) throws Exception {
        ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder(
            "-Xbootclasspath/a:.",
            "-XX:+UnlockDiagnosticVMOptions",
            "-XX:+WhiteBoxAPI",
            "-XX:UseAVX=" + useAVX,
            "-XX:-CreateCoredumpOnCrash",
            "-Xmx100M",
            TestAVXRegisterDump.class.getName(), "crash");

        OutputAnalyzer output = new OutputAnalyzer(pb.start());
        output.shouldMatch("# A fatal error has been detected by the Java Runtime Environment:.*");

        File hsErrFile = HsErrFileUtils.openHsErrFileFromOutput(output);
        validateRegisterContent(hsErrFile, useAVX);
    }

    /**
     * Builds one pattern per register index 0..count-1 for the given register
     * family, e.g. regType "XMM" yields patterns for "XMM[0]=..." onwards.
     */
    static Pattern[] createRegisterPatterns(String regType, int count) {
        Pattern[] patterns = new Pattern[count];
        for (int i = 0; i < count; i++) {
            // Create regex pattern to match entire register line (e.g., "XMM[0]=0xHEX 0xHEX")
            // Used with Matcher.matches() which requires matching the entire line
            patterns[i] = Pattern.compile(regType + "\\[" + i + "\\]=.*");
        }
        return patterns;
    }

    /**
     * Dispatches to the validator for the requested UseAVX level.
     *
     * @throws IllegalArgumentException if useAVX is not 1, 2 or 3, so that an
     *         unsupported level can never pass without being validated
     */
    static void validateRegisterContent(File hsErrFile, int useAVX) throws Exception {
        switch (useAVX) {
            case 1:
                validateRegistersUseAVX1(hsErrFile);
                break;
            case 2:
                validateRegistersUseAVX2(hsErrFile);
                break;
            case 3:
                validateRegistersUseAVX3(hsErrFile);
                break;
            default:
                throw new IllegalArgumentException("Unexpected UseAVX value: " + useAVX);
        }
    }

    static void validateRegistersUseAVX1(File hsErrFile) throws Exception {
        // UseAVX=1: XMM registers only (0-15)
        Pattern[] positivePatterns = createRegisterPatterns("XMM", 16);
        Pattern[] negativePatterns = new Pattern[] {
            Pattern.compile("YMM\\[.*\\]=.*"),
            Pattern.compile("ZMM\\[.*\\]=.*"),
        };
        HsErrFileUtils.checkHsErrFileContent(hsErrFile, positivePatterns, negativePatterns, false, false);
    }

    static void validateRegistersUseAVX2(File hsErrFile) throws Exception {
        // UseAVX=2: YMM registers only (0-15)
        // NOTE(review): presumably the VM lowers UseAVX on CPUs without AVX2, in which
        // case XMM would be dumped and this check would fail - confirm whether a
        // CPU-feature @requires is needed for this test.
        Pattern[] positivePatterns = createRegisterPatterns("YMM", 16);
        Pattern[] negativePatterns = new Pattern[] {
            Pattern.compile("XMM\\[.*\\]=.*"),
            Pattern.compile("ZMM\\[.*\\]=.*"),
        };
        HsErrFileUtils.checkHsErrFileContent(hsErrFile, positivePatterns, negativePatterns, false, false);
    }

    static void validateRegistersUseAVX3(File hsErrFile) throws Exception {
        // UseAVX=3: ZMM + K masks (if available) or fallback to YMM
        Pattern[] xmmMustBeAbsent = new Pattern[] {
            Pattern.compile("XMM\\[.*\\]=.*"),
        };

        // Try ZMM first; remember the failure instead of discarding it.
        RuntimeException zmmFailure = null;
        try {
            Pattern[] zmmPatterns = createRegisterPatterns("ZMM", 32);
            HsErrFileUtils.checkHsErrFileContent(hsErrFile, zmmPatterns, xmmMustBeAbsent, false, false);
        } catch (RuntimeException e) {
            zmmFailure = e;
        }

        if (zmmFailure == null) {
            // ZMM registers were dumped, so the K registers must be there too. This is
            // checked outside the fallback so that a missing K register is reported as
            // such rather than as a misleading "YMM not found" error.
            Pattern[] kPatterns = createRegisterPatterns("K", 8);
            HsErrFileUtils.checkHsErrFileContent(hsErrFile, kPatterns, null, false, false);
            return;
        }

        // If ZMM not found, try YMM (CPU doesn't support AVX-512)
        try {
            Pattern[] ymmPatterns = createRegisterPatterns("YMM", 16);
            HsErrFileUtils.checkHsErrFileContent(hsErrFile, ymmPatterns, xmmMustBeAbsent, false, false);
        } catch (RuntimeException ymmFailure) {
            // Neither layout matched: keep the ZMM failure for diagnosis.
            ymmFailure.addSuppressed(zmmFailure);
            throw ymmFailure;
        }
    }
}