diff --git a/doc/testing.html b/doc/testing.html
index b9838735e4f..31f4fbd1778 100644
--- a/doc/testing.html
+++ b/doc/testing.html
@@ -535,6 +535,8 @@ failure. This helps to reproduce intermittent test failures. Defaults to
REPORT
Use this report style when reporting test results (sent to JTReg as
-report). Defaults to files.
+MANUAL
+Set to true to execute manual tests only.
Gtest keywords
REPEAT
The number of times to repeat the tests
diff --git a/doc/testing.md b/doc/testing.md
index 0144610a5bf..b95f59de9fd 100644
--- a/doc/testing.md
+++ b/doc/testing.md
@@ -512,6 +512,10 @@ helps to reproduce intermittent test failures. Defaults to 0.
Use this report style when reporting test results (sent to JTReg as `-report`).
Defaults to `files`.
+#### MANUAL
+
+Set to `true` to execute manual tests only.
+
### Gtest keywords
#### REPEAT
diff --git a/make/RunTests.gmk b/make/RunTests.gmk
index 947389f64f9..1f50b97531b 100644
--- a/make/RunTests.gmk
+++ b/make/RunTests.gmk
@@ -206,7 +206,7 @@ $(eval $(call ParseKeywordVariable, JTREG, \
SINGLE_KEYWORDS := JOBS TIMEOUT_FACTOR FAILURE_HANDLER_TIMEOUT \
TEST_MODE ASSERT VERBOSE RETAIN TEST_THREAD_FACTORY JVMTI_STRESS_AGENT \
MAX_MEM RUN_PROBLEM_LISTS RETRY_COUNT REPEAT_COUNT MAX_OUTPUT REPORT \
- AOT_JDK $(CUSTOM_JTREG_SINGLE_KEYWORDS), \
+ AOT_JDK MANUAL $(CUSTOM_JTREG_SINGLE_KEYWORDS), \
STRING_KEYWORDS := OPTIONS JAVA_OPTIONS VM_OPTIONS KEYWORDS \
EXTRA_PROBLEM_LISTS LAUNCHER_OPTIONS \
$(CUSTOM_JTREG_STRING_KEYWORDS), \
@@ -911,7 +911,13 @@ define SetupRunJtregTestBody
-vmoption:-Dtest.boot.jdk="$$(BOOT_JDK)" \
-vmoption:-Djava.io.tmpdir="$$($1_TEST_TMP_DIR)"
- $1_JTREG_BASIC_OPTIONS += -automatic -ignore:quiet
+ $1_JTREG_BASIC_OPTIONS += -ignore:quiet
+
+ ifeq ($$(JTREG_MANUAL), true)
+ $1_JTREG_BASIC_OPTIONS += -manual
+ else
+ $1_JTREG_BASIC_OPTIONS += -automatic
+ endif
# Make it possible to specify the JIB_DATA_DIR for tests using the
# JIB Artifact resolver
@@ -1151,6 +1157,7 @@ define SetupRunJtregTestBody
$$(EXPR) $$($1_PASSED) + $$($1_FAILED) + $$($1_ERROR) + $$($1_SKIPPED))) \
, \
$$(eval $1_PASSED_AND_RUNTIME_SKIPPED := 0) \
+ $$(eval $1_PASSED := 0) \
$$(eval $1_RUNTIME_SKIPPED := 0) \
$$(eval $1_SKIPPED := 0) \
$$(eval $1_FAILED := 0) \
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index 9d58a280998..6298bcae416 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -282,10 +282,17 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION],
C_O_FLAG_DEBUG_JVM="-O0"
C_O_FLAG_NONE="-O0"
+ if test "x$TOOLCHAIN_TYPE" = xgcc; then
+ C_O_FLAG_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing -fno-fat-lto-objects"
+ else
+ C_O_FLAG_LTO="-flto -fno-strict-aliasing"
+ fi
+
if test "x$TOOLCHAIN_TYPE" = xclang && test "x$OPENJDK_TARGET_OS" = xaix; then
C_O_FLAG_HIGHEST_JVM="${C_O_FLAG_HIGHEST_JVM} -finline-functions"
C_O_FLAG_HIGHEST="${C_O_FLAG_HIGHEST} -finline-functions"
C_O_FLAG_HI="${C_O_FLAG_HI} -finline-functions"
+ C_O_FLAG_LTO="${C_O_FLAG_LTO} -ffat-lto-objects"
fi
# -D_FORTIFY_SOURCE=2 hardening option needs optimization (at least -O1) enabled
@@ -317,6 +324,7 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION],
C_O_FLAG_DEBUG_JVM=""
C_O_FLAG_NONE="-Od"
C_O_FLAG_SIZE="-O1"
+ C_O_FLAG_LTO="-GL"
fi
# Now copy to C++ flags
@@ -328,6 +336,7 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION],
CXX_O_FLAG_DEBUG_JVM="$C_O_FLAG_DEBUG_JVM"
CXX_O_FLAG_NONE="$C_O_FLAG_NONE"
CXX_O_FLAG_SIZE="$C_O_FLAG_SIZE"
+ CXX_O_FLAG_LTO="$C_O_FLAG_LTO"
# Adjust optimization flags according to debug level.
case $DEBUG_LEVEL in
@@ -360,12 +369,15 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION],
AC_SUBST(C_O_FLAG_NORM)
AC_SUBST(C_O_FLAG_NONE)
AC_SUBST(C_O_FLAG_SIZE)
+ AC_SUBST(C_O_FLAG_LTO)
+
AC_SUBST(CXX_O_FLAG_HIGHEST_JVM)
AC_SUBST(CXX_O_FLAG_HIGHEST)
AC_SUBST(CXX_O_FLAG_HI)
AC_SUBST(CXX_O_FLAG_NORM)
AC_SUBST(CXX_O_FLAG_NONE)
AC_SUBST(CXX_O_FLAG_SIZE)
+ AC_SUBST(CXX_O_FLAG_LTO)
])
AC_DEFUN([FLAGS_SETUP_CFLAGS],
diff --git a/make/autoconf/flags-ldflags.m4 b/make/autoconf/flags-ldflags.m4
index 66f8904db89..572790b567b 100644
--- a/make/autoconf/flags-ldflags.m4
+++ b/make/autoconf/flags-ldflags.m4
@@ -50,7 +50,14 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
# add -z,relro (mark relocations read only) for all libs
# add -z,now ("full relro" - more of the Global Offset Table GOT is marked read only)
# add --no-as-needed to disable default --as-needed link flag on some GCC toolchains
+ # add --icf=all (Identical Code Folding — merges identical functions)
BASIC_LDFLAGS="-Wl,-z,defs -Wl,-z,relro -Wl,-z,now -Wl,--no-as-needed -Wl,--exclude-libs,ALL"
+ if test "x$LINKER_TYPE" = "xgold"; then
+ if test x$DEBUG_LEVEL = xrelease; then
+ BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,--icf=all"
+ fi
+ fi
+
# Linux : remove unused code+data in link step
if test "x$ENABLE_LINKTIME_GC" = xtrue; then
if test "x$OPENJDK_TARGET_CPU" = xs390x; then
@@ -61,6 +68,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
fi
BASIC_LDFLAGS_JVM_ONLY=""
+ LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing"
LDFLAGS_CXX_PARTIAL_LINKING="$MACHINE_FLAG -r"
@@ -68,6 +76,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
BASIC_LDFLAGS_JVM_ONLY="-mno-omit-leaf-frame-pointer -mstack-alignment=16 \
-fPIC"
+ LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing"
LDFLAGS_CXX_PARTIAL_LINKING="$MACHINE_FLAG -r"
if test "x$OPENJDK_TARGET_OS" = xlinux; then
@@ -87,6 +96,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
BASIC_LDFLAGS="-opt:ref"
BASIC_LDFLAGS_JDK_ONLY="-incremental:no"
BASIC_LDFLAGS_JVM_ONLY="-opt:icf,8 -subsystem:windows"
+ LDFLAGS_LTO="-LTCG:INCREMENTAL"
fi
if (test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang) \
@@ -148,6 +158,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
# Export some intermediate variables for compatibility
LDFLAGS_CXX_JDK="$DEBUGLEVEL_LDFLAGS_JDK_ONLY"
+ AC_SUBST(LDFLAGS_LTO)
AC_SUBST(LDFLAGS_CXX_JDK)
AC_SUBST(LDFLAGS_CXX_PARTIAL_LINKING)
])
diff --git a/make/autoconf/spec.gmk.template b/make/autoconf/spec.gmk.template
index 0b336721d65..b3d58704c50 100644
--- a/make/autoconf/spec.gmk.template
+++ b/make/autoconf/spec.gmk.template
@@ -513,12 +513,14 @@ C_O_FLAG_HI := @C_O_FLAG_HI@
C_O_FLAG_NORM := @C_O_FLAG_NORM@
C_O_FLAG_NONE := @C_O_FLAG_NONE@
C_O_FLAG_SIZE := @C_O_FLAG_SIZE@
+C_O_FLAG_LTO := @C_O_FLAG_LTO@
CXX_O_FLAG_HIGHEST_JVM := @CXX_O_FLAG_HIGHEST_JVM@
CXX_O_FLAG_HIGHEST := @CXX_O_FLAG_HIGHEST@
CXX_O_FLAG_HI := @CXX_O_FLAG_HI@
CXX_O_FLAG_NORM := @CXX_O_FLAG_NORM@
CXX_O_FLAG_NONE := @CXX_O_FLAG_NONE@
CXX_O_FLAG_SIZE := @CXX_O_FLAG_SIZE@
+CXX_O_FLAG_LTO := @CXX_O_FLAG_LTO@
GENDEPS_FLAGS := @GENDEPS_FLAGS@
@@ -587,6 +589,9 @@ LDFLAGS_CXX_JDK := @LDFLAGS_CXX_JDK@
# LDFLAGS specific to partial linking.
LDFLAGS_CXX_PARTIAL_LINKING := @LDFLAGS_CXX_PARTIAL_LINKING@
+# LDFLAGS specific to link time optimization
+LDFLAGS_LTO := @LDFLAGS_LTO@
+
# Sometimes a different linker is needed for c++ libs
LDCXX := @LDCXX@
# The flags for linking libstdc++ linker.
diff --git a/make/autoconf/toolchain.m4 b/make/autoconf/toolchain.m4
index 4662c62d901..15210efe4a7 100644
--- a/make/autoconf/toolchain.m4
+++ b/make/autoconf/toolchain.m4
@@ -516,6 +516,7 @@ AC_DEFUN([TOOLCHAIN_EXTRACT_LD_VERSION],
if [ [[ "$LINKER_VERSION_STRING" == *gold* ]] ]; then
[ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
$SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*) .*/\1/'` ]
+ LINKER_TYPE=gold
else
[ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
$SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*/\1/'` ]
diff --git a/make/common/NativeCompilation.gmk b/make/common/NativeCompilation.gmk
index 9721f1c0aca..28e186adf5f 100644
--- a/make/common/NativeCompilation.gmk
+++ b/make/common/NativeCompilation.gmk
@@ -98,6 +98,7 @@ include native/Paths.gmk
# SYSROOT_CFLAGS the compiler flags for using the specific sysroot
# SYSROOT_LDFLAGS the linker flags for using the specific sysroot
# OPTIMIZATION sets optimization level to NONE, LOW, HIGH, HIGHEST, HIGHEST_JVM, SIZE
+# LINK_TIME_OPTIMIZATION if set to true, enables link time optimization
# DISABLED_WARNINGS_ Disable the given warnings for the specified toolchain
# DISABLED_WARNINGS__ Disable the given warnings for the specified
# toolchain and target OS
diff --git a/make/common/native/Flags.gmk b/make/common/native/Flags.gmk
index 747e090b816..843701cb4db 100644
--- a/make/common/native/Flags.gmk
+++ b/make/common/native/Flags.gmk
@@ -194,6 +194,11 @@ define SetupCompilerFlags
$1_EXTRA_CXXFLAGS += $(CFLAGS_WARNINGS_ARE_ERRORS)
endif
+ ifeq (true, $$($1_LINK_TIME_OPTIMIZATION))
+ $1_EXTRA_CFLAGS += $(C_O_FLAG_LTO)
+ $1_EXTRA_CXXFLAGS += $(CXX_O_FLAG_LTO)
+ endif
+
ifeq (NONE, $$($1_OPTIMIZATION))
$1_OPT_CFLAGS := $(C_O_FLAG_NONE)
$1_OPT_CXXFLAGS := $(CXX_O_FLAG_NONE)
@@ -222,6 +227,10 @@ define SetupLinkerFlags
# Pickup extra OPENJDK_TARGET_OS_TYPE, OPENJDK_TARGET_OS and TOOLCHAIN_TYPE
# dependent variables for LDFLAGS and LIBS, and additionally the pair dependent
# TOOLCHAIN_TYPE plus OPENJDK_TARGET_OS
+ ifeq ($$($1_LINK_TIME_OPTIMIZATION), true)
+ $1_EXTRA_LDFLAGS += $(LDFLAGS_LTO)
+ endif
+
$1_EXTRA_LDFLAGS += $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS)) \
$$($1_LDFLAGS_$(TOOLCHAIN_TYPE)) $$($1_LDFLAGS_$(TOOLCHAIN_TYPE)_$(OPENJDK_TARGET_OS))
$1_EXTRA_LIBS += $$($1_LIBS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LIBS_$(OPENJDK_TARGET_OS)) \
diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk
index a8b90c92e4d..b0ea27e5081 100644
--- a/make/hotspot/lib/CompileJvm.gmk
+++ b/make/hotspot/lib/CompileJvm.gmk
@@ -234,6 +234,7 @@ $(eval $(call SetupJdkLibrary, BUILD_LIBJVM, \
LDFLAGS := $(JVM_LDFLAGS), \
LIBS := $(JVM_LIBS), \
OPTIMIZATION := $(JVM_OPTIMIZATION), \
+ LINK_TIME_OPTIMIZATION := $(JVM_LTO), \
OBJECT_DIR := $(JVM_OUTPUTDIR)/objs, \
STRIPFLAGS := $(JVM_STRIPFLAGS), \
EMBED_MANIFEST := true, \
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 79bbd6a4106..90ea8a985e3 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -175,22 +175,12 @@ ifeq ($(call check-jvm-feature, link-time-opt), true)
# Set JVM_OPTIMIZATION directly so other jvm-feature flags can override it
# later on if desired
JVM_OPTIMIZATION := HIGHEST_JVM
- ifeq ($(call isCompiler, gcc), true)
- JVM_CFLAGS_FEATURES += -flto=auto -fuse-linker-plugin -fno-strict-aliasing \
- -fno-fat-lto-objects
- JVM_LDFLAGS_FEATURES += $(CXX_O_FLAG_HIGHEST_JVM) -flto=auto \
- -fuse-linker-plugin -fno-strict-aliasing
- else ifeq ($(call isCompiler, clang), true)
- JVM_CFLAGS_FEATURES += -flto -fno-strict-aliasing
- ifeq ($(call isBuildOs, aix), true)
- JVM_CFLAGS_FEATURES += -ffat-lto-objects
- endif
- JVM_LDFLAGS_FEATURES += $(CXX_O_FLAG_HIGHEST_JVM) -flto -fno-strict-aliasing
- else ifeq ($(call isCompiler, microsoft), true)
- JVM_CFLAGS_FEATURES += -GL
- JVM_LDFLAGS_FEATURES += -LTCG:INCREMENTAL
+ JVM_LTO := true
+ ifneq ($(call isCompiler, microsoft), true)
+ JVM_LDFLAGS_FEATURES += $(CXX_O_FLAG_HIGHEST_JVM)
endif
else
+ JVM_LTO := false
ifeq ($(call isCompiler, gcc), true)
JVM_LDFLAGS_FEATURES += -O1
endif
diff --git a/make/modules/java.desktop/lib/ClientLibraries.gmk b/make/modules/java.desktop/lib/ClientLibraries.gmk
index 2c29092cdd6..4cd7f5bac90 100644
--- a/make/modules/java.desktop/lib/ClientLibraries.gmk
+++ b/make/modules/java.desktop/lib/ClientLibraries.gmk
@@ -226,6 +226,7 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
EXCLUDE_FILES := imageioJPEG.c jpegdecoder.c pngtest.c, \
EXCLUDES := $(LIBSPLASHSCREEN_EXCLUDES), \
OPTIMIZATION := SIZE, \
+ LINK_TIME_OPTIMIZATION := true, \
CFLAGS := $(LIBSPLASHSCREEN_CFLAGS) \
$(GIFLIB_CFLAGS) $(LIBJPEG_CFLAGS) $(PNG_CFLAGS) $(LIBZ_CFLAGS) \
$(ICONV_CFLAGS), \
@@ -236,7 +237,7 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
DISABLED_WARNINGS_gcc_dgif_lib.c := sign-compare, \
DISABLED_WARNINGS_gcc_jcmaster.c := implicit-fallthrough, \
DISABLED_WARNINGS_gcc_jdphuff.c := shift-negative-value, \
- DISABLED_WARNINGS_gcc_png.c := maybe-uninitialized unused-function, \
+ DISABLED_WARNINGS_gcc_png.c := maybe-uninitialized, \
DISABLED_WARNINGS_gcc_pngerror.c := maybe-uninitialized, \
DISABLED_WARNINGS_gcc_splashscreen_gfx_impl.c := implicit-fallthrough \
maybe-uninitialized, \
@@ -247,7 +248,6 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
DISABLED_WARNINGS_clang := deprecated-non-prototype, \
DISABLED_WARNINGS_clang_dgif_lib.c := sign-compare, \
DISABLED_WARNINGS_clang_gzwrite.c := format-nonliteral, \
- DISABLED_WARNINGS_clang_png.c := unused-function, \
DISABLED_WARNINGS_clang_splashscreen_impl.c := sign-compare \
unused-but-set-variable unused-function, \
DISABLED_WARNINGS_clang_splashscreen_png.c := \
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index e8f9733fe7e..fc53c10311b 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1,6 +1,7 @@
//
// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
+// Copyright 2025 Arm Limited and/or its affiliates.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -1194,15 +1195,10 @@ class HandlerImpl {
public:
- static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);
- static uint size_exception_handler() {
- return MacroAssembler::far_codestub_branch_size();
- }
-
static uint size_deopt_handler() {
- // count one adr and one far branch instruction
+ // count one branch instruction and one far call instruction sequence
return NativeInstruction::instruction_size + MacroAssembler::far_codestub_branch_size();
}
};
@@ -2261,25 +2257,6 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
//=============================================================================
-// Emit exception handler code.
-int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
-{
- // mov rscratch1 #exception_blob_entry_point
- // br rscratch1
- // Note that the code buffer's insts_mark is always relative to insts.
- // That's why we must use the macroassembler to generate a handler.
- address base = __ start_a_stub(size_exception_handler());
- if (base == nullptr) {
- ciEnv::current()->record_failure("CodeCache is full");
- return 0; // CodeBuffer::expand failed
- }
- int offset = __ offset();
- __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
- __ end_a_stub();
- return offset;
-}
-
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
{
@@ -2290,14 +2267,20 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
ciEnv::current()->record_failure("CodeCache is full");
return 0; // CodeBuffer::expand failed
}
- int offset = __ offset();
- __ adr(lr, __ pc());
- __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ int offset = __ offset();
+ Label start;
+ __ bind(start);
+ __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ int entry_offset = __ offset();
+ __ b(start);
assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
+ assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
// REQUIRED MATCHER CODE
@@ -2473,6 +2456,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
+bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
+ return false;
+}
+
bool Matcher::is_generic_vector(MachOper* opnd) {
return opnd->opcode() == VREG;
}
@@ -3388,28 +3375,28 @@ encode %{
// aarch64_enc_cmpxchg_acq is that we use load-acquire in the
// CompareAndSwap sequence to serve as a barrier on acquiring a
// lock.
- enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
+ enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegL oldval, iRegL newval) %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
%}
- enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegI oldval, iRegI newval) %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
%}
- enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegI oldval, iRegI newval) %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
%}
- enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+ enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegI oldval, iRegI newval) %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ true, /*release*/ true,
@@ -3417,7 +3404,7 @@ encode %{
%}
// auxiliary used for CompareAndSwapX to set result register
- enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
+ enc_class aarch64_enc_cset_eq(iRegI res) %{
Register res_reg = as_Register($res$$reg);
__ cset(res_reg, Assembler::EQ);
%}
@@ -8403,7 +8390,7 @@ instruct castVVMask(pRegGov dst)
// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them
-instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
@@ -8421,7 +8408,7 @@ instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoS
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
@@ -8439,7 +8426,7 @@ instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoS
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
@@ -8457,7 +8444,7 @@ instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoS
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
@@ -8494,7 +8481,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
predicate(n->as_LoadStore()->barrier_data() == 0);
@@ -8515,7 +8502,7 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoS
// alternative CompareAndSwapX when we are eliding barriers
-instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
@@ -8534,7 +8521,7 @@ instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegI
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
@@ -8553,7 +8540,7 @@ instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegI
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
@@ -8572,7 +8559,7 @@ instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegI
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
@@ -8610,7 +8597,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new
ins_pipe(pipe_slow);
%}
-instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index 9ab463125fe..37a6a130e0d 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -449,12 +449,20 @@ int LIR_Assembler::emit_deopt_handler() {
int offset = code_offset();
- __ adr(lr, pc());
- __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ Label start;
+ __ bind(start);
+
+ __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ int entry_offset = __ offset();
+ __ b(start);
+
guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+ assert(code_offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
index 12b941fc4f7..729cd2827b7 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
@@ -71,7 +71,7 @@ friend class ArrayCopyStub;
// CompiledDirectCall::to_trampoline_stub_size()
_call_stub_size = 13 * NativeInstruction::instruction_size,
_exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
- _deopt_handler_size = 7 * NativeInstruction::instruction_size
+ _deopt_handler_size = 4 * NativeInstruction::instruction_size
};
public:
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
index 5a7fececafa..f2003dd9b55 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
@@ -394,12 +394,6 @@ void NativePostCallNop::make_deopt() {
NativeDeoptInstruction::insert(addr_at(0));
}
-#ifdef ASSERT
-static bool is_movk_to_zr(uint32_t insn) {
- return ((insn & 0xffe0001f) == 0xf280001f);
-}
-#endif
-
bool NativePostCallNop::patch(int32_t oopmap_slot, int32_t cb_offset) {
if (((oopmap_slot & 0xff) != oopmap_slot) || ((cb_offset & 0xffffff) != cb_offset)) {
return false; // cannot encode
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index df5d97c2376..c30cb911d96 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -526,14 +526,31 @@ inline NativeLdSt* NativeLdSt_at(address addr) {
// can store an offset from the initial nop to the nmethod.
class NativePostCallNop: public NativeInstruction {
+private:
+ static bool is_movk_to_zr(uint32_t insn) {
+ return ((insn & 0xffe0001f) == 0xf280001f);
+ }
+
public:
+ enum AArch64_specific_constants {
+ // The two parts should be checked separately to prevent out of bounds access in case
+ // the return address points to the deopt handler stub code entry point which could be
+ // at the end of page.
+ first_check_size = instruction_size
+ };
+
bool check() const {
- uint64_t insns = *(uint64_t*)addr_at(0);
- // Check for two instructions: nop; movk zr, xx
- // These instructions only ever appear together in a post-call
- // NOP, so it's unnecessary to check that the third instruction is
- // a MOVK as well.
- return (insns & 0xffe0001fffffffff) == 0xf280001fd503201f;
+ // Check the first instruction is NOP.
+ if (is_nop()) {
+ uint32_t insn = *(uint32_t*)addr_at(first_check_size);
+ // Check next instruction is MOVK zr, xx.
+ // These instructions only ever appear together in a post-call
+ // NOP, so it's unnecessary to check that the third instruction is
+ // a MOVK as well.
+ return is_movk_to_zr(insn);
+ }
+
+ return false;
}
bool decode(int32_t& oopmap_slot, int32_t& cb_offset) const {
diff --git a/src/hotspot/cpu/aarch64/runtime_aarch64.cpp b/src/hotspot/cpu/aarch64/runtime_aarch64.cpp
index d45f9865bd2..e36aa21b567 100644
--- a/src/hotspot/cpu/aarch64/runtime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/runtime_aarch64.cpp
@@ -260,8 +260,6 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
-// Using exception blob, this code is jumped from a compiled method.
-// (see emit_exception_handler in aarch64.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 92c0df68deb..606275d7666 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -105,14 +105,8 @@ class HandlerImpl {
public:
- static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);
- static uint size_exception_handler() {
- return ( 3 * 4 );
- }
-
-
static uint size_deopt_handler() {
return ( 9 * 4 );
}
@@ -876,26 +870,6 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
//=============================================================================
-// Emit exception handler code.
-int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
- address base = __ start_a_stub(size_exception_handler());
- if (base == nullptr) {
- ciEnv::current()->record_failure("CodeCache is full");
- return 0; // CodeBuffer::expand failed
- }
-
- int offset = __ offset();
-
- // OK to trash LR, because exception blob will kill it
- __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, LR_tmp);
-
- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
-
- __ end_a_stub();
-
- return offset;
-}
-
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
// Can't use any of the current frame's registers as we may have deopted
// at a poll and everything can be live.
@@ -906,19 +880,28 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
}
int offset = __ offset();
- address deopt_pc = __ pc();
- __ sub(SP, SP, wordSize); // make room for saved PC
- __ push(LR); // save LR that may be live when we get here
- __ mov_relative_address(LR, deopt_pc);
- __ str(LR, Address(SP, wordSize)); // save deopt PC
- __ pop(LR); // restore LR
+ Label start;
+ __ bind(start);
+
__ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
+ int entry_offset = __ offset();
+ address deopt_pc = __ pc();
+ // Preserve R0 and reserve space for the address of the entry point
+ __ push(RegisterSet(R0) | RegisterSet(R1));
+ // Store the entry point address
+ __ mov_relative_address(R0, deopt_pc);
+ __ str(R0, Address(SP, wordSize));
+ __ pop(R0); // restore R0
+ __ b(start);
+
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+ assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
bool Matcher::match_rule_supported(int opcode) {
@@ -1080,6 +1063,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
+bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
+ return false;
+}
+
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;
diff --git a/src/hotspot/cpu/arm/arm_32.ad b/src/hotspot/cpu/arm/arm_32.ad
index 00bf3bd61e4..9438e8da8b5 100644
--- a/src/hotspot/cpu/arm/arm_32.ad
+++ b/src/hotspot/cpu/arm/arm_32.ad
@@ -62,22 +62,22 @@ register %{
// Integer/Long Registers
// ----------------------------
-reg_def R_R0 (SOC, SOC, Op_RegI, 0, R(0)->as_VMReg());
-reg_def R_R1 (SOC, SOC, Op_RegI, 1, R(1)->as_VMReg());
-reg_def R_R2 (SOC, SOC, Op_RegI, 2, R(2)->as_VMReg());
-reg_def R_R3 (SOC, SOC, Op_RegI, 3, R(3)->as_VMReg());
-reg_def R_R4 (SOC, SOE, Op_RegI, 4, R(4)->as_VMReg());
-reg_def R_R5 (SOC, SOE, Op_RegI, 5, R(5)->as_VMReg());
-reg_def R_R6 (SOC, SOE, Op_RegI, 6, R(6)->as_VMReg());
-reg_def R_R7 (SOC, SOE, Op_RegI, 7, R(7)->as_VMReg());
-reg_def R_R8 (SOC, SOE, Op_RegI, 8, R(8)->as_VMReg());
-reg_def R_R9 (SOC, SOE, Op_RegI, 9, R(9)->as_VMReg());
-reg_def R_R10(NS, SOE, Op_RegI, 10, R(10)->as_VMReg());
-reg_def R_R11(NS, SOE, Op_RegI, 11, R(11)->as_VMReg());
-reg_def R_R12(SOC, SOC, Op_RegI, 12, R(12)->as_VMReg());
-reg_def R_R13(NS, NS, Op_RegI, 13, R(13)->as_VMReg());
-reg_def R_R14(SOC, SOC, Op_RegI, 14, R(14)->as_VMReg());
-reg_def R_R15(NS, NS, Op_RegI, 15, R(15)->as_VMReg());
+reg_def R_R0 (SOC, SOC, Op_RegI, 0, as_Register(0)->as_VMReg());
+reg_def R_R1 (SOC, SOC, Op_RegI, 1, as_Register(1)->as_VMReg());
+reg_def R_R2 (SOC, SOC, Op_RegI, 2, as_Register(2)->as_VMReg());
+reg_def R_R3 (SOC, SOC, Op_RegI, 3, as_Register(3)->as_VMReg());
+reg_def R_R4 (SOC, SOE, Op_RegI, 4, as_Register(4)->as_VMReg());
+reg_def R_R5 (SOC, SOE, Op_RegI, 5, as_Register(5)->as_VMReg());
+reg_def R_R6 (SOC, SOE, Op_RegI, 6, as_Register(6)->as_VMReg());
+reg_def R_R7 (SOC, SOE, Op_RegI, 7, as_Register(7)->as_VMReg());
+reg_def R_R8 (SOC, SOE, Op_RegI, 8, as_Register(8)->as_VMReg());
+reg_def R_R9 (SOC, SOE, Op_RegI, 9, as_Register(9)->as_VMReg());
+reg_def R_R10(NS, SOE, Op_RegI, 10, as_Register(10)->as_VMReg());
+reg_def R_R11(NS, SOE, Op_RegI, 11, as_Register(11)->as_VMReg());
+reg_def R_R12(SOC, SOC, Op_RegI, 12, as_Register(12)->as_VMReg());
+reg_def R_R13(NS, NS, Op_RegI, 13, as_Register(13)->as_VMReg());
+reg_def R_R14(SOC, SOC, Op_RegI, 14, as_Register(14)->as_VMReg());
+reg_def R_R15(NS, NS, Op_RegI, 15, as_Register(15)->as_VMReg());
// ----------------------------
// Float/Double Registers
diff --git a/src/hotspot/cpu/arm/assembler_arm_32.hpp b/src/hotspot/cpu/arm/assembler_arm_32.hpp
index ae13644ecf9..d6524f08680 100644
--- a/src/hotspot/cpu/arm/assembler_arm_32.hpp
+++ b/src/hotspot/cpu/arm/assembler_arm_32.hpp
@@ -114,7 +114,7 @@ class RegisterSet {
}
RegisterSet(Register first, Register last) {
- assert(first < last, "encoding constraint");
+ assert(first->encoding() < last->encoding(), "encoding constraint");
_encoding = (1 << (last->encoding() + 1)) - (1 << first->encoding());
}
diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
index 8e49cfcbcaa..3ef02e44b65 100644
--- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
@@ -181,7 +181,7 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
const Register lock_reg = _lock_reg->as_pointer_register();
ce->verify_reserved_argument_area_size(2);
- if (obj_reg < lock_reg) {
+ if (obj_reg->encoding() < lock_reg->encoding()) {
__ stmia(SP, RegisterSet(obj_reg) | RegisterSet(lock_reg));
} else {
__ str(obj_reg, Address(SP));
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index 219c49d1f14..b314577c2c8 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -272,14 +272,22 @@ int LIR_Assembler::emit_deopt_handler() {
int offset = code_offset();
- __ mov_relative_address(LR, __ pc());
- __ push(LR); // stub expects LR to be saved
+ Label start;
+ __ bind(start);
+
__ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
+ int entry_offset = __ offset();
+ __ mov_relative_address(LR, __ pc());
+ __ push(LR); // stub expects LR to be saved
+ __ b(start);
+
assert(code_offset() - offset <= deopt_handler_size(), "overflow");
+ assert(code_offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
@@ -2631,11 +2639,11 @@ void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type,
const Register src_hi = src->as_register_hi();
assert(addr->index()->is_illegal() && addr->disp() == 0, "The address is simple already");
- if (src_lo < src_hi) {
+ if (src_lo->encoding() < src_hi->encoding()) {
null_check_offset = __ offset();
__ stmia(addr->base()->as_register(), RegisterSet(src_lo) | RegisterSet(src_hi));
} else {
- assert(src_lo < Rtemp, "Rtemp is higher than any allocatable register");
+ assert(src_lo->encoding() < Rtemp->encoding(), "Rtemp is higher than any allocatable register");
__ mov(Rtemp, src_hi);
null_check_offset = __ offset();
__ stmia(addr->base()->as_register(), RegisterSet(src_lo) | RegisterSet(Rtemp));
@@ -2648,10 +2656,10 @@ void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type,
assert(addr->index()->is_illegal() && addr->disp() == 0, "The address is simple already");
null_check_offset = __ offset();
- if (dest_lo < dest_hi) {
+ if (dest_lo->encoding() < dest_hi->encoding()) {
__ ldmia(addr->base()->as_register(), RegisterSet(dest_lo) | RegisterSet(dest_hi));
} else {
- assert(dest_lo < Rtemp, "Rtemp is higher than any allocatable register");
+ assert(dest_lo->encoding() < Rtemp->encoding(), "Rtemp is higher than any allocatable register");
__ ldmia(addr->base()->as_register(), RegisterSet(dest_lo) | RegisterSet(Rtemp));
__ mov(dest_hi, Rtemp);
}
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp
index 77d13532685..615d2f188ff 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp
@@ -54,7 +54,7 @@
enum {
_call_stub_size = 16,
_exception_handler_size = PRODUCT_ONLY(68) NOT_PRODUCT(68+60),
- _deopt_handler_size = 16
+ _deopt_handler_size = 20
};
public:
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index 720413c9c5b..23ecea24eb2 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -409,7 +409,7 @@ void InterpreterMacroAssembler::pop_i(Register r) {
void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
assert_different_registers(lo, hi);
- assert(lo < hi, "lo must be < hi");
+ assert(lo->encoding() < hi->encoding(), "lo must be < hi");
pop(RegisterSet(lo) | RegisterSet(hi));
}
@@ -459,7 +459,7 @@ void InterpreterMacroAssembler::push_i(Register r) {
void InterpreterMacroAssembler::push_l(Register lo, Register hi) {
assert_different_registers(lo, hi);
- assert(lo < hi, "lo must be < hi");
+ assert(lo->encoding() < hi->encoding(), "lo must be < hi");
push(RegisterSet(lo) | RegisterSet(hi));
}
diff --git a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp
index ee856bcfe60..82385bf0244 100644
--- a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp
+++ b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp
@@ -430,6 +430,13 @@ inline NativeCall* nativeCall_before(address return_address) {
class NativePostCallNop: public NativeInstruction {
public:
+ enum arm_specific_constants {
+ // If the check is adjusted to read beyond size of the instruction sequence at the deopt
+ // handler stub code entry point, it has to happen in two stages - to prevent out of bounds
+ // access in case the return address points to the entry point which could be at
+ // the end of page.
+ first_check_size = instruction_size
+ };
bool check() const { return is_nop(); }
bool decode(int32_t& oopmap_slot, int32_t& cb_offset) const { return false; }
bool patch(int32_t oopmap_slot, int32_t cb_offset) { return false; }
diff --git a/src/hotspot/cpu/arm/register_arm.cpp b/src/hotspot/cpu/arm/register_arm.cpp
index ea3ef87e670..296c55e2e16 100644
--- a/src/hotspot/cpu/arm/register_arm.cpp
+++ b/src/hotspot/cpu/arm/register_arm.cpp
@@ -25,12 +25,19 @@
#include "register_arm.hpp"
#include "utilities/debug.hpp"
-const int ConcreteRegisterImpl::max_gpr = ConcreteRegisterImpl::num_gpr;
-const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::num_fpr +
- ConcreteRegisterImpl::max_gpr;
+Register::RegisterImpl all_RegisterImpls [Register::number_of_registers + 1];
+FloatRegister::FloatRegisterImpl all_FloatRegisterImpls [FloatRegister::number_of_registers + 1];
+VFPSystemRegister::VFPSystemRegisterImpl all_VFPSystemRegisterImpls [VFPSystemRegister::number_of_registers + 1] {
+ { -1 }, //vfpsnoreg
+ { VFPSystemRegister::FPSID },
+ { VFPSystemRegister::FPSCR },
+ { VFPSystemRegister::MVFR0 },
+ { VFPSystemRegister::MVFR1 }
+};
-const char* RegisterImpl::name() const {
- const char* names[number_of_registers] = {
+const char* Register::RegisterImpl::name() const {
+ static const char* names[number_of_registers + 1] = {
+ "noreg",
"r0", "r1", "r2", "r3", "r4", "r5", "r6",
#if (FP_REG_NUM == 7)
"fp",
@@ -45,13 +52,14 @@ const char* RegisterImpl::name() const {
#endif
"r12", "sp", "lr", "pc"
};
- return is_valid() ? names[encoding()] : "noreg";
+ return names[encoding() + 1];
}
-const char* FloatRegisterImpl::name() const {
- const char* names[number_of_registers] = {
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
- "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+const char* FloatRegister::FloatRegisterImpl::name() const {
+ static const char* names[number_of_registers + 1] = {
+ "fnoreg",
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+ "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"
#ifdef COMPILER2
@@ -61,5 +69,5 @@ const char* FloatRegisterImpl::name() const {
"s56", "s57?","s58", "s59?","s60", "s61?","s62", "s63?"
#endif
};
- return is_valid() ? names[encoding()] : "fnoreg";
+ return names[encoding() + 1];
}
diff --git a/src/hotspot/cpu/arm/register_arm.hpp b/src/hotspot/cpu/arm/register_arm.hpp
index e0688af0d36..401d25a4fce 100644
--- a/src/hotspot/cpu/arm/register_arm.hpp
+++ b/src/hotspot/cpu/arm/register_arm.hpp
@@ -31,26 +31,6 @@
class VMRegImpl;
typedef VMRegImpl* VMReg;
-// These are declared ucontext.h
-#undef R0
-#undef R1
-#undef R2
-#undef R3
-#undef R4
-#undef R5
-#undef R6
-#undef R7
-#undef R8
-#undef R9
-#undef R10
-#undef R11
-#undef R12
-#undef R13
-#undef R14
-#undef R15
-
-#define R(r) ((Register)(r))
-
/////////////////////////////////
// Support for different ARM ABIs
// Note: default ABI is for linux
@@ -94,25 +74,86 @@ typedef VMRegImpl* VMReg;
#define ALIGN_WIDE_ARGUMENTS 1
#endif
-#define R0 ((Register)0)
-#define R1 ((Register)1)
-#define R2 ((Register)2)
-#define R3 ((Register)3)
-#define R4 ((Register)4)
-#define R5 ((Register)5)
-#define R6 ((Register)6)
-#define R7 ((Register)7)
-#define R8 ((Register)8)
-#define R9 ((Register)9)
-#define R10 ((Register)10)
-#define R11 ((Register)11)
-#define R12 ((Register)12)
-#define R13 ((Register)13)
-#define R14 ((Register)14)
-#define R15 ((Register)15)
+class Register {
+ private:
+ int _encoding;
+
+ constexpr explicit Register(int encoding) : _encoding(encoding) {}
+
+ public:
+ enum {
+ number_of_registers = 16,
+ max_slots_per_register = 1
+ };
+
+ class RegisterImpl : public AbstractRegisterImpl {
+ friend class Register;
+
+ static constexpr const RegisterImpl* first();
+
+ public:
+
+ // accessors and testers
+ int raw_encoding() const { return this - first(); }
+ int encoding() const { assert(is_valid(), "invalid register"); return raw_encoding(); }
+ bool is_valid() const { return 0 <= raw_encoding() && raw_encoding() < number_of_registers; }
+
+ inline Register successor() const;
+
+ VMReg as_VMReg() const;
+
+ const char* name() const;
+ };
-#define FP ((Register)FP_REG_NUM)
+ inline friend constexpr Register as_Register(int encoding);
+
+ constexpr Register() : _encoding(-1) {} //noreg
+
+ int operator==(const Register r) const { return _encoding == r._encoding; }
+ int operator!=(const Register r) const { return _encoding != r._encoding; }
+
+ const RegisterImpl* operator->() const { return RegisterImpl::first() + _encoding; }
+};
+
+extern Register::RegisterImpl all_RegisterImpls[Register::number_of_registers + 1] INTERNAL_VISIBILITY;
+
+inline constexpr const Register::RegisterImpl* Register::RegisterImpl::first() {
+ return all_RegisterImpls + 1;
+}
+
+constexpr Register noreg = Register();
+
+inline constexpr Register as_Register(int encoding) {
+ if (0 <= encoding && encoding < Register::number_of_registers) {
+ return Register(encoding);
+ }
+ return noreg;
+}
+
+inline Register Register::RegisterImpl::successor() const {
+  assert(is_valid(), "sanity");
+ return as_Register(encoding() + 1);
+}
+
+constexpr Register R0 = as_Register( 0);
+constexpr Register R1 = as_Register( 1);
+constexpr Register R2 = as_Register( 2);
+constexpr Register R3 = as_Register( 3);
+constexpr Register R4 = as_Register( 4);
+constexpr Register R5 = as_Register( 5);
+constexpr Register R6 = as_Register( 6);
+constexpr Register R7 = as_Register( 7);
+constexpr Register R8 = as_Register( 8);
+constexpr Register R9 = as_Register( 9);
+constexpr Register R10 = as_Register(10);
+constexpr Register R11 = as_Register(11);
+constexpr Register R12 = as_Register(12);
+constexpr Register R13 = as_Register(13);
+constexpr Register R14 = as_Register(14);
+constexpr Register R15 = as_Register(15);
+
+constexpr Register FP = as_Register(FP_REG_NUM);
// Safe use of registers which may be FP on some platforms.
//
@@ -122,185 +163,170 @@ typedef VMRegImpl* VMReg;
// as FP on supported ABIs (and replace R# by altFP_#_11). altFP_#_11
// must be #define to R11 if and only if # is FP_REG_NUM.
#if (FP_REG_NUM == 7)
-#define altFP_7_11 ((Register)11)
+constexpr Register altFP_7_11 = R11;
#else
-#define altFP_7_11 ((Register)7)
+constexpr Register altFP_7_11 = R7;
#endif
-#define SP R13
-#define LR R14
-#define PC R15
+constexpr Register SP = R13;
+constexpr Register LR = R14;
+constexpr Register PC = R15;
-class RegisterImpl;
-typedef RegisterImpl* Register;
+class FloatRegister {
+ private:
+ int _encoding;
-inline Register as_Register(int encoding) {
- return (Register)(intptr_t)encoding;
-}
+ constexpr explicit FloatRegister(int encoding) : _encoding(encoding) {}
-class RegisterImpl : public AbstractRegisterImpl {
public:
enum {
- number_of_registers = 16
+ number_of_registers = NOT_COMPILER2(32) COMPILER2_PRESENT(64),
+ max_slots_per_register = 1
};
- Register successor() const { return as_Register(encoding() + 1); }
+ class FloatRegisterImpl : public AbstractRegisterImpl {
+ friend class FloatRegister;
- inline friend Register as_Register(int encoding);
+ static constexpr const FloatRegisterImpl* first();
- VMReg as_VMReg();
+ public:
- // accessors
- int encoding() const { assert(is_valid(), "invalid register"); return value(); }
- const char* name() const;
+ // accessors and testers
+ int raw_encoding() const { return this - first(); }
+ int encoding() const { assert(is_valid(), "invalid register"); return raw_encoding(); }
+ bool is_valid() const { return 0 <= raw_encoding() && raw_encoding() < number_of_registers; }
+ inline FloatRegister successor() const;
- // testers
- bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
+ VMReg as_VMReg() const;
+ int hi_bits() const {
+ return (encoding() >> 1) & 0xf;
+ }
+
+ int lo_bit() const {
+ return encoding() & 1;
+ }
+
+ int hi_bit() const {
+ return encoding() >> 5;
+ }
+
+ const char* name() const;
+ };
+
+ inline friend constexpr FloatRegister as_FloatRegister(int encoding);
+
+ constexpr FloatRegister() : _encoding(-1) {} // fnoreg
+
+ int operator==(const FloatRegister r) const { return _encoding == r._encoding; }
+ int operator!=(const FloatRegister r) const { return _encoding != r._encoding; }
+
+ const FloatRegisterImpl* operator->() const { return FloatRegisterImpl::first() + _encoding; }
};
-CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+extern FloatRegister::FloatRegisterImpl all_FloatRegisterImpls[FloatRegister::number_of_registers + 1] INTERNAL_VISIBILITY;
-
-// Use FloatRegister as shortcut
-class FloatRegisterImpl;
-typedef FloatRegisterImpl* FloatRegister;
-
-inline FloatRegister as_FloatRegister(int encoding) {
- return (FloatRegister)(intptr_t)encoding;
+inline constexpr const FloatRegister::FloatRegisterImpl* FloatRegister::FloatRegisterImpl::first() {
+ return all_FloatRegisterImpls + 1;
}
-class FloatRegisterImpl : public AbstractRegisterImpl {
- public:
- enum {
- number_of_registers = NOT_COMPILER2(32) COMPILER2_PRESENT(64)
- };
+constexpr FloatRegister fnoreg = FloatRegister();
- inline friend FloatRegister as_FloatRegister(int encoding);
-
- VMReg as_VMReg();
-
- int encoding() const { assert(is_valid(), "invalid register"); return value(); }
- bool is_valid() const { return 0 <= (intx)this && (intx)this < number_of_registers; }
- FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
-
- const char* name() const;
-
- int hi_bits() const {
- return (encoding() >> 1) & 0xf;
+inline constexpr FloatRegister as_FloatRegister(int encoding) {
+ if (0 <= encoding && encoding < FloatRegister::number_of_registers) {
+ return FloatRegister(encoding);
}
+ return fnoreg;
+}
- int lo_bit() const {
- return encoding() & 1;
- }
-
- int hi_bit() const {
- return encoding() >> 5;
- }
-};
-
-CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
+inline FloatRegister FloatRegister::FloatRegisterImpl::successor() const {
+  assert(is_valid(), "sanity");
+ return as_FloatRegister(encoding() + 1);
+}
/*
* S1-S6 are named with "_reg" suffix to avoid conflict with
* constants defined in sharedRuntimeTrig.cpp
*/
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S0, ( 0));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S1_reg, ( 1));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S2_reg, ( 2));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S3_reg, ( 3));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S4_reg, ( 4));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S5_reg, ( 5));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S6_reg, ( 6));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S7, ( 7));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S8, ( 8));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S9, ( 9));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S10, (10));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S11, (11));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S12, (12));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S13, (13));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S14, (14));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S15, (15));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S16, (16));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S17, (17));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S18, (18));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S19, (19));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S20, (20));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S21, (21));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S22, (22));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S23, (23));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S24, (24));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S25, (25));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S26, (26));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S27, (27));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S28, (28));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S29, (29));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S30, (30));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, S31, (31));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, Stemp, (30));
+constexpr FloatRegister S0 = as_FloatRegister( 0);
+constexpr FloatRegister S1_reg = as_FloatRegister(1);
+constexpr FloatRegister S2_reg = as_FloatRegister(2);
+constexpr FloatRegister S3_reg = as_FloatRegister(3);
+constexpr FloatRegister S4_reg = as_FloatRegister(4);
+constexpr FloatRegister S5_reg = as_FloatRegister(5);
+constexpr FloatRegister S6_reg = as_FloatRegister(6);
+constexpr FloatRegister S7 = as_FloatRegister( 7);
+constexpr FloatRegister S8 = as_FloatRegister( 8);
+constexpr FloatRegister S9 = as_FloatRegister( 9);
+constexpr FloatRegister S10 = as_FloatRegister(10);
+constexpr FloatRegister S11 = as_FloatRegister(11);
+constexpr FloatRegister S12 = as_FloatRegister(12);
+constexpr FloatRegister S13 = as_FloatRegister(13);
+constexpr FloatRegister S14 = as_FloatRegister(14);
+constexpr FloatRegister S15 = as_FloatRegister(15);
+constexpr FloatRegister S16 = as_FloatRegister(16);
+constexpr FloatRegister S17 = as_FloatRegister(17);
+constexpr FloatRegister S18 = as_FloatRegister(18);
+constexpr FloatRegister S19 = as_FloatRegister(19);
+constexpr FloatRegister S20 = as_FloatRegister(20);
+constexpr FloatRegister S21 = as_FloatRegister(21);
+constexpr FloatRegister S22 = as_FloatRegister(22);
+constexpr FloatRegister S23 = as_FloatRegister(23);
+constexpr FloatRegister S24 = as_FloatRegister(24);
+constexpr FloatRegister S25 = as_FloatRegister(25);
+constexpr FloatRegister S26 = as_FloatRegister(26);
+constexpr FloatRegister S27 = as_FloatRegister(27);
+constexpr FloatRegister S28 = as_FloatRegister(28);
+constexpr FloatRegister S29 = as_FloatRegister(29);
+constexpr FloatRegister S30 = as_FloatRegister(30);
+constexpr FloatRegister S31 = as_FloatRegister(31);
+constexpr FloatRegister Stemp = S30;
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D0, ( 0));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D1, ( 2));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D2, ( 4));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D3, ( 6));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D4, ( 8));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D5, ( 10));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D6, ( 12));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D7, ( 14));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D8, ( 16));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D9, ( 18));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D10, ( 20));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D11, ( 22));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D12, ( 24));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D13, ( 26));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D14, ( 28));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D15, (30));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D16, (32));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D17, (34));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D18, (36));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D19, (38));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D20, (40));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D21, (42));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D22, (44));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D23, (46));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D24, (48));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D25, (50));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D26, (52));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D27, (54));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D28, (56));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D29, (58));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D30, (60));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, D31, (62));
+constexpr FloatRegister D0 = as_FloatRegister( 0);
+constexpr FloatRegister D1 = as_FloatRegister( 2);
+constexpr FloatRegister D2 = as_FloatRegister( 4);
+constexpr FloatRegister D3 = as_FloatRegister( 6);
+constexpr FloatRegister D4 = as_FloatRegister( 8);
+constexpr FloatRegister D5 = as_FloatRegister(10);
+constexpr FloatRegister D6 = as_FloatRegister(12);
+constexpr FloatRegister D7 = as_FloatRegister(14);
+constexpr FloatRegister D8 = as_FloatRegister(16);
+constexpr FloatRegister D9 = as_FloatRegister(18);
+constexpr FloatRegister D10 = as_FloatRegister(20);
+constexpr FloatRegister D11 = as_FloatRegister(22);
+constexpr FloatRegister D12 = as_FloatRegister(24);
+constexpr FloatRegister D13 = as_FloatRegister(26);
+constexpr FloatRegister D14 = as_FloatRegister(28);
+constexpr FloatRegister D15 = as_FloatRegister(30);
+constexpr FloatRegister D16 = as_FloatRegister(32);
+constexpr FloatRegister D17 = as_FloatRegister(34);
+constexpr FloatRegister D18 = as_FloatRegister(36);
+constexpr FloatRegister D19 = as_FloatRegister(38);
+constexpr FloatRegister D20 = as_FloatRegister(40);
+constexpr FloatRegister D21 = as_FloatRegister(42);
+constexpr FloatRegister D22 = as_FloatRegister(44);
+constexpr FloatRegister D23 = as_FloatRegister(46);
+constexpr FloatRegister D24 = as_FloatRegister(48);
+constexpr FloatRegister D25 = as_FloatRegister(50);
+constexpr FloatRegister D26 = as_FloatRegister(52);
+constexpr FloatRegister D27 = as_FloatRegister(54);
+constexpr FloatRegister D28 = as_FloatRegister(56);
+constexpr FloatRegister D29 = as_FloatRegister(58);
+constexpr FloatRegister D30 = as_FloatRegister(60);
+constexpr FloatRegister D31 = as_FloatRegister(62);
class ConcreteRegisterImpl : public AbstractRegisterImpl {
public:
enum {
- log_vmregs_per_word = LogBytesPerWord - LogBytesPerInt, // VMRegs are of 4-byte size
-#ifdef COMPILER2
- log_bytes_per_fpr = 2, // quad vectors
-#else
- log_bytes_per_fpr = 2, // double vectors
-#endif
- log_words_per_fpr = log_bytes_per_fpr - LogBytesPerWord,
- words_per_fpr = 1 << log_words_per_fpr,
- log_vmregs_per_fpr = log_bytes_per_fpr - LogBytesPerInt,
- log_vmregs_per_gpr = log_vmregs_per_word,
- vmregs_per_gpr = 1 << log_vmregs_per_gpr,
- vmregs_per_fpr = 1 << log_vmregs_per_fpr,
+ max_gpr = Register::number_of_registers * Register::max_slots_per_register,
+ max_fpr = max_gpr + FloatRegister::number_of_registers * FloatRegister::max_slots_per_register,
- num_gpr = RegisterImpl::number_of_registers << log_vmregs_per_gpr,
- max_gpr0 = num_gpr,
- num_fpr = FloatRegisterImpl::number_of_registers << log_vmregs_per_fpr,
- max_fpr0 = max_gpr0 + num_fpr,
- number_of_registers = num_gpr + num_fpr + 1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers
+ number_of_registers = max_fpr + 1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers
};
-
- static const int max_gpr;
- static const int max_fpr;
};
typedef AbstractRegSet RegSet;
@@ -328,100 +354,156 @@ inline FloatRegister AbstractRegSet::last() {
-class VFPSystemRegisterImpl;
-typedef VFPSystemRegisterImpl* VFPSystemRegister;
-class VFPSystemRegisterImpl : public AbstractRegisterImpl {
+class VFPSystemRegister {
+ private:
+ int _store_idx;
+
+ constexpr explicit VFPSystemRegister(int store_idx) : _store_idx(store_idx) {}
+
+ enum {
+ _FPSID_store_idx = 0,
+ _FPSCR_store_idx = 1,
+ _MVFR0_store_idx = 2,
+ _MVFR1_store_idx = 3
+ };
+
public:
- int encoding() const { return value(); }
+ enum {
+ FPSID = 0,
+ FPSCR = 1,
+ MVFR0 = 6,
+ MVFR1 = 7,
+ number_of_registers = 4
+ };
+
+ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
+ friend class VFPSystemRegister;
+
+ int _encoding;
+
+ static constexpr const VFPSystemRegisterImpl* first();
+
+ public:
+ constexpr VFPSystemRegisterImpl(int encoding) : _encoding(encoding) {}
+
+ int encoding() const { return _encoding; }
+ };
+
+ inline friend constexpr VFPSystemRegister as_VFPSystemRegister(int encoding);
+
+ constexpr VFPSystemRegister() : _store_idx(-1) {} // vfpsnoreg
+
+ int operator==(const VFPSystemRegister r) const { return _store_idx == r._store_idx; }
+ int operator!=(const VFPSystemRegister r) const { return _store_idx != r._store_idx; }
+
+ const VFPSystemRegisterImpl* operator->() const { return VFPSystemRegisterImpl::first() + _store_idx; }
};
-#define FPSID ((VFPSystemRegister)0)
-#define FPSCR ((VFPSystemRegister)1)
-#define MVFR0 ((VFPSystemRegister)0x6)
-#define MVFR1 ((VFPSystemRegister)0x7)
+extern VFPSystemRegister::VFPSystemRegisterImpl all_VFPSystemRegisterImpls[VFPSystemRegister::number_of_registers + 1] INTERNAL_VISIBILITY;
+
+inline constexpr const VFPSystemRegister::VFPSystemRegisterImpl* VFPSystemRegister::VFPSystemRegisterImpl::first() {
+ return all_VFPSystemRegisterImpls + 1;
+}
+
+constexpr VFPSystemRegister vfpsnoreg = VFPSystemRegister();
+
+inline constexpr VFPSystemRegister as_VFPSystemRegister(int encoding) {
+ switch (encoding) {
+ case VFPSystemRegister::FPSID: return VFPSystemRegister(VFPSystemRegister::_FPSID_store_idx);
+ case VFPSystemRegister::FPSCR: return VFPSystemRegister(VFPSystemRegister::_FPSCR_store_idx);
+ case VFPSystemRegister::MVFR0: return VFPSystemRegister(VFPSystemRegister::_MVFR0_store_idx);
+ case VFPSystemRegister::MVFR1: return VFPSystemRegister(VFPSystemRegister::_MVFR1_store_idx);
+ default: return vfpsnoreg;
+ }
+}
+
+constexpr VFPSystemRegister FPSID = as_VFPSystemRegister(VFPSystemRegister::FPSID);
+constexpr VFPSystemRegister FPSCR = as_VFPSystemRegister(VFPSystemRegister::FPSCR);
+constexpr VFPSystemRegister MVFR0 = as_VFPSystemRegister(VFPSystemRegister::MVFR0);
+constexpr VFPSystemRegister MVFR1 = as_VFPSystemRegister(VFPSystemRegister::MVFR1);
/*
* Register definitions shared across interpreter and compiler
*/
-#define Rexception_obj R4
-#define Rexception_pc R5
+constexpr Register Rexception_obj = R4;
+constexpr Register Rexception_pc = R5;
/*
* Interpreter register definitions common to C++ and template interpreters.
*/
-#define Rlocals R8
-#define Rmethod R9
-#define Rthread R10
-#define Rtemp R12
+constexpr Register Rlocals = R8;
+constexpr Register Rmethod = R9;
+constexpr Register Rthread = R10;
+constexpr Register Rtemp = R12;
// Interpreter calling conventions
-#define Rparams SP
-#define Rsender_sp R4
+constexpr Register Rparams = SP;
+constexpr Register Rsender_sp = R4;
// JSR292
// Note: R5_mh is needed only during the call setup, including adapters
// This does not seem to conflict with Rexception_pc
// In case of issues, R3 might be OK but adapters calling the runtime would have to save it
-#define R5_mh R5 // MethodHandle register, used during the call setup
+constexpr Register R5_mh = R5; // MethodHandle register, used during the call setup
/*
* C++ Interpreter Register Defines
*/
-#define Rsave0 R4
-#define Rsave1 R5
-#define Rsave2 R6
-#define Rstate altFP_7_11 // R7 or R11
-#define Ricklass R8
+constexpr Register Rsave0 = R4;
+constexpr Register Rsave1 = R5;
+constexpr Register Rsave2 = R6;
+constexpr Register Rstate = altFP_7_11; // R7 or R11
+constexpr Register Ricklass = R8;
/*
* TemplateTable Interpreter Register Usage
*/
// Temporary registers
-#define R0_tmp R0
-#define R1_tmp R1
-#define R2_tmp R2
-#define R3_tmp R3
-#define R4_tmp R4
-#define R5_tmp R5
-#define R12_tmp R12
-#define LR_tmp LR
+constexpr Register R0_tmp = R0;
+constexpr Register R1_tmp = R1;
+constexpr Register R2_tmp = R2;
+constexpr Register R3_tmp = R3;
+constexpr Register R4_tmp = R4;
+constexpr Register R5_tmp = R5;
+constexpr Register R12_tmp = R12;
+constexpr Register LR_tmp = LR;
-#define S0_tmp S0
-#define S1_tmp S1_reg
+constexpr FloatRegister S0_tmp = S0;
+constexpr FloatRegister S1_tmp = S1_reg;
-#define D0_tmp D0
-#define D1_tmp D1
+constexpr FloatRegister D0_tmp = D0;
+constexpr FloatRegister D1_tmp = D1;
// Temporary registers saved across VM calls (according to C calling conventions)
-#define Rtmp_save0 R4
-#define Rtmp_save1 R5
+constexpr Register Rtmp_save0 = R4;
+constexpr Register Rtmp_save1 = R5;
// Cached TOS value
-#define R0_tos R0
+constexpr Register R0_tos = R0;
-#define R0_tos_lo R0
-#define R1_tos_hi R1
+constexpr Register R0_tos_lo = R0;
+constexpr Register R1_tos_hi = R1;
-#define S0_tos S0
-#define D0_tos D0
+constexpr FloatRegister S0_tos = S0;
+constexpr FloatRegister D0_tos = D0;
// Dispatch table
-#define RdispatchTable R6
+constexpr Register RdispatchTable = R6;
// Bytecode pointer
-#define Rbcp altFP_7_11
+constexpr Register Rbcp = altFP_7_11;
// Pre-loaded next bytecode for the dispatch
-#define R3_bytecode R3
+constexpr Register R3_bytecode = R3;
// Conventions between bytecode templates and stubs
-#define R2_ClassCastException_obj R2
-#define R4_ArrayIndexOutOfBounds_index R4
+constexpr Register R2_ClassCastException_obj = R2;
+constexpr Register R4_ArrayIndexOutOfBounds_index = R4;
// Interpreter expression stack top
-#define Rstack_top SP
+constexpr Register Rstack_top = SP;
/*
* Linux 32-bit ARM C ABI Register calling conventions
@@ -444,10 +526,11 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
* R14 (LR) Link register
* R15 (PC) Program Counter
*/
-#define c_rarg0 R0
-#define c_rarg1 R1
-#define c_rarg2 R2
-#define c_rarg3 R3
+
+constexpr Register c_rarg0 = R0;
+constexpr Register c_rarg1 = R1;
+constexpr Register c_rarg2 = R2;
+constexpr Register c_rarg3 = R3;
#define GPR_PARAMS 4
@@ -455,10 +538,10 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
// Java ABI
// XXX Is this correct?
-#define j_rarg0 c_rarg0
-#define j_rarg1 c_rarg1
-#define j_rarg2 c_rarg2
-#define j_rarg3 c_rarg3
+constexpr Register j_rarg0 = c_rarg0;
+constexpr Register j_rarg1 = c_rarg1;
+constexpr Register j_rarg2 = c_rarg2;
+constexpr Register j_rarg3 = c_rarg3;
#endif // CPU_ARM_REGISTER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/runtime_arm.cpp b/src/hotspot/cpu/arm/runtime_arm.cpp
index 8d48de5795a..29fd0aa0a10 100644
--- a/src/hotspot/cpu/arm/runtime_arm.cpp
+++ b/src/hotspot/cpu/arm/runtime_arm.cpp
@@ -182,8 +182,6 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
//------------------------------ generate_exception_blob ---------------------------
// creates exception blob at the end
-// Using exception blob, this code is jumped from a compiled method.
-// (see emit_exception_handler in sparc.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 76e38d29478..13e1f4493ff 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -70,7 +70,7 @@ public:
enum RegisterLayout {
- fpu_save_size = FloatRegisterImpl::number_of_registers,
+ fpu_save_size = FloatRegister::number_of_registers,
#ifndef __SOFTFP__
D0_offset = 0,
#endif
@@ -139,8 +139,8 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
if (VM_Version::has_vfp3_32()) {
__ fpush(FloatRegisterSet(D16, 16));
} else {
- if (FloatRegisterImpl::number_of_registers > 32) {
- assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
+ if (FloatRegister::number_of_registers > 32) {
+ assert(FloatRegister::number_of_registers == 64, "nb fp registers should be 64");
__ sub(SP, SP, 32 * wordSize);
}
}
@@ -182,8 +182,8 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr
if (VM_Version::has_vfp3_32()) {
__ fpop(FloatRegisterSet(D16, 16));
} else {
- if (FloatRegisterImpl::number_of_registers > 32) {
- assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
+ if (FloatRegister::number_of_registers > 32) {
+ assert(FloatRegister::number_of_registers == 64, "nb fp registers should be 64");
__ add(SP, SP, 32 * wordSize);
}
}
diff --git a/src/hotspot/cpu/arm/vmreg_arm.cpp b/src/hotspot/cpu/arm/vmreg_arm.cpp
index 4ce1dd0be20..efaf38ef729 100644
--- a/src/hotspot/cpu/arm/vmreg_arm.cpp
+++ b/src/hotspot/cpu/arm/vmreg_arm.cpp
@@ -30,14 +30,14 @@ void VMRegImpl::set_regName() {
Register reg = ::as_Register(0);
int i;
for (i = 0; i < ConcreteRegisterImpl::max_gpr; reg = reg->successor()) {
- for (int j = 0; j < (1 << ConcreteRegisterImpl::log_vmregs_per_gpr); j++) {
+ for (int j = 0; j < Register::max_slots_per_register; j++) {
regName[i++] = reg->name();
}
}
#ifndef __SOFTFP__
FloatRegister freg = ::as_FloatRegister(0);
for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
- for (int j = 0; j < (1 << ConcreteRegisterImpl::log_vmregs_per_fpr); j++) {
+ for (int j = 0; j < Register::max_slots_per_register; j++) {
regName[i++] = freg->name();
}
freg = freg->successor();
diff --git a/src/hotspot/cpu/arm/vmreg_arm.hpp b/src/hotspot/cpu/arm/vmreg_arm.hpp
index c13f443b804..f1dfd09a1e6 100644
--- a/src/hotspot/cpu/arm/vmreg_arm.hpp
+++ b/src/hotspot/cpu/arm/vmreg_arm.hpp
@@ -36,20 +36,20 @@
inline Register as_Register() {
assert(is_Register(), "must be");
assert(is_concrete(), "concrete register expected");
- return ::as_Register(value() >> ConcreteRegisterImpl::log_vmregs_per_gpr);
+ return ::as_Register(value() / Register::max_slots_per_register);
}
inline FloatRegister as_FloatRegister() {
assert(is_FloatRegister(), "must be");
assert(is_concrete(), "concrete register expected");
- return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> ConcreteRegisterImpl::log_vmregs_per_fpr);
+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / FloatRegister::max_slots_per_register);
}
inline bool is_concrete() {
if (is_Register()) {
- return ((value() & right_n_bits(ConcreteRegisterImpl::log_vmregs_per_gpr)) == 0);
+ return (value() % Register::max_slots_per_register == 0);
} else if (is_FloatRegister()) {
- return (((value() - ConcreteRegisterImpl::max_gpr) & right_n_bits(ConcreteRegisterImpl::log_vmregs_per_fpr)) == 0);
+ return (value() % FloatRegister::max_slots_per_register == 0); // Single slot
} else {
return false;
}
diff --git a/src/hotspot/cpu/arm/vmreg_arm.inline.hpp b/src/hotspot/cpu/arm/vmreg_arm.inline.hpp
index f122b9ede70..3e5c18dbda0 100644
--- a/src/hotspot/cpu/arm/vmreg_arm.inline.hpp
+++ b/src/hotspot/cpu/arm/vmreg_arm.inline.hpp
@@ -25,11 +25,11 @@
#ifndef CPU_ARM_VMREG_ARM_INLINE_HPP
#define CPU_ARM_VMREG_ARM_INLINE_HPP
-inline VMReg RegisterImpl::as_VMReg() {
- return VMRegImpl::as_VMReg(encoding() << ConcreteRegisterImpl::log_vmregs_per_gpr);
+inline VMReg Register::RegisterImpl::as_VMReg() const {
+ return VMRegImpl::as_VMReg(encoding() * Register::max_slots_per_register);
}
-inline VMReg FloatRegisterImpl::as_VMReg() {
- return VMRegImpl::as_VMReg((encoding() << ConcreteRegisterImpl::log_vmregs_per_fpr) + ConcreteRegisterImpl::max_gpr);
+inline VMReg FloatRegister::FloatRegisterImpl::as_VMReg() const {
+ return VMRegImpl::as_VMReg((encoding() * FloatRegister::max_slots_per_register) + ConcreteRegisterImpl::max_gpr);
}
#endif // CPU_ARM_VMREG_ARM_INLINE_HPP
diff --git a/src/hotspot/cpu/ppc/atomicAccess_ppc.hpp b/src/hotspot/cpu/ppc/atomicAccess_ppc.hpp
index a0ff19e6171..c4529b0eb1a 100644
--- a/src/hotspot/cpu/ppc/atomicAccess_ppc.hpp
+++ b/src/hotspot/cpu/ppc/atomicAccess_ppc.hpp
@@ -157,6 +157,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return result;
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 108da2039f6..0b48653ae64 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -264,12 +264,19 @@ int LIR_Assembler::emit_deopt_handler() {
}
int offset = code_offset();
+ Label start;
+
+ __ bind(start);
__ bl64_patchable(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type);
+ int entry_offset = __ offset();
+ __ b(start);
guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+ assert(code_offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
index e4de2eb5c46..6a2f6264850 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
@@ -63,7 +63,7 @@ enum {
_static_call_stub_size = 4 * BytesPerInstWord + MacroAssembler::b64_patchable_size, // or smaller
_call_stub_size = _static_call_stub_size + MacroAssembler::trampoline_stub_size, // or smaller
_exception_handler_size = MacroAssembler::b64_patchable_size, // or smaller
- _deopt_handler_size = MacroAssembler::bl64_patchable_size
+ _deopt_handler_size = MacroAssembler::bl64_patchable_size + BytesPerInstWord
};
// '_static_call_stub_size' is only used on ppc (see LIR_Assembler::emit_static_call_stub()
diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.hpp b/src/hotspot/cpu/ppc/nativeInst_ppc.hpp
index dcb5c2bb3cb..75ca50674bf 100644
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.hpp
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.hpp
@@ -51,8 +51,6 @@ class NativeInstruction {
friend class Relocation;
public:
- bool is_post_call_nop() const { return MacroAssembler::is_post_call_nop(long_at(0)); }
-
bool is_jump() const { return Assembler::is_b(long_at(0)); } // See NativeGeneralJump.
bool is_sigtrap_ic_miss_check() {
@@ -531,6 +529,14 @@ class NativePostCallNop: public NativeInstruction {
};
public:
+ enum ppc_specific_constants {
+ // If the check is adjusted to read beyond size of the instruction at the deopt handler stub
+ // code entry point, it has to happen in two stages - to prevent out of bounds access in case
+ // the return address points to the entry point which could be at the end of page.
+ first_check_size = BytesPerInstWord
+ };
+
+ bool is_post_call_nop() const { return MacroAssembler::is_post_call_nop(long_at(0)); }
bool check() const { return is_post_call_nop(); }
bool decode(int32_t& oopmap_slot, int32_t& cb_offset) const {
uint32_t instr_bits = long_at(0);
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index c169d673aaf..87fcf112756 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -2088,17 +2088,11 @@ class HandlerImpl {
public:
- static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);
- static uint size_exception_handler() {
- // The exception_handler is a b64_patchable.
- return MacroAssembler::b64_patchable_size;
- }
-
static uint size_deopt_handler() {
// The deopt_handler is a bl64_patchable.
- return MacroAssembler::bl64_patchable_size;
+ return MacroAssembler::bl64_patchable_size + BytesPerInstWord;
}
};
@@ -2114,22 +2108,6 @@ public:
source %{
-int HandlerImpl::emit_exception_handler(C2_MacroAssembler *masm) {
- address base = __ start_a_stub(size_exception_handler());
- if (base == nullptr) {
- ciEnv::current()->record_failure("CodeCache is full");
- return 0; // CodeBuffer::expand failed
- }
-
- int offset = __ offset();
- __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
- relocInfo::runtime_call_type);
- assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
- __ end_a_stub();
-
- return offset;
-}
-
// The deopt_handler is like the exception handler, but it calls to
// the deoptimization blob instead of jumping to the exception blob.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
@@ -2140,12 +2118,23 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
}
int offset = __ offset();
+
+ Label start;
+ __ bind(start);
+
__ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
relocInfo::runtime_call_type);
+
+ int entry_offset = __ offset();
+
+ __ b(start);
+
assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
+ assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
//=============================================================================
@@ -2394,6 +2383,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
+bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
+ return false;
+}
+
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;
diff --git a/src/hotspot/cpu/ppc/runtime_ppc.cpp b/src/hotspot/cpu/ppc/runtime_ppc.cpp
index 2654075f702..ab658e9de58 100644
--- a/src/hotspot/cpu/ppc/runtime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/runtime_ppc.cpp
@@ -46,7 +46,6 @@
//------------------------------generate_exception_blob---------------------------
// Creates exception blob at the end.
-// Using exception blob, this code is jumped from a compiled method.
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index db45a2fa4c8..4e427ace404 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -83,7 +83,6 @@ class RegisterSaver {
static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
int* out_frame_size_in_bytes,
bool generate_oop_map,
- int return_pc_adjustment,
ReturnPCLocation return_pc_location,
bool save_vectors = false);
static void restore_live_registers_and_pop_frame(MacroAssembler* masm,
@@ -262,7 +261,6 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveVecRegs[] = {
OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
int* out_frame_size_in_bytes,
bool generate_oop_map,
- int return_pc_adjustment,
ReturnPCLocation return_pc_location,
bool save_vectors) {
// Push an abi_reg_args-frame and store all registers which may be live.
@@ -271,7 +269,6 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
// propagated to the RegisterMap of the caller frame during
// StackFrameStream construction (needed for deoptimization; see
// compiledVFrame::create_stack_value).
- // If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment.
// Updated return pc is returned in R31 (if not return_pc_is_pre_saved).
// calculate frame size
@@ -305,14 +302,11 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
// Do the save_LR by hand and adjust the return pc if requested.
switch (return_pc_location) {
case return_pc_is_lr: __ mflr(R31); break;
- case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
+ case return_pc_is_pre_saved: break;
case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
default: ShouldNotReachHere();
}
if (return_pc_location != return_pc_is_pre_saved) {
- if (return_pc_adjustment != 0) {
- __ addi(R31, R31, return_pc_adjustment);
- }
__ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
}
@@ -2907,22 +2901,15 @@ void SharedRuntime::generate_deopt_blob() {
// deopt_handler: call_deopt_stub
// cur. return pc --> ...
//
- // So currently SR_LR points behind the call in the deopt handler.
- // We adjust it such that it points to the start of the deopt handler.
// The return_pc has been stored in the frame of the deoptee and
// will replace the address of the deopt_handler in the call
// to Deoptimization::fetch_unroll_info below.
- // We can't grab a free register here, because all registers may
- // contain live values, so let the RegisterSaver do the adjustment
- // of the return pc.
- const int return_pc_adjustment_no_exception = -MacroAssembler::bl64_patchable_size;
// Push the "unpack frame"
// Save everything in sight.
map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ true,
- return_pc_adjustment_no_exception,
RegisterSaver::return_pc_is_lr);
assert(map != nullptr, "OopMap must have been created");
@@ -2957,7 +2944,6 @@ void SharedRuntime::generate_deopt_blob() {
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
- /*return_pc_adjustment_exception=*/ 0,
RegisterSaver::return_pc_is_pre_saved);
// Deopt during an exception. Save exec mode for unpack_frames.
@@ -2975,7 +2961,6 @@ void SharedRuntime::generate_deopt_blob() {
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
- /*return_pc_adjustment_reexecute=*/ 0,
RegisterSaver::return_pc_is_pre_saved);
__ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
#endif
@@ -3266,7 +3251,6 @@ SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr)
map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&frame_size_in_bytes,
/*generate_oop_map=*/ true,
- /*return_pc_adjustment=*/0,
return_pc_location, save_vectors);
// The following is basically a call_VM. However, we need the precise
@@ -3367,7 +3351,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination
map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&frame_size_in_bytes,
/*generate_oop_map*/ true,
- /*return_pc_adjustment*/ 0,
RegisterSaver::return_pc_is_lr);
// Use noreg as last_Java_pc, the return pc will be reconstructed
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
index 9d8ae770ccf..e77a2067e89 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -377,12 +377,20 @@ int LIR_Assembler::emit_deopt_handler() {
int offset = code_offset();
- __ auipc(ra, 0);
- __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ Label start;
+ __ bind(start);
+
+ __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ int entry_offset = __ offset();
+ __ j(start);
+
guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+ assert(code_offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
index e4efb2c171d..ed2ab0c4861 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
@@ -72,7 +72,7 @@ private:
// See emit_exception_handler for detail
_exception_handler_size = DEBUG_ONLY(256) NOT_DEBUG(32), // or smaller
// See emit_deopt_handler for detail
- // auipc (1) + far_jump (2)
+ // far_call (2) + j (1)
_deopt_handler_size = 1 * MacroAssembler::instruction_size +
2 * MacroAssembler::instruction_size
};
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
index d990cfbc50d..b28e33759b2 100644
--- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
@@ -311,12 +311,19 @@ inline bool NativeInstruction::is_jump_or_nop() {
// can store an offset from the initial nop to the nmethod.
class NativePostCallNop: public NativeInstruction {
public:
+ enum RISCV_specific_constants {
+ // The two parts should be checked separately to prevent out of bounds access in
+ // case the return address points to the deopt handler stub code entry point
+ // which could be at the end of page.
+ first_check_size = instruction_size
+ };
+
bool check() const {
// Check for two instructions: nop; lui zr, hi20
// These instructions only ever appear together in a post-call
// NOP, so it's unnecessary to check that the third instruction is
// an addiw as well.
- return is_nop() && MacroAssembler::is_lui_to_zr_at(addr_at(4));
+ return is_nop() && MacroAssembler::is_lui_to_zr_at(addr_at(first_check_size));
}
bool decode(int32_t& oopmap_slot, int32_t& cb_offset) const;
bool patch(int32_t oopmap_slot, int32_t cb_offset);
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 7acbb5a478b..3f5dd4ad0ee 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1049,15 +1049,10 @@ class HandlerImpl {
public:
- static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);
- static uint size_exception_handler() {
- return MacroAssembler::far_branch_size();
- }
-
static uint size_deopt_handler() {
- // count auipc + far branch
+ // count far call + j
return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
}
};
@@ -1838,25 +1833,6 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
//=============================================================================
-// Emit exception handler code.
-int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
-{
- // auipc t1, #exception_blob_entry_point
- // jr (offset)t1
- // Note that the code buffer's insts_mark is always relative to insts.
- // That's why we must use the macroassembler to generate a handler.
- address base = __ start_a_stub(size_exception_handler());
- if (base == nullptr) {
- ciEnv::current()->record_failure("CodeCache is full");
- return 0; // CodeBuffer::expand failed
- }
- int offset = __ offset();
- __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
- __ end_a_stub();
- return offset;
-}
-
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
{
@@ -1867,12 +1843,19 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
}
int offset = __ offset();
- __ auipc(ra, 0);
- __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ Label start;
+ __ bind(start);
+
+ __ far_call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ int entry_offset = __ offset();
+ __ j(start);
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+ assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
// REQUIRED MATCHER CODE
@@ -2070,6 +2053,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
+bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
+ return false;
+}
+
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;
diff --git a/src/hotspot/cpu/riscv/runtime_riscv.cpp b/src/hotspot/cpu/riscv/runtime_riscv.cpp
index e1add8dbb82..c52d5a31066 100644
--- a/src/hotspot/cpu/riscv/runtime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/runtime_riscv.cpp
@@ -249,8 +249,6 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
-// Using exception blob, this code is jumped from a compiled method.
-// (see emit_exception_handler in riscv.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
index 16f2e5d8f5b..168a3a576d0 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -89,11 +89,12 @@ class VM_Version : public Abstract_VM_Version {
FLAG_SET_DEFAULT(flag, true); \
} else { \
FLAG_SET_DEFAULT(flag, false); \
- stringStream ss; \
- deps_string(ss, dep0, ##__VA_ARGS__); \
- warning("Cannot enable " #flag ", it's missing dependent extension(s) %s", ss.as_string(true)); \
/* Sync CPU features with flags */ \
disable_feature(); \
+ stringStream ss; \
+ ss.print("missing dependent extension(s): "); \
+ deps_string(ss, dep0, ##__VA_ARGS__); \
+ log_disabled(ss.as_string(true)); \
} \
} else { \
/* Sync CPU features with flags */ \
@@ -101,11 +102,12 @@ class VM_Version : public Abstract_VM_Version {
disable_feature(); \
} else if (!deps_all_enabled(dep0, ##__VA_ARGS__)) { \
FLAG_SET_DEFAULT(flag, false); \
- stringStream ss; \
- deps_string(ss, dep0, ##__VA_ARGS__); \
- warning("Cannot enable " #flag ", it's missing dependent extension(s) %s", ss.as_string(true)); \
/* Sync CPU features with flags */ \
disable_feature(); \
+ stringStream ss; \
+ ss.print("missing dependent extension(s): "); \
+ deps_string(ss, dep0, ##__VA_ARGS__); \
+ log_disabled(ss.as_string(true)); \
} \
} \
} \
@@ -136,6 +138,7 @@ class VM_Version : public Abstract_VM_Version {
RVExtFeatures::current()->clear_feature(_cpu_feature_index);
}
void log_enabled();
+ void log_disabled(const char* reason);
protected:
bool deps_all_enabled(RVExtFeatureValue* dep0, ...) {
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index 298234156c3..93d6051aa76 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -272,14 +272,27 @@ int LIR_Assembler::emit_deopt_handler() {
// Not enough space left for the handler.
bailout("deopt handler overflow");
return -1;
- } int offset = code_offset();
+ }
+
+ int offset = code_offset();
+
+ Label start;
+ __ bind(start);
+
// Size must be constant (see HandlerImpl::emit_deopt_handler).
__ load_const(Z_R1_scratch, SharedRuntime::deopt_blob()->unpack());
__ call(Z_R1_scratch);
+
+ int entry_offset = __ offset();
+
+ __ z_bru(start);
+
guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+ assert(code_offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
void LIR_Assembler::jobject2reg(jobject o, Register reg) {
diff --git a/src/hotspot/cpu/s390/nativeInst_s390.hpp b/src/hotspot/cpu/s390/nativeInst_s390.hpp
index 16400df3f26..9852bc410b1 100644
--- a/src/hotspot/cpu/s390/nativeInst_s390.hpp
+++ b/src/hotspot/cpu/s390/nativeInst_s390.hpp
@@ -649,6 +649,13 @@ class NativeGeneralJump: public NativeInstruction {
class NativePostCallNop: public NativeInstruction {
public:
+ enum z_specific_constants {
+ // Once the check is implemented, this has to specify number of bytes checked on the first
+ // read. If the check would read beyond size of the instruction at the deopt handler stub
+ // code entry point, then it has to happen in two stages - to prevent out of bounds access
+ // in case the return address points to the entry point which could be at the end of page.
+ first_check_size = 0 // check is unimplemented
+ };
bool check() const { Unimplemented(); return false; }
bool decode(int32_t& oopmap_slot, int32_t& cb_offset) const { return false; }
bool patch(int32_t oopmap_slot, int32_t cb_offset) { Unimplemented(); return false; }
diff --git a/src/hotspot/cpu/s390/runtime_s390.cpp b/src/hotspot/cpu/s390/runtime_s390.cpp
index 314c407af91..658fba069b4 100644
--- a/src/hotspot/cpu/s390/runtime_s390.cpp
+++ b/src/hotspot/cpu/s390/runtime_s390.cpp
@@ -43,8 +43,6 @@
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
-// Using exception blob, this code is jumped from a compiled method.
-// (see emit_exception_handler in s390.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index cab3965ecfa..7d3e963a108 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1649,15 +1649,10 @@ source_hpp %{ // Header information of the source block.
class HandlerImpl {
public:
- static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);
- static uint size_exception_handler() {
- return NativeJump::max_instruction_size();
- }
-
static uint size_deopt_handler() {
- return NativeCall::max_instruction_size();
+ return NativeCall::max_instruction_size() + MacroAssembler::jump_pcrelative_size();
}
};
@@ -1672,43 +1667,6 @@ public:
source %{
-// This exception handler code snippet is placed after the method's
-// code. It is the return point if an exception occurred. it jumps to
-// the exception blob.
-//
-// If the method gets deoptimized, the method and this code snippet
-// get patched.
-//
-// 1) Trampoline code gets patched into the end of this exception
-// handler. the trampoline code jumps to the deoptimization blob.
-//
-// 2) The return address in the method's code will get patched such
-// that it jumps to the trampoline.
-//
-// 3) The handler will get patched such that it does not jump to the
-// exception blob, but to an entry in the deoptimization blob being
-// aware of the exception.
-int HandlerImpl::emit_exception_handler(C2_MacroAssembler *masm) {
- Register temp_reg = Z_R1;
-
- address base = __ start_a_stub(size_exception_handler());
- if (base == nullptr) {
- ciEnv::current()->record_failure("CodeCache is full");
- return 0; // CodeBuffer::expand failed
- }
-
- int offset = __ offset();
- // Use unconditional pc-relative jump with 32-bit range here.
- __ load_const_optimized(temp_reg, (address)OptoRuntime::exception_blob()->content_begin());
- __ z_br(temp_reg);
-
- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
-
- __ end_a_stub();
-
- return offset;
-}
-
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
address base = __ start_a_stub(size_deopt_handler());
@@ -1720,14 +1678,24 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
int offset = __ offset();
+ Label start;
+ __ bind(start);
+
// Size_deopt_handler() must be exact on zarch, so for simplicity
// we do not use load_const_opt here.
__ load_const(Z_R1, SharedRuntime::deopt_blob()->unpack());
__ call(Z_R1);
+
+ int entry_offset = __ offset();
+
+ __ z_bru(start);
+
assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
+ assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
//=============================================================================
@@ -1897,6 +1865,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
+bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
+ return false;
+}
+
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index a3605f649cc..5b6f7dcd984 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -2544,14 +2544,10 @@ void SharedRuntime::generate_deopt_blob() {
// Normal entry (non-exception case)
//
// We have been called from the deopt handler of the deoptee.
- // Z_R14 points behind the call in the deopt handler. We adjust
- // it such that it points to the start of the deopt handler.
+ // Z_R14 points to the entry point of the deopt handler.
// The return_pc has been stored in the frame of the deoptee and
// will replace the address of the deopt_handler in the call
// to Deoptimization::fetch_unroll_info below.
- // The (int) cast is necessary, because -((unsigned int)14)
- // is an unsigned int.
- __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size());
const Register exec_mode_reg = Z_tmp_1;
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index e3ba0ebb56a..cbc5c6988d4 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -3860,6 +3860,46 @@ void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool mer
emit_operand(src, dst, 0);
}
+void Assembler::vmovsldup(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x12, (0xC0 | encode));
+}
+
+void Assembler::vmovshdup(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x16, (0xC0 | encode));
+}
+
+void Assembler::evmovsldup(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x12, (0xC0 | encode));
+}
+
+void Assembler::evmovshdup(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x16, (0xC0 | encode));
+}
+
// Uses zero extension on 64bit
void Assembler::movl(Register dst, int32_t imm32) {
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index c863191df4c..43471a88391 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1664,6 +1664,11 @@ private:
void evmovdqaq(XMMRegister dst, Address src, int vector_len);
void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void vmovsldup(XMMRegister dst, XMMRegister src, int vector_len);
+ void vmovshdup(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovsldup(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovshdup(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index edeb0baea0e..a2ea7af606d 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -450,14 +450,22 @@ int LIR_Assembler::emit_deopt_handler() {
}
int offset = code_offset();
- InternalAddress here(__ pc());
- __ pushptr(here.addr(), rscratch1);
- __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ Label start;
+ __ bind(start);
+
+ __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ int entry_offset = __ offset();
+
+ __ jmp(start);
+
guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+ assert(code_offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp
index 8524dc90276..33f7b063e77 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp
@@ -48,7 +48,7 @@
enum {
_call_stub_size = 28,
_exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
- _deopt_handler_size = 17
+ _deopt_handler_size = 7
};
public:
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index 586135fcebc..34de9403ccf 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -89,10 +89,10 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
- Label done;
+ Label L_done;
__ testptr(count, count);
- __ jcc(Assembler::zero, done);
+ __ jccb(Assembler::zero, L_done);
// Calculate end address in "count".
Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
@@ -111,31 +111,31 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ shrptr(count, CardTable::card_shift());
__ addptr(count, tmp);
- Label loop;
+ Label L_loop;
// Iterate from start card to end card (inclusive).
- __ bind(loop);
+ __ bind(L_loop);
- Label is_clean_card;
+ Label L_is_clean_card;
if (UseCondCardMark) {
__ cmpb(Address(addr, 0), G1CardTable::clean_card_val());
- __ jcc(Assembler::equal, is_clean_card);
+ __ jccb(Assembler::equal, L_is_clean_card);
} else {
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());
}
- Label next_card;
- __ bind(next_card);
+ Label L_next_card;
+ __ bind(L_next_card);
__ addptr(addr, sizeof(CardTable::CardValue));
__ cmpptr(addr, count);
- __ jcc(Assembler::belowEqual, loop);
- __ jmp(done);
+ __ jccb(Assembler::belowEqual, L_loop);
+ __ jmpb(L_done);
- __ bind(is_clean_card);
- // Card was clean. Dirty card and go to next..
+ __ bind(L_is_clean_card);
+ // Card was clean. Dirty card and go to next.
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());
- __ jmp(next_card);
+ __ jmpb(L_next_card);
- __ bind(done);
+ __ bind(L_done);
}
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -157,22 +157,6 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
}
}
-static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
- const Register thread, const Register value, const Register temp) {
- // This code assumes that buffer index is pointer sized.
- STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
- // Can we store a value in the given thread's buffer?
- // (The index field is typed as size_t.)
- __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address)
- __ testptr(temp, temp); // index == 0?
- __ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer)
- // The buffer is not full, store value into it.
- __ subptr(temp, wordSize); // temp := next index
- __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index
- __ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index
- __ movptr(Address(temp, 0), value); // *(buffer address + next index) := value
-}
-
static void generate_pre_barrier_fast_path(MacroAssembler* masm,
const Register thread) {
Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
@@ -190,21 +174,40 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm,
const Register pre_val,
const Register thread,
const Register tmp,
- Label& done,
- Label& runtime) {
+ Label& L_done) {
+ Address index_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+ Address buffer_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+ // This code assumes that buffer index is pointer sized.
+ STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
+
+ Label L_runtime;
+
// Do we need to load the previous value?
if (obj != noreg) {
__ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW);
}
+
// Is the previous value null?
- __ cmpptr(pre_val, NULL_WORD);
- __ jcc(Assembler::equal, done);
- generate_queue_insertion(masm,
- G1ThreadLocalData::satb_mark_queue_index_offset(),
- G1ThreadLocalData::satb_mark_queue_buffer_offset(),
- runtime,
- thread, pre_val, tmp);
- __ jmp(done);
+ __ testptr(pre_val, pre_val);
+ __ jcc(Assembler::equal, L_done);
+
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ movptr(tmp, index_addr); // temp := *(index address)
+ __ testptr(tmp, tmp); // index == 0?
+ __ jccb(Assembler::zero, L_runtime); // jump to runtime if index == 0 (full buffer)
+
+ // The buffer is not full, store value into it.
+ __ subptr(tmp, wordSize); // temp := next index
+ __ movptr(index_addr, tmp); // *(index address) := next index
+ __ addptr(tmp, buffer_addr); // temp := buffer address + next index
+ __ movptr(Address(tmp, 0), pre_val); // *(buffer address + next index) := value
+
+ // Jump out if done, or fall-through to runtime.
+ // "L_done" is far away, so jump cannot be short.
+ __ jmp(L_done);
+ __ bind(L_runtime);
}
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
@@ -219,7 +222,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
const Register thread = r15_thread;
Label done;
- Label runtime;
assert(pre_val != noreg, "check this code");
@@ -231,9 +233,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
generate_pre_barrier_fast_path(masm, thread);
// If marking is not active (*(mark queue active address) == 0), jump to done
__ jcc(Assembler::equal, done);
- generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);
-
- __ bind(runtime);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done);
// Determine and save the live input values
__ push_call_clobbered_registers();
@@ -272,23 +272,23 @@ static void generate_post_barrier(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register tmp1,
- Label& done,
bool new_val_may_be_null) {
assert_different_registers(store_addr, new_val, tmp1, noreg);
Register thread = r15_thread;
+ Label L_done;
// Does store cross heap regions?
__ movptr(tmp1, store_addr); // tmp1 := store address
__ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
__ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
- __ jcc(Assembler::equal, done);
+ __ jccb(Assembler::equal, L_done);
// Crosses regions, storing null?
if (new_val_may_be_null) {
- __ cmpptr(new_val, NULL_WORD); // new value == null?
- __ jcc(Assembler::equal, done);
+ __ testptr(new_val, new_val); // new value == null?
+ __ jccb(Assembler::equal, L_done);
}
__ movptr(tmp1, store_addr); // tmp1 := store address
@@ -298,20 +298,19 @@ static void generate_post_barrier(MacroAssembler* masm,
__ addptr(tmp1, card_table_addr); // tmp1 := card address
if (UseCondCardMark) {
__ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val?
- __ jcc(Assembler::notEqual, done);
+ __ jccb(Assembler::notEqual, L_done);
}
// Storing a region crossing, non-null oop, card is clean.
// Dirty card.
__ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
+ __ bind(L_done);
}
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp) {
- Label done;
- generate_post_barrier(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
- __ bind(done);
+ generate_post_barrier(masm, store_addr, new_val, tmp, true /* new_val_may_be_null */);
}
#if defined(COMPILER2)
@@ -354,7 +353,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
G1PreBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
Register obj = stub->obj();
Register pre_val = stub->pre_val();
Register thread = stub->thread();
@@ -362,9 +360,8 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
assert(stub->tmp2() == noreg, "not needed in this platform");
__ bind(*stub->entry());
- generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation());
- __ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
__ jmp(*stub->continuation());
}
@@ -374,9 +371,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register new_val,
Register tmp,
bool new_val_may_be_null) {
- Label done;
- generate_post_barrier(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
- __ bind(done);
+ generate_post_barrier(masm, store_addr, new_val, tmp, new_val_may_be_null);
}
#endif // COMPILER2
@@ -449,7 +444,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
}
- __ cmpptr(pre_val_reg, NULL_WORD);
+ __ testptr(pre_val_reg, pre_val_reg);
__ jcc(Assembler::equal, *stub->continuation());
ce->store_parameter(stub->pre_val()->as_register(), 0);
__ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
@@ -465,9 +460,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2 /* unused on x86 */) {
- Label done;
- generate_post_barrier(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
- masm->bind(done);
+ generate_post_barrier(masm, store_addr, new_val, tmp1, true /* new_val_may_be_null */);
}
#define __ sasm->
@@ -490,8 +483,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
- Label done;
- Label runtime;
+ Label L_done, L_runtime;
// Is marking still active?
if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
@@ -500,13 +492,13 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
__ cmpb(queue_active, 0);
}
- __ jcc(Assembler::equal, done);
+ __ jcc(Assembler::equal, L_done);
// Can we store original value in the thread's buffer?
__ movptr(tmp, queue_index);
__ testptr(tmp, tmp);
- __ jcc(Assembler::zero, runtime);
+ __ jccb(Assembler::zero, L_runtime);
__ subptr(tmp, wordSize);
__ movptr(queue_index, tmp);
__ addptr(tmp, buffer);
@@ -514,9 +506,9 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
// prev_val (rax)
__ load_parameter(0, pre_val);
__ movptr(Address(tmp, 0), pre_val);
- __ jmp(done);
+ __ jmp(L_done);
- __ bind(runtime);
+ __ bind(L_runtime);
__ push_call_clobbered_registers();
@@ -526,7 +518,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ pop_call_clobbered_registers();
- __ bind(done);
+ __ bind(L_done);
__ pop_ppx(rdx);
__ pop_ppx(rax);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 4cecaa55345..695eea6ad03 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1368,6 +1368,7 @@ public:
void vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ using Assembler::evpcmpeqd;
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
// Vector compares
diff --git a/src/hotspot/cpu/x86/nativeInst_x86.hpp b/src/hotspot/cpu/x86/nativeInst_x86.hpp
index 3e767006480..ec7fc3b154a 100644
--- a/src/hotspot/cpu/x86/nativeInst_x86.hpp
+++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp
@@ -73,6 +73,7 @@ class NativeInstruction {
s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); }
u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); }
+ jshort short_at(int offset) const { return *(jshort*) addr_at(offset); }
jint int_at(int offset) const { return *(jint*) addr_at(offset); }
intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); }
@@ -578,10 +579,15 @@ public:
instruction_code = 0x0f,
instruction_size = 8,
instruction_offset = 0,
- displacement_offset = 4
+ displacement_offset = 4,
+
+ // The two parts should be checked separately to prevent out of bounds access in case
+ // the return address points to the deopt handler stub code entry point which could be
+ // at the end of a page.
+ first_check_size = 2
};
- bool check() const { return int_at(0) == 0x841f0f; }
+ bool check() const { return short_at(0) == 0x1f0f && short_at(first_check_size) == 0x0084; }
bool decode(int32_t& oopmap_slot, int32_t& cb_offset) const {
int32_t data = int_at(displacement_offset);
if (data == 0) {
diff --git a/src/hotspot/cpu/x86/runtime_x86_64.cpp b/src/hotspot/cpu/x86/runtime_x86_64.cpp
index 7b98cf4fad7..5bf65299a0c 100644
--- a/src/hotspot/cpu/x86/runtime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/runtime_x86_64.cpp
@@ -242,8 +242,6 @@ UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
-// Using exception blob, this code is jumped from a compiled method.
-// (see emit_exception_handler in x86_64.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp
index 9555d60c8a4..b9590939468 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, Intel Corporation. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,8 +31,6 @@
#define __ _masm->
-#define xmm(i) as_XMMRegister(i)
-
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
@@ -40,15 +39,13 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
-#define XMMBYTES 64
-
// Constants
//
ATTRIBUTE_ALIGNED(64) static const uint32_t dilithiumAvx512Consts[] = {
58728449, // montQInvModR
- 8380417, // dilithium_q
- 2365951, // montRSquareModQ
- 5373807 // Barrett addend for modular reduction
+ 8380417, // dilithium_q
+ 2365951, // montRSquareModQ
+ 5373807 // Barrett addend for modular reduction
};
const int montQInvModRIdx = 0;
@@ -60,392 +57,590 @@ static address dilithiumAvx512ConstsAddr(int offset) {
return ((address) dilithiumAvx512Consts) + offset;
}
-const Register scratch = r10;
-const XMMRegister montMulPerm = xmm28;
-const XMMRegister montQInvModR = xmm30;
-const XMMRegister dilithium_q = xmm31;
+ATTRIBUTE_ALIGNED(64) static const uint32_t unshufflePerms[] = {
+ // Shuffle for the 128-bit element swap (uint64_t)
+ 0, 0, 1, 0, 8, 0, 9, 0, 4, 0, 5, 0, 12, 0, 13, 0,
+ 10, 0, 11, 0, 2, 0, 3, 0, 14, 0, 15, 0, 6, 0, 7, 0,
+ // Final shuffle for AlmostNtt
+ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
+ 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15,
-ATTRIBUTE_ALIGNED(64) static const uint32_t dilithiumAvx512Perms[] = {
- // collect montmul results into the destination register
- 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15,
- // ntt
- // level 4
- 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23,
- 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31,
- // level 5
- 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27,
- 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31,
- // level 6
- 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29,
- 2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31,
- // level 7
- 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30,
- 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31,
- 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
- 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
-
- // ntt inverse
- // level 0
- 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
- 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
- // level 1
- 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30,
- 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31,
- // level 2
- 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29,
- 2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31,
- // level 3
- 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27,
- 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31,
- // level 4
- 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23,
- 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
+ // Initial shuffle for AlmostInverseNtt
+ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+ 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15
};
-const int montMulPermsIdx = 0;
-const int nttL4PermsIdx = 64;
-const int nttL5PermsIdx = 192;
-const int nttL6PermsIdx = 320;
-const int nttL7PermsIdx = 448;
-const int nttInvL0PermsIdx = 704;
-const int nttInvL1PermsIdx = 832;
-const int nttInvL2PermsIdx = 960;
-const int nttInvL3PermsIdx = 1088;
-const int nttInvL4PermsIdx = 1216;
-
-static address dilithiumAvx512PermsAddr() {
- return (address) dilithiumAvx512Perms;
+static address unshufflePermsAddr(int offset) {
+ return ((address) unshufflePerms) + offset*64;
}
-// We do Montgomery multiplications of two vectors of 16 ints each in 4 steps:
-// 1. Do the multiplications of the corresponding even numbered slots into
-// the odd numbered slots of a third register.
-// 2. Swap the even and odd numbered slots of the original input registers.
-// 3. Similar to step 1, but into a different output register.
-// 4. Combine the outputs of step 1 and step 3 into the output of the Montgomery
-// multiplication.
-// (For levels 0-6 in the Ntt and levels 1-7 of the inverse Ntt we only swap the
-// odd-even slots of the first multiplicand as in the second (zetas) the
-// odd slots contain the same number as the corresponding even one.)
-// The indexes of the registers to be multiplied
-// are in inputRegs1[] and inputRegs[2].
-// The results go to the registers whose indexes are in outputRegs.
-// scratchRegs should contain 12 different register indexes.
-// The set in outputRegs should not overlap with the set of the middle four
-// scratch registers.
-// The sets in inputRegs1 and inputRegs2 cannot overlap with the set of the
-// first eight scratch registers.
-// In most of the cases, the odd and the corresponding even slices of the
-// registers indexed by the numbers in inputRegs2 will contain the same number,
-// this should be indicated by calling this function with
-// input2NeedsShuffle=false .
+// The following function swaps elements A<->B, C<->D, and so forth.
+// input1[] is shuffled in place; shuffle of input2[] is copied to output2[].
+// Element size (in bits) is specified by size parameter.
+// +-----+-----+-----+-----+-----
+// | | A | | C | ...
+// +-----+-----+-----+-----+-----
+// +-----+-----+-----+-----+-----
+// | B | | D | | ...
+// +-----+-----+-----+-----+-----
//
-static void montMul64(int outputRegs[], int inputRegs1[], int inputRegs2[],
- int scratchRegs[], bool input2NeedsShuffle,
- MacroAssembler *_masm) {
+// NOTE: size 0 and 1 are used for initial and final shuffles respectively of
+// dilithiumAlmostInverseNtt and dilithiumAlmostNtt. For size 0 and 1, input1[]
+// and input2[] are modified in-place (and output2 is used as a temporary)
+//
+// Using C++ lambdas for improved readability (to hide parameters that always repeat)
+static auto whole_shuffle(Register scratch, KRegister mergeMask1, KRegister mergeMask2,
+ const XMMRegister unshuffle1, const XMMRegister unshuffle2, int vector_len, MacroAssembler *_masm) {
- for (int i = 0; i < 4; i++) {
- __ vpmuldq(xmm(scratchRegs[i]), xmm(inputRegs1[i]), xmm(inputRegs2[i]),
- Assembler::AVX_512bit);
- }
- for (int i = 0; i < 4; i++) {
- __ vpmulld(xmm(scratchRegs[i + 4]), xmm(scratchRegs[i]), montQInvModR,
- Assembler::AVX_512bit);
- }
- for (int i = 0; i < 4; i++) {
- __ vpmuldq(xmm(scratchRegs[i + 4]), xmm(scratchRegs[i + 4]), dilithium_q,
- Assembler::AVX_512bit);
- }
- for (int i = 0; i < 4; i++) {
- __ evpsubd(xmm(scratchRegs[i + 4]), k0, xmm(scratchRegs[i]),
- xmm(scratchRegs[i + 4]), false, Assembler::AVX_512bit);
+ int regCnt = 4;
+ if (vector_len == Assembler::AVX_256bit) {
+ regCnt = 2;
}
- for (int i = 0; i < 4; i++) {
- __ vpshufd(xmm(inputRegs1[i]), xmm(inputRegs1[i]), 0xB1,
- Assembler::AVX_512bit);
- if (input2NeedsShuffle) {
- __ vpshufd(xmm(inputRegs2[i]), xmm(inputRegs2[i]), 0xB1,
- Assembler::AVX_512bit);
+ return [=](const XMMRegister output2[], const XMMRegister input1[],
+ const XMMRegister input2[], int size) {
+ if (vector_len == Assembler::AVX_256bit) {
+ switch (size) {
+ case 128:
+ for (int i = 0; i < regCnt; i++) {
+ __ vperm2i128(output2[i], input1[i], input2[i], 0b110001);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vinserti128(input1[i], input1[i], input2[i], 1);
+ }
+ break;
+ case 64:
+ for (int i = 0; i < regCnt; i++) {
+ __ vshufpd(output2[i], input1[i], input2[i], 0b11111111, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vshufpd(input1[i], input1[i], input2[i], 0b00000000, vector_len);
+ }
+ break;
+ case 32:
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovshdup(output2[i], input1[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vpblendd(output2[i], output2[i], input2[i], 0b10101010, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovsldup(input2[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vpblendd(input1[i], input1[i], input2[i], 0b10101010, vector_len);
+ }
+ break;
+ // Special cases
+ case 1: // initial shuffle for dilithiumAlmostInverseNtt
+ // shuffle all even 32bit columns to input1, and odd to input2
+ for (int i = 0; i < regCnt; i++) {
+ // 0b-3-1-3-1
+ __ vshufps(output2[i], input1[i], input2[i], 0b11011101, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ // 0b-2-0-2-0
+ __ vshufps(input1[i], input1[i], input2[i], 0b10001000, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vpermq(input2[i], output2[i], 0b11011000, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ // 0b-3-1-2-0
+ __ vpermq(input1[i], input1[i], 0b11011000, vector_len);
+ }
+ break;
+ case 0: // final unshuffle for dilithiumAlmostNtt
+ // reverse case 1: all even are in input1 and odd in input2, put back
+ for (int i = 0; i < regCnt; i++) {
+ __ vpunpckhdq(output2[i], input1[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vpunpckldq(input1[i], input1[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vperm2i128(input2[i], input1[i], output2[i], 0b110001);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vinserti128(input1[i], input1[i], output2[i], 1);
+ }
+ break;
+ default:
+ assert(false, "Don't call here");
+ }
+ } else {
+ switch (size) {
+ case 256:
+ for (int i = 0; i < regCnt; i++) {
+ // 0b-3-2-3-2
+ __ evshufi64x2(output2[i], input1[i], input2[i], 0b11101110, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vinserti64x4(input1[i], input1[i], input2[i], 1);
+ }
+ break;
+ case 128:
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovdqu(output2[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evpermt2q(output2[i], unshuffle2, input1[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evpermt2q(input1[i], unshuffle1, input2[i], vector_len);
+ }
+
+ break;
+ case 64:
+ for (int i = 0; i < regCnt; i++) {
+ __ vshufpd(output2[i], input1[i], input2[i], 0b11111111, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vshufpd(input1[i], input1[i], input2[i], 0b00000000, vector_len);
+ }
+ break;
+ case 32:
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovdqu(output2[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evmovshdup(output2[i], mergeMask2, input1[i], true, vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evmovsldup(input1[i], mergeMask1, input2[i], true, vector_len);
+ }
+ break;
+ // Special cases
+ case 1: // initial shuffle for dilithiumAlmostInverseNtt
+ // shuffle all even 32bit columns to input1, and odd to input2
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovdqu(output2[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evpermt2d(input2[i], unshuffle2, input1[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evpermt2d(input1[i], unshuffle1, output2[i], vector_len);
+ }
+ break;
+ case 0: // final unshuffle for dilithiumAlmostNtt
+ // reverse case 1: all even are in input1 and odd in input2, put back
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovdqu(output2[i], input2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evpermt2d(input2[i], unshuffle2, input1[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ evpermt2d(input1[i], unshuffle1, output2[i], vector_len);
+ }
+ break;
+ default:
+ assert(false, "Don't call here");
+ }
}
- }
-
- for (int i = 0; i < 4; i++) {
- __ vpmuldq(xmm(scratchRegs[i]), xmm(inputRegs1[i]), xmm(inputRegs2[i]),
- Assembler::AVX_512bit);
- }
- for (int i = 0; i < 4; i++) {
- __ vpmulld(xmm(scratchRegs[i + 8]), xmm(scratchRegs[i]), montQInvModR,
- Assembler::AVX_512bit);
- }
- for (int i = 0; i < 4; i++) {
- __ vpmuldq(xmm(scratchRegs[i + 8]), xmm(scratchRegs[i + 8]), dilithium_q,
- Assembler::AVX_512bit);
- }
- for (int i = 0; i < 4; i++) {
- __ evpsubd(xmm(outputRegs[i]), k0, xmm(scratchRegs[i]),
- xmm(scratchRegs[i + 8]), false, Assembler::AVX_512bit);
- }
-
- for (int i = 0; i < 4; i++) {
- __ evpermt2d(xmm(outputRegs[i]), montMulPerm, xmm(scratchRegs[i + 4]),
- Assembler::AVX_512bit);
- }
+ }; // return
}
-static void montMul64(int outputRegs[], int inputRegs1[], int inputRegs2[],
- int scratchRegs[], MacroAssembler *_masm) {
- montMul64(outputRegs, inputRegs1, inputRegs2, scratchRegs, false, _masm);
-}
-
-static void sub_add(int subResult[], int addResult[],
- int input1[], int input2[], MacroAssembler *_masm) {
-
- for (int i = 0; i < 4; i++) {
- __ evpsubd(xmm(subResult[i]), k0, xmm(input1[i]), xmm(input2[i]), false,
- Assembler::AVX_512bit);
+// We do Montgomery multiplications of two AVX registers in 4 steps:
+// 1. Do the multiplications of the corresponding even numbered slots into
+// the odd numbered slots of the scratch2 register.
+// 2. Swap the even and odd numbered slots of the original input registers. (*Note)
+// 3. Similar to step 1, but multiplication result is placed into output register.
+// 4. Combine odd/even slots respectively from the scratch2 and output registers
+// into the output register for the final result of the Montgomery multiplication.
+// (*Note: For levels 0-6 in the Ntt and levels 1-7 of the inverse Ntt, need NOT
+// swap the second operand (zetas) since the odd slots contain the same number
+// as the corresponding even one. This is indicated by input2NeedsShuffle=false)
+//
+// The registers to be multiplied are in input1[] and input2[]. The results go
+// into output[]. Two scratch[] register arrays are expected. input1[] can
+// overlap with either output[] or scratch1[]
+// - If AVX512, all register arrays are of length 4
+// - If AVX2, first two registers of each array are in xmm0-xmm15 range
+// Constants montQInvModR, dilithium_q and mergeMask expected to have already
+// been loaded.
+//
+// Using C++ lambdas for improved readability (to hide parameters that always repeat)
+static auto whole_montMul(XMMRegister montQInvModR, XMMRegister dilithium_q,
+ KRegister mergeMask, int vector_len, MacroAssembler *_masm) {
+ int regCnt = 4;
+ int regSize = 64;
+ if (vector_len == Assembler::AVX_256bit) {
+ regCnt = 2;
+ regSize = 32;
}
- for (int i = 0; i < 4; i++) {
- __ evpaddd(xmm(addResult[i]), k0, xmm(input1[i]), xmm(input2[i]), false,
- Assembler::AVX_512bit);
- }
-}
+ return [=](const XMMRegister output[], const XMMRegister input1[],
+ const XMMRegister input2[], const XMMRegister scratch1[],
+ const XMMRegister scratch2[], bool input2NeedsShuffle = false) {
+ // (Register overloading) Can't always use scratch1 (could override input1).
+ // If so, use output:
+ const XMMRegister* scratch = scratch1 == input1 ? output: scratch1;
-static void loadPerm(int destinationRegs[], Register perms,
- int offset, MacroAssembler *_masm) {
- __ evmovdqul(xmm(destinationRegs[0]), Address(perms, offset),
- Assembler::AVX_512bit);
- for (int i = 1; i < 4; i++) {
- __ evmovdqul(xmm(destinationRegs[i]), xmm(destinationRegs[0]),
- Assembler::AVX_512bit);
+ // scratch = input1_even * input2_even
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmuldq(scratch[i], input1[i], input2[i], vector_len);
}
+
+ // scratch2_low = scratch_low * montQInvModR
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmuldq(scratch2[i], scratch[i], montQInvModR, vector_len);
+ }
+
+ // scratch2 = scratch2_low * dilithium_q
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmuldq(scratch2[i], scratch2[i], dilithium_q, vector_len);
+ }
+
+ // scratch2_high = scratch2_high - scratch_high
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(scratch2[i], scratch[i], scratch2[i], vector_len);
+ }
+
+ // input1_even = input1_odd
+ // input2_even = input2_odd
+ for (int i = 0; i < regCnt; i++) {
+ __ vpshufd(input1[i], input1[i], 0xB1, vector_len);
+ if (input2NeedsShuffle) {
+ __ vpshufd(input2[i], input2[i], 0xB1, vector_len);
+ }
+ }
+
+ // scratch1 = input1_even * input2_even
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmuldq(scratch1[i], input1[i], input2[i], vector_len);
+ }
+
+ // output = scratch1_low * montQInvModR
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmuldq(output[i], scratch1[i], montQInvModR, vector_len);
+ }
+
+ // output = output * dilithium_q
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmuldq(output[i], output[i], dilithium_q, vector_len);
+ }
+
+ // output_high = scratch1_high - output_high
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(output[i], scratch1[i], output[i], vector_len);
+ }
+
+ // output = select(output_high, scratch2_high)
+ if (vector_len == Assembler::AVX_256bit) {
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovshdup(scratch2[i], scratch2[i], vector_len);
+ }
+ for (int i = 0; i < regCnt; i++) {
+ __ vpblendd(output[i], output[i], scratch2[i], 0b01010101, vector_len);
+ }
+ } else {
+ for (int i = 0; i < regCnt; i++) {
+ __ evmovshdup(output[i], mergeMask, scratch2[i], true, vector_len);
+ }
+ }
+ }; // return
}
-static void load4Xmms(int destinationRegs[], Register source, int offset,
- MacroAssembler *_masm) {
- for (int i = 0; i < 4; i++) {
- __ evmovdqul(xmm(destinationRegs[i]), Address(source, offset + i * XMMBYTES),
- Assembler::AVX_512bit);
+static void sub_add(const XMMRegister subResult[], const XMMRegister addResult[],
+ const XMMRegister input1[], const XMMRegister input2[],
+ int vector_len, MacroAssembler *_masm) {
+ int regCnt = 4;
+ if (vector_len == Assembler::AVX_256bit) {
+ regCnt = 2;
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(subResult[i], input1[i], input2[i], vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpaddd(addResult[i], input1[i], input2[i], vector_len);
}
}
-static void loadXmm29(Register source, int offset, MacroAssembler *_masm) {
- __ evmovdqul(xmm29, Address(source, offset), Assembler::AVX_512bit);
-}
+static void loadXmms(const XMMRegister destinationRegs[], Register source, int offset,
+ int vector_len, MacroAssembler *_masm, int regCnt = -1, int memStep = -1) {
-static void store4Xmms(Register destination, int offset, int xmmRegs[],
- MacroAssembler *_masm) {
- for (int i = 0; i < 4; i++) {
- __ evmovdqul(Address(destination, offset + i * XMMBYTES), xmm(xmmRegs[i]),
- Assembler::AVX_512bit);
+ if (vector_len == Assembler::AVX_256bit) {
+ regCnt = regCnt == -1 ? 2 : regCnt;
+ memStep = memStep == -1 ? 32 : memStep;
+ } else {
+ regCnt = 4;
+ memStep = 64;
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovdqu(destinationRegs[i], Address(source, offset + i * memStep), vector_len);
}
}
-static int xmm0_3[] = {0, 1, 2, 3};
-static int xmm0145[] = {0, 1, 4, 5};
-static int xmm0246[] = {0, 2, 4, 6};
-static int xmm0426[] = {0, 4, 2, 6};
-static int xmm1357[] = {1, 3, 5, 7};
-static int xmm1537[] = {1, 5, 3, 7};
-static int xmm2367[] = {2, 3, 6, 7};
-static int xmm4_7[] = {4, 5, 6, 7};
-static int xmm8_11[] = {8, 9, 10, 11};
-static int xmm12_15[] = {12, 13, 14, 15};
-static int xmm16_19[] = {16, 17, 18, 19};
-static int xmm20_23[] = {20, 21, 22, 23};
-static int xmm20222426[] = {20, 22, 24, 26};
-static int xmm21232527[] = {21, 23, 25, 27};
-static int xmm24_27[] = {24, 25, 26, 27};
-static int xmm4_20_24[] = {4, 5, 6, 7, 20, 21, 22, 23, 24, 25, 26, 27};
-static int xmm16_27[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
-static int xmm29_29[] = {29, 29, 29, 29};
+static void storeXmms(Register destination, int offset, const XMMRegister xmmRegs[],
+ int vector_len, MacroAssembler *_masm, int regCnt = -1, int memStep = -1) {
+ if (vector_len == Assembler::AVX_256bit) {
+ regCnt = regCnt == -1 ? 2 : regCnt;
+ memStep = memStep == -1 ? 32 : memStep;
+ } else {
+ regCnt = 4;
+ memStep = 64;
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vmovdqu(Address(destination, offset + i * memStep), xmmRegs[i], vector_len);
+ }
+}
// Dilithium NTT function except for the final "normalization" to |coeff| < Q.
// Implements
// static int implDilithiumAlmostNtt(int[] coeffs, int zetas[]) {}
//
// coeffs (int[256]) = c_rarg0
-// zetas (int[256]) = c_rarg1
+// zetas (int[128*8]) = c_rarg1
//
-//
-static address generate_dilithiumAlmostNtt_avx512(StubGenerator *stubgen,
- MacroAssembler *_masm) {
-
+static address generate_dilithiumAlmostNtt_avx(StubGenerator *stubgen,
+ int vector_len, MacroAssembler *_masm) {
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumAlmostNtt_id;
StubCodeMark mark(stubgen, stub_id);
address start = __ pc();
__ enter();
- Label L_loop, L_end;
-
const Register coeffs = c_rarg0;
const Register zetas = c_rarg1;
- const Register iterations = c_rarg2;
-
- const Register perms = r11;
-
- __ lea(perms, ExternalAddress(dilithiumAvx512PermsAddr()));
-
- __ evmovdqul(montMulPerm, Address(perms, montMulPermsIdx), Assembler::AVX_512bit);
+ const Register scratch = r10;
// Each level represents one iteration of the outer for loop of the Java version
// In each of these iterations half of the coefficients are (Montgomery)
// multiplied by a zeta corresponding to the coefficient and then these
// products will be added to and subtracted from the other half of the
- // coefficients. In each level we just collect the coefficients (using
- // evpermi2d() instructions where necessary, i.e. in levels 4-7) that need to
+ // coefficients. In each level we just shuffle the coefficients that need to
// be multiplied by the zetas in one set, the rest to another set of vector
// registers, then redistribute the addition/substraction results.
// For levels 0 and 1 the zetas are not different within the 4 xmm registers
- // that we would use for them, so we use only one, xmm29.
- loadXmm29(zetas, 0, _masm);
+ // that we would use for them, so we use only one register.
+
+ // AVX2 version uses the first half of these arrays
+ const XMMRegister Coeffs1[] = {xmm0, xmm1, xmm16, xmm17};
+ const XMMRegister Coeffs2[] = {xmm2, xmm3, xmm18, xmm19};
+ const XMMRegister Coeffs3[] = {xmm4, xmm5, xmm20, xmm21};
+ const XMMRegister Coeffs4[] = {xmm6, xmm7, xmm22, xmm23};
+ const XMMRegister Scratch1[] = {xmm8, xmm9, xmm24, xmm25};
+ const XMMRegister Scratch2[] = {xmm10, xmm11, xmm26, xmm27};
+ const XMMRegister Zetas1[] = {xmm12, xmm12, xmm12, xmm12};
+ const XMMRegister Zetas2[] = {xmm12, xmm12, xmm13, xmm13};
+ const XMMRegister Zetas3[] = {xmm12, xmm13, xmm28, xmm29};
+ const XMMRegister montQInvModR = xmm14;
+ const XMMRegister dilithium_q = xmm15;
+ const XMMRegister unshuffle1 = xmm30;
+ const XMMRegister unshuffle2 = xmm31;
+ KRegister mergeMask1 = k1;
+ KRegister mergeMask2 = k2;
+ // lambdas to hide repeated parameters
+ auto shuffle = whole_shuffle(scratch, mergeMask1, mergeMask2, unshuffle1, unshuffle2, vector_len, _masm);
+ auto montMul64 = whole_montMul(montQInvModR, dilithium_q, mergeMask2, vector_len, _masm);
+
__ vpbroadcastd(montQInvModR,
ExternalAddress(dilithiumAvx512ConstsAddr(montQInvModRIdx)),
- Assembler::AVX_512bit, scratch); // q^-1 mod 2^32
+ vector_len, scratch); // q^-1 mod 2^32
__ vpbroadcastd(dilithium_q,
ExternalAddress(dilithiumAvx512ConstsAddr(dilithium_qIdx)),
- Assembler::AVX_512bit, scratch); // q
+ vector_len, scratch); // q
- // load all coefficients into the vector registers Zmm_0-Zmm_15,
- // 16 coefficients into each
- load4Xmms(xmm0_3, coeffs, 0, _masm);
- load4Xmms(xmm4_7, coeffs, 4 * XMMBYTES, _masm);
- load4Xmms(xmm8_11, coeffs, 8 * XMMBYTES, _masm);
- load4Xmms(xmm12_15, coeffs, 12 * XMMBYTES, _masm);
+ if (vector_len == Assembler::AVX_512bit) {
+ // levels 0-3, register shuffles:
+ const XMMRegister Coeffs1_1[] = {xmm0, xmm1, xmm2, xmm3};
+ const XMMRegister Coeffs2_1[] = {xmm16, xmm17, xmm18, xmm19};
+ const XMMRegister Coeffs3_1[] = {xmm4, xmm5, xmm6, xmm7};
+ const XMMRegister Coeffs4_1[] = {xmm20, xmm21, xmm22, xmm23};
+ const XMMRegister Coeffs1_2[] = {xmm0, xmm16, xmm2, xmm18};
+ const XMMRegister Coeffs2_2[] = {xmm1, xmm17, xmm3, xmm19};
+ const XMMRegister Coeffs3_2[] = {xmm4, xmm20, xmm6, xmm22};
+ const XMMRegister Coeffs4_2[] = {xmm5, xmm21, xmm7, xmm23};
- // level 0 and 1 can be done entirely in registers as the zetas on these
- // levels are the same for all the montmuls that we can do in parallel
+ // Constants for shuffle and montMul64
+ __ mov64(scratch, 0b1010101010101010);
+ __ kmovwl(mergeMask1, scratch);
+ __ knotwl(mergeMask2, mergeMask1);
+ __ vmovdqu(unshuffle1, ExternalAddress(unshufflePermsAddr(0)), vector_len, scratch);
+ __ vmovdqu(unshuffle2, ExternalAddress(unshufflePermsAddr(1)), vector_len, scratch);
- // level 0
- montMul64(xmm16_19, xmm8_11, xmm29_29, xmm16_27, _masm);
- sub_add(xmm8_11, xmm0_3, xmm0_3, xmm16_19, _masm);
- montMul64(xmm16_19, xmm12_15, xmm29_29, xmm16_27, _masm);
- loadXmm29(zetas, 512, _masm); // for level 1
- sub_add(xmm12_15, xmm4_7, xmm4_7, xmm16_19, _masm);
+ int memStep = 4 * 64; // 4*64-byte registers
+ loadXmms(Coeffs1, coeffs, 0*memStep, vector_len, _masm);
+ loadXmms(Coeffs2, coeffs, 1*memStep, vector_len, _masm);
+ loadXmms(Coeffs3, coeffs, 2*memStep, vector_len, _masm);
+ loadXmms(Coeffs4, coeffs, 3*memStep, vector_len, _masm);
- // level 1
+ // level 0-3 can be done by shuffling registers (also notice fewer zetas loads, they repeat)
+ // level 0 - 128
+ // scratch1 = coeffs3 * zetas1
+ // coeffs3, coeffs1 = coeffs1 ± scratch1
+ // scratch1 = coeffs4 * zetas1
+ // coeffs4, coeffs2 = coeffs2 ± scratch1
+ __ vmovdqu(Zetas1[0], Address(zetas, 0), vector_len);
+ montMul64(Scratch1, Coeffs3, Zetas1, Coeffs3, Scratch2);
+ sub_add(Coeffs3, Coeffs1, Coeffs1, Scratch1, vector_len, _masm);
+ montMul64(Scratch1, Coeffs4, Zetas1, Coeffs4, Scratch2);
+ sub_add(Coeffs4, Coeffs2, Coeffs2, Scratch1, vector_len, _masm);
- montMul64(xmm16_19, xmm4_7, xmm29_29, xmm16_27, _masm);
- loadXmm29(zetas, 768, _masm);
- sub_add(xmm4_7, xmm0_3, xmm0_3, xmm16_19, _masm);
- montMul64(xmm16_19, xmm12_15, xmm29_29, xmm16_27, _masm);
- sub_add(xmm12_15, xmm8_11, xmm8_11, xmm16_19, _masm);
+ // level 1 - 64
+ __ vmovdqu(Zetas1[0], Address(zetas, 512), vector_len);
+ montMul64(Scratch1, Coeffs2, Zetas1, Coeffs2, Scratch2);
+ sub_add(Coeffs2, Coeffs1, Coeffs1, Scratch1, vector_len, _masm);
- // levels 2 to 7 are done in 2 batches, by first saving half of the coefficients
- // from level 1 into memory, doing all the level 2 to level 7 computations
- // on the remaining half in the vector registers, saving the result to
- // memory after level 7, then loading back the coefficients that we saved after
- // level 1 and do the same computation with those
+ __ vmovdqu(Zetas1[0], Address(zetas, 4*64 + 512), vector_len);
+ montMul64(Scratch1, Coeffs4, Zetas1, Coeffs4, Scratch2);
+ sub_add(Coeffs4, Coeffs3, Coeffs3, Scratch1, vector_len, _masm);
- store4Xmms(coeffs, 8 * XMMBYTES, xmm8_11, _masm);
- store4Xmms(coeffs, 12 * XMMBYTES, xmm12_15, _masm);
+ // level 2 - 32
+ __ vmovdqu(Zetas2[0], Address(zetas, 2 * 512), vector_len);
+ __ vmovdqu(Zetas2[2], Address(zetas, 2*64 + 2 * 512), vector_len);
+ montMul64(Scratch1, Coeffs2_1, Zetas2, Coeffs2_1, Scratch2);
+ sub_add(Coeffs2_1, Coeffs1_1, Coeffs1_1, Scratch1, vector_len, _masm);
- __ movl(iterations, 2);
+ __ vmovdqu(Zetas2[0], Address(zetas, 4*64 + 2 * 512), vector_len);
+ __ vmovdqu(Zetas2[2], Address(zetas, 6*64 + 2 * 512), vector_len);
+ montMul64(Scratch1, Coeffs4_1, Zetas2, Coeffs4_1, Scratch2);
+ sub_add(Coeffs4_1, Coeffs3_1, Coeffs3_1, Scratch1, vector_len, _masm);
- __ align(OptoLoopAlignment);
- __ BIND(L_loop);
+ // level 3 - 16
+ loadXmms(Zetas3, zetas, 3 * 512, vector_len, _masm);
+ montMul64(Scratch1, Coeffs2_2, Zetas3, Coeffs2_2, Scratch2);
+ sub_add(Coeffs2_2, Coeffs1_2, Coeffs1_2, Scratch1, vector_len, _masm);
- __ subl(iterations, 1);
+ loadXmms(Zetas3, zetas, 4*64 + 3 * 512, vector_len, _masm);
+ montMul64(Scratch1, Coeffs4_2, Zetas3, Coeffs4_2, Scratch2);
+ sub_add(Coeffs4_2, Coeffs3_2, Coeffs3_2, Scratch1, vector_len, _masm);
- // level 2
- load4Xmms(xmm12_15, zetas, 2 * 512, _masm);
- montMul64(xmm16_19, xmm2367, xmm12_15, xmm16_27, _masm);
- load4Xmms(xmm12_15, zetas, 3 * 512, _masm); // for level 3
- sub_add(xmm2367, xmm0145, xmm0145, xmm16_19, _masm);
+ for (int level = 4, distance = 8; level<8; level++, distance /= 2) {
+ // zetas = load(level * 512)
+ // coeffs1_2, scratch1 = shuffle(coeffs1_2, coeffs2_2)
+ // scratch1 = scratch1 * zetas
+ // coeffs2_2 = coeffs1_2 - scratch1
+ // coeffs1_2 = coeffs1_2 + scratch1
+ loadXmms(Zetas3, zetas, level * 512, vector_len, _masm);
+ shuffle(Scratch1, Coeffs1_2, Coeffs2_2, distance * 32); // Coeffs2_2 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs2_2, Scratch2, level==7);
+ sub_add(Coeffs2_2, Coeffs1_2, Coeffs1_2, Scratch1, vector_len, _masm);
- // level 3
+ loadXmms(Zetas3, zetas, 4*64 + level * 512, vector_len, _masm);
+ shuffle(Scratch1, Coeffs3_2, Coeffs4_2, distance * 32); // Coeffs4_2 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs4_2, Scratch2, level==7);
+ sub_add(Coeffs4_2, Coeffs3_2, Coeffs3_2, Scratch1, vector_len, _masm);
+ }
- montMul64(xmm16_19, xmm1357, xmm12_15, xmm16_27, _masm);
- sub_add(xmm1357, xmm0246, xmm0246, xmm16_19, _masm);
+ // Constants for final unshuffle
+ __ vmovdqu(unshuffle1, ExternalAddress(unshufflePermsAddr(2)), vector_len, scratch);
+ __ vmovdqu(unshuffle2, ExternalAddress(unshufflePermsAddr(3)), vector_len, scratch);
+ shuffle(Scratch1, Coeffs1_2, Coeffs2_2, 0);
+ shuffle(Scratch1, Coeffs3_2, Coeffs4_2, 0);
- // level 4
- loadPerm(xmm16_19, perms, nttL4PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttL4PermsIdx + 64, _masm);
- load4Xmms(xmm24_27, zetas, 4 * 512, _masm);
+ storeXmms(coeffs, 0*memStep, Coeffs1, vector_len, _masm);
+ storeXmms(coeffs, 1*memStep, Coeffs2, vector_len, _masm);
+ storeXmms(coeffs, 2*memStep, Coeffs3, vector_len, _masm);
+ storeXmms(coeffs, 3*memStep, Coeffs4, vector_len, _masm);
+ } else { // Assembler::AVX_256bit
+ // levels 0-4, register shuffles:
+ const XMMRegister Coeffs1_1[] = {xmm0, xmm2};
+ const XMMRegister Coeffs2_1[] = {xmm1, xmm3};
+ const XMMRegister Coeffs3_1[] = {xmm4, xmm6};
+ const XMMRegister Coeffs4_1[] = {xmm5, xmm7};
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i/2 + 16), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
+ const XMMRegister Coeffs1_2[] = {xmm0, xmm1, xmm2, xmm3};
+ const XMMRegister Coeffs2_2[] = {xmm4, xmm5, xmm6, xmm7};
+
+ // Since we cannot fit the entire payload into registers, we process the
+ // input in two stages. For the first half, load 8 registers, each 32 integers
+ // apart. With one load, we can process level 0-2 (128-, 64- and 32-integers
+ // apart). For the remaining levels, load 8 registers from consecutive memory
+ // (16-, 8-, 4-, 2-, 1-integer apart)
+ // Levels 5, 6, 7 (4-, 2-, 1-integer apart) require shuffles within registers.
+ // On the other levels, shuffles can be done by rearranging the register order
+
+ // Four batches of 8 registers each, 128 bytes apart
+ for (int i=0; i<4; i++) {
+ loadXmms(Coeffs1_2, coeffs, i*32 + 0*128, vector_len, _masm, 4, 128);
+ loadXmms(Coeffs2_2, coeffs, i*32 + 4*128, vector_len, _masm, 4, 128);
+
+ // level 0-2 can be done by shuffling registers (also notice fewer zetas loads, they repeat)
+ // level 0 - 128
+ __ vmovdqu(Zetas1[0], Address(zetas, 0), vector_len);
+ montMul64(Scratch1, Coeffs3, Zetas1, Coeffs3, Scratch2);
+ sub_add(Coeffs3, Coeffs1, Coeffs1, Scratch1, vector_len, _masm);
+ montMul64(Scratch1, Coeffs4, Zetas1, Coeffs4, Scratch2);
+ sub_add(Coeffs4, Coeffs2, Coeffs2, Scratch1, vector_len, _masm);
+
+ // level 1 - 64
+ __ vmovdqu(Zetas1[0], Address(zetas, 512), vector_len);
+ montMul64(Scratch1, Coeffs2, Zetas1, Coeffs2, Scratch2);
+ sub_add(Coeffs2, Coeffs1, Coeffs1, Scratch1, vector_len, _masm);
+
+ __ vmovdqu(Zetas1[0], Address(zetas, 4*64 + 512), vector_len);
+ montMul64(Scratch1, Coeffs4, Zetas1, Coeffs4, Scratch2);
+ sub_add(Coeffs4, Coeffs3, Coeffs3, Scratch1, vector_len, _masm);
+
+ // level 2 - 32
+ loadXmms(Zetas3, zetas, 2 * 512, vector_len, _masm, 2, 128);
+ montMul64(Scratch1, Coeffs2_1, Zetas3, Coeffs2_1, Scratch2);
+ sub_add(Coeffs2_1, Coeffs1_1, Coeffs1_1, Scratch1, vector_len, _masm);
+
+ loadXmms(Zetas3, zetas, 4*64 + 2 * 512, vector_len, _masm, 2, 128);
+ montMul64(Scratch1, Coeffs4_1, Zetas3, Coeffs4_1, Scratch2);
+ sub_add(Coeffs4_1, Coeffs3_1, Coeffs3_1, Scratch1, vector_len, _masm);
+
+ storeXmms(coeffs, i*32 + 0*128, Coeffs1_2, vector_len, _masm, 4, 128);
+ storeXmms(coeffs, i*32 + 4*128, Coeffs2_2, vector_len, _masm, 4, 128);
+ }
+
+ // Four batches of 8 registers, consecutive loads
+ for (int i=0; i<4; i++) {
+ loadXmms(Coeffs1_2, coeffs, i*256, vector_len, _masm, 4);
+ loadXmms(Coeffs2_2, coeffs, 128 + i*256, vector_len, _masm, 4);
+
+ // level 3 - 16
+ __ vmovdqu(Zetas1[0], Address(zetas, i*128 + 3 * 512), vector_len);
+ montMul64(Scratch1, Coeffs2, Zetas1, Coeffs2, Scratch2);
+ sub_add(Coeffs2, Coeffs1, Coeffs1, Scratch1, vector_len, _masm);
+
+ __ vmovdqu(Zetas1[0], Address(zetas, i*128 + 64 + 3 * 512), vector_len);
+ montMul64(Scratch1, Coeffs4, Zetas1, Coeffs4, Scratch2);
+ sub_add(Coeffs4, Coeffs3, Coeffs3, Scratch1, vector_len, _masm);
+
+ // level 4 - 8
+ loadXmms(Zetas3, zetas, i*128 + 4 * 512, vector_len, _masm);
+ montMul64(Scratch1, Coeffs2_1, Zetas3, Coeffs2_1, Scratch2);
+ sub_add(Coeffs2_1, Coeffs1_1, Coeffs1_1, Scratch1, vector_len, _masm);
+
+ loadXmms(Zetas3, zetas, i*128 + 64 + 4 * 512, vector_len, _masm);
+ montMul64(Scratch1, Coeffs4_1, Zetas3, Coeffs4_1, Scratch2);
+ sub_add(Coeffs4_1, Coeffs3_1, Coeffs3_1, Scratch1, vector_len, _masm);
+
+ for (int level = 5, distance = 4; level<8; level++, distance /= 2) {
+            // zetas = load(i*128 + level * 512)
+            // coeffs1_1, scratch1 = shuffle(coeffs1_1, coeffs2_1)
+            // scratch1 = scratch1 * zetas
+            // coeffs2_1 = coeffs1_1 - scratch1
+            // coeffs1_1 = coeffs1_1 + scratch1
+ loadXmms(Zetas3, zetas, i*128 + level * 512, vector_len, _masm);
+ shuffle(Scratch1, Coeffs1_1, Coeffs2_1, distance * 32); //Coeffs2_2 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs2_1, Scratch2, level==7);
+ sub_add(Coeffs2_1, Coeffs1_1, Coeffs1_1, Scratch1, vector_len, _masm);
+
+ loadXmms(Zetas3, zetas, i*128 + 64 + level * 512, vector_len, _masm);
+ shuffle(Scratch1, Coeffs3_1, Coeffs4_1, distance * 32); //Coeffs4_2 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs4_1, Scratch2, level==7);
+ sub_add(Coeffs4_1, Coeffs3_1, Coeffs3_1, Scratch1, vector_len, _masm);
+ }
+
+ shuffle(Scratch1, Coeffs1_1, Coeffs2_1, 0);
+ shuffle(Scratch1, Coeffs3_1, Coeffs4_1, 0);
+
+ storeXmms(coeffs, i*256, Coeffs1_2, vector_len, _masm, 4);
+ storeXmms(coeffs, 128 + i*256, Coeffs2_2, vector_len, _masm, 4);
+ }
}
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i / 2 + 12), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
-
- montMul64(xmm12_15, xmm12_15, xmm24_27, xmm4_20_24, _masm);
- sub_add(xmm1357, xmm0246, xmm16_19, xmm12_15, _masm);
-
- // level 5
- loadPerm(xmm16_19, perms, nttL5PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttL5PermsIdx + 64, _masm);
- load4Xmms(xmm24_27, zetas, 5 * 512, _masm);
-
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i/2 + 16), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i / 2 + 12), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
-
- montMul64(xmm12_15, xmm12_15, xmm24_27, xmm4_20_24, _masm);
- sub_add(xmm1357, xmm0246, xmm16_19, xmm12_15, _masm);
-
- // level 6
- loadPerm(xmm16_19, perms, nttL6PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttL6PermsIdx + 64, _masm);
- load4Xmms(xmm24_27, zetas, 6 * 512, _masm);
-
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i/2 + 16), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i / 2 + 12), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
-
- montMul64(xmm12_15, xmm12_15, xmm24_27, xmm4_20_24, _masm);
- sub_add(xmm1357, xmm0246, xmm16_19, xmm12_15, _masm);
-
- // level 7
- loadPerm(xmm16_19, perms, nttL7PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttL7PermsIdx + 64, _masm);
- load4Xmms(xmm24_27, zetas, 7 * 512, _masm);
-
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i / 2 + 16), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i / 2 + 12), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- }
-
- montMul64(xmm12_15, xmm12_15, xmm24_27, xmm4_20_24, true, _masm);
- loadPerm(xmm0246, perms, nttL7PermsIdx + 2 * XMMBYTES, _masm);
- loadPerm(xmm1357, perms, nttL7PermsIdx + 3 * XMMBYTES, _masm);
- sub_add(xmm21232527, xmm20222426, xmm16_19, xmm12_15, _masm);
-
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i), xmm(i + 20), xmm(i + 21), Assembler::AVX_512bit);
- __ evpermi2d(xmm(i + 1), xmm(i + 20), xmm(i + 21), Assembler::AVX_512bit);
- }
-
- __ cmpl(iterations, 0);
- __ jcc(Assembler::equal, L_end);
-
- store4Xmms(coeffs, 0, xmm0_3, _masm);
- store4Xmms(coeffs, 4 * XMMBYTES, xmm4_7, _masm);
-
- load4Xmms(xmm0_3, coeffs, 8 * XMMBYTES, _masm);
- load4Xmms(xmm4_7, coeffs, 12 * XMMBYTES, _masm);
-
- __ addptr(zetas, 4 * XMMBYTES);
-
- __ jmp(L_loop);
-
- __ BIND(L_end);
-
- store4Xmms(coeffs, 8 * XMMBYTES, xmm0_3, _masm);
- store4Xmms(coeffs, 12 * XMMBYTES, xmm4_7, _masm);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ mov64(rax, 0); // return 0
@@ -459,173 +654,234 @@ static address generate_dilithiumAlmostNtt_avx512(StubGenerator *stubgen,
// static int implDilithiumAlmostInverseNtt(int[] coeffs, int[] zetas) {}
//
// coeffs (int[256]) = c_rarg0
-// zetas (int[256]) = c_rarg1
-static address generate_dilithiumAlmostInverseNtt_avx512(StubGenerator *stubgen,
- MacroAssembler *_masm) {
-
+// zetas (int[128*8]) = c_rarg1
+static address generate_dilithiumAlmostInverseNtt_avx(StubGenerator *stubgen,
+ int vector_len, MacroAssembler *_masm) {
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumAlmostInverseNtt_id;
StubCodeMark mark(stubgen, stub_id);
address start = __ pc();
__ enter();
- Label L_loop, L_end;
-
const Register coeffs = c_rarg0;
const Register zetas = c_rarg1;
+ const Register scratch = r10;
- const Register iterations = c_rarg2;
+ // AVX2 version uses the first half of these arrays
+ const XMMRegister Coeffs1[] = {xmm0, xmm1, xmm16, xmm17};
+ const XMMRegister Coeffs2[] = {xmm2, xmm3, xmm18, xmm19};
+ const XMMRegister Coeffs3[] = {xmm4, xmm5, xmm20, xmm21};
+ const XMMRegister Coeffs4[] = {xmm6, xmm7, xmm22, xmm23};
+ const XMMRegister Scratch1[] = {xmm8, xmm9, xmm24, xmm25};
+ const XMMRegister Scratch2[] = {xmm10, xmm11, xmm26, xmm27};
+ const XMMRegister Zetas1[] = {xmm12, xmm12, xmm12, xmm12};
+ const XMMRegister Zetas2[] = {xmm12, xmm12, xmm13, xmm13};
+ const XMMRegister Zetas3[] = {xmm12, xmm13, xmm28, xmm29};
+ const XMMRegister montQInvModR = xmm14;
+ const XMMRegister dilithium_q = xmm15;
+ const XMMRegister unshuffle1 = xmm30;
+ const XMMRegister unshuffle2 = xmm31;
+ KRegister mergeMask1 = k1;
+ KRegister mergeMask2 = k2;
+ // lambdas to hide repeated parameters
+ auto shuffle = whole_shuffle(scratch, mergeMask1, mergeMask2, unshuffle1, unshuffle2, vector_len, _masm);
+ auto montMul64 = whole_montMul(montQInvModR, dilithium_q, mergeMask2, vector_len, _masm);
- const Register perms = r11;
-
- __ lea(perms, ExternalAddress(dilithiumAvx512PermsAddr()));
-
- __ evmovdqul(montMulPerm, Address(perms, montMulPermsIdx), Assembler::AVX_512bit);
__ vpbroadcastd(montQInvModR,
ExternalAddress(dilithiumAvx512ConstsAddr(montQInvModRIdx)),
- Assembler::AVX_512bit, scratch); // q^-1 mod 2^32
+ vector_len, scratch); // q^-1 mod 2^32
__ vpbroadcastd(dilithium_q,
ExternalAddress(dilithiumAvx512ConstsAddr(dilithium_qIdx)),
- Assembler::AVX_512bit, scratch); // q
+ vector_len, scratch); // q
// Each level represents one iteration of the outer for loop of the
// Java version.
// In each of these iterations half of the coefficients are added to and
// subtracted from the other half of the coefficients then the result of
- // the substartion is (Montgomery) multiplied by the corresponding zetas.
- // In each level we just collect the coefficients (using evpermi2d()
- // instructions where necessary, i.e. on levels 0-4) so that the results of
+ // the subtraction is (Montgomery) multiplied by the corresponding zetas.
+ // In each level we just shuffle the coefficients so that the results of
// the additions and subtractions go to the vector registers so that they
// align with each other and the zetas.
- // We do levels 0-6 in two batches, each batch entirely in the vector registers
- load4Xmms(xmm0_3, coeffs, 0, _masm);
- load4Xmms(xmm4_7, coeffs, 4 * XMMBYTES, _masm);
+ if (vector_len == Assembler::AVX_512bit) {
+ // levels 4-7, register shuffles:
+ const XMMRegister Coeffs1_1[] = {xmm0, xmm1, xmm2, xmm3};
+ const XMMRegister Coeffs2_1[] = {xmm16, xmm17, xmm18, xmm19};
+ const XMMRegister Coeffs3_1[] = {xmm4, xmm5, xmm6, xmm7};
+ const XMMRegister Coeffs4_1[] = {xmm20, xmm21, xmm22, xmm23};
+ const XMMRegister Coeffs1_2[] = {xmm0, xmm16, xmm2, xmm18};
+ const XMMRegister Coeffs2_2[] = {xmm1, xmm17, xmm3, xmm19};
+ const XMMRegister Coeffs3_2[] = {xmm4, xmm20, xmm6, xmm22};
+ const XMMRegister Coeffs4_2[] = {xmm5, xmm21, xmm7, xmm23};
- __ movl(iterations, 2);
+ // Constants for shuffle and montMul64
+ __ mov64(scratch, 0b1010101010101010);
+ __ kmovwl(mergeMask1, scratch);
+ __ knotwl(mergeMask2, mergeMask1);
+ __ vmovdqu(unshuffle1, ExternalAddress(unshufflePermsAddr(4)), vector_len, scratch);
+ __ vmovdqu(unshuffle2, ExternalAddress(unshufflePermsAddr(5)), vector_len, scratch);
- __ align(OptoLoopAlignment);
- __ BIND(L_loop);
+ int memStep = 4 * 64;
+ loadXmms(Coeffs1, coeffs, 0*memStep, vector_len, _masm);
+ loadXmms(Coeffs2, coeffs, 1*memStep, vector_len, _masm);
+ loadXmms(Coeffs3, coeffs, 2*memStep, vector_len, _masm);
+ loadXmms(Coeffs4, coeffs, 3*memStep, vector_len, _masm);
- __ subl(iterations, 1);
+ shuffle(Scratch1, Coeffs1_2, Coeffs2_2, 1);
+ shuffle(Scratch1, Coeffs3_2, Coeffs4_2, 1);
- // level 0
- loadPerm(xmm8_11, perms, nttInvL0PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttInvL0PermsIdx + 64, _masm);
+ // Constants for shuffle(128)
+ __ vmovdqu(unshuffle1, ExternalAddress(unshufflePermsAddr(0)), vector_len, scratch);
+ __ vmovdqu(unshuffle2, ExternalAddress(unshufflePermsAddr(1)), vector_len, scratch);
+ for (int level = 0, distance = 1; level<4; level++, distance *= 2) {
+ // zetas = load(level * 512)
+ // coeffs1_2 = coeffs1_2 + coeffs2_2
+ // scratch1 = coeffs1_2 - coeffs2_2
+ // scratch1 = scratch1 * zetas
+ // coeffs1_2, coeffs2_2 = shuffle(coeffs1_2, scratch1)
+ loadXmms(Zetas3, zetas, level * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs1_2, Coeffs1_2, Coeffs2_2, vector_len, _masm); // Coeffs2_2 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs2_2, Scratch2, level==0);
+ shuffle(Coeffs2_2, Coeffs1_2, Scratch1, distance * 32);
- for (int i = 0; i < 8; i += 2) {
- __ evpermi2d(xmm(i / 2 + 8), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
- __ evpermi2d(xmm(i / 2 + 12), xmm(i), xmm(i + 1), Assembler::AVX_512bit);
+ loadXmms(Zetas3, zetas, 4*64 + level * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs3_2, Coeffs3_2, Coeffs4_2, vector_len, _masm); // Coeffs4_2 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs4_2, Scratch2, level==0);
+ shuffle(Coeffs4_2, Coeffs3_2, Scratch1, distance * 32);
+ }
+
+ // level 4
+ loadXmms(Zetas3, zetas, 4 * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs1_2, Coeffs1_2, Coeffs2_2, vector_len, _masm); // Coeffs2_2 freed
+ montMul64(Coeffs2_2, Scratch1, Zetas3, Scratch1, Scratch2);
+
+ loadXmms(Zetas3, zetas, 4*64 + 4 * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs3_2, Coeffs3_2, Coeffs4_2, vector_len, _masm); // Coeffs4_2 freed
+ montMul64(Coeffs4_2, Scratch1, Zetas3, Scratch1, Scratch2);
+
+ // level 5
+ __ vmovdqu(Zetas2[0], Address(zetas, 5 * 512), vector_len);
+ __ vmovdqu(Zetas2[2], Address(zetas, 2*64 + 5 * 512), vector_len);
+ sub_add(Scratch1, Coeffs1_1, Coeffs1_1, Coeffs2_1, vector_len, _masm); // Coeffs2_1 freed
+ montMul64(Coeffs2_1, Scratch1, Zetas2, Scratch1, Scratch2);
+
+ __ vmovdqu(Zetas2[0], Address(zetas, 4*64 + 5 * 512), vector_len);
+ __ vmovdqu(Zetas2[2], Address(zetas, 6*64 + 5 * 512), vector_len);
+ sub_add(Scratch1, Coeffs3_1, Coeffs3_1, Coeffs4_1, vector_len, _masm); // Coeffs4_1 freed
+ montMul64(Coeffs4_1, Scratch1, Zetas2, Scratch1, Scratch2);
+
+ // level 6
+ __ vmovdqu(Zetas1[0], Address(zetas, 6 * 512), vector_len);
+ sub_add(Scratch1, Coeffs1, Coeffs1, Coeffs2, vector_len, _masm); // Coeffs2 freed
+ montMul64(Coeffs2, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ __ vmovdqu(Zetas1[0], Address(zetas, 4*64 + 6 * 512), vector_len);
+ sub_add(Scratch1, Coeffs3, Coeffs3, Coeffs4, vector_len, _masm); // Coeffs4 freed
+ montMul64(Coeffs4, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ // level 7
+ __ vmovdqu(Zetas1[0], Address(zetas, 7 * 512), vector_len);
+ sub_add(Scratch1, Coeffs1, Coeffs1, Coeffs3, vector_len, _masm); // Coeffs3 freed
+ montMul64(Coeffs3, Scratch1, Zetas1, Scratch1, Scratch2);
+ sub_add(Scratch1, Coeffs2, Coeffs2, Coeffs4, vector_len, _masm); // Coeffs4 freed
+ montMul64(Coeffs4, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ storeXmms(coeffs, 0*memStep, Coeffs1, vector_len, _masm);
+ storeXmms(coeffs, 1*memStep, Coeffs2, vector_len, _masm);
+ storeXmms(coeffs, 2*memStep, Coeffs3, vector_len, _masm);
+ storeXmms(coeffs, 3*memStep, Coeffs4, vector_len, _masm);
+ } else { // Assembler::AVX_256bit
+ // Permutations of Coeffs1, Coeffs2, Coeffs3 and Coeffs4
+ const XMMRegister Coeffs1_1[] = {xmm0, xmm2};
+ const XMMRegister Coeffs2_1[] = {xmm1, xmm3};
+ const XMMRegister Coeffs3_1[] = {xmm4, xmm6};
+ const XMMRegister Coeffs4_1[] = {xmm5, xmm7};
+
+ const XMMRegister Coeffs1_2[] = {xmm0, xmm1, xmm2, xmm3};
+ const XMMRegister Coeffs2_2[] = {xmm4, xmm5, xmm6, xmm7};
+
+ // Four batches of 8 registers, consecutive loads
+ for (int i=0; i<4; i++) {
+ loadXmms(Coeffs1_2, coeffs, i*256, vector_len, _masm, 4);
+ loadXmms(Coeffs2_2, coeffs, 128 + i*256, vector_len, _masm, 4);
+
+ shuffle(Scratch1, Coeffs1_1, Coeffs2_1, 1);
+ shuffle(Scratch1, Coeffs3_1, Coeffs4_1, 1);
+
+ for (int level = 0, distance = 1; level <= 2; level++, distance *= 2) {
+          // zetas = load(i*128 + level * 512)
+          // coeffs1_1 = coeffs1_1 + coeffs2_1
+          // scratch1 = coeffs1_1 - coeffs2_1
+          // scratch1 = scratch1 * zetas
+          // coeffs1_1, coeffs2_1 = shuffle(coeffs1_1, scratch1)
+ loadXmms(Zetas3, zetas, i*128 + level * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs1_1, Coeffs1_1, Coeffs2_1, vector_len, _masm); // Coeffs2_1 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs2_1, Scratch2, level==0);
+ shuffle(Coeffs2_1, Coeffs1_1, Scratch1, distance * 32);
+
+ loadXmms(Zetas3, zetas, i*128 + 64 + level * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs3_1, Coeffs3_1, Coeffs4_1, vector_len, _masm); // Coeffs4_1 freed
+ montMul64(Scratch1, Scratch1, Zetas3, Coeffs4_1, Scratch2, level==0);
+ shuffle(Coeffs4_1, Coeffs3_1, Scratch1, distance * 32);
+ }
+
+ // level 3
+ loadXmms(Zetas3, zetas, i*128 + 3 * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs1_1, Coeffs1_1, Coeffs2_1, vector_len, _masm); // Coeffs2_1 freed
+ montMul64(Coeffs2_1, Scratch1, Zetas3, Scratch1, Scratch2);
+
+ loadXmms(Zetas3, zetas, i*128 + 64 + 3 * 512, vector_len, _masm);
+ sub_add(Scratch1, Coeffs3_1, Coeffs3_1, Coeffs4_1, vector_len, _masm); // Coeffs4_1 freed
+ montMul64(Coeffs4_1, Scratch1, Zetas3, Scratch1, Scratch2);
+
+ // level 4
+ __ vmovdqu(Zetas1[0], Address(zetas, i*128 + 4 * 512), vector_len);
+ sub_add(Scratch1, Coeffs1, Coeffs1, Coeffs2, vector_len, _masm); // Coeffs2 freed
+ montMul64(Coeffs2, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ __ vmovdqu(Zetas1[0], Address(zetas, i*128 + 64 + 4 * 512), vector_len);
+ sub_add(Scratch1, Coeffs3, Coeffs3, Coeffs4, vector_len, _masm); // Coeffs4 freed
+ montMul64(Coeffs4, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ storeXmms(coeffs, i*256, Coeffs1_2, vector_len, _masm, 4);
+ storeXmms(coeffs, 128 + i*256, Coeffs2_2, vector_len, _masm, 4);
+ }
+
+ // Four batches of 8 registers each, 128 bytes apart
+ for (int i=0; i<4; i++) {
+ loadXmms(Coeffs1_2, coeffs, i*32 + 0*128, vector_len, _masm, 4, 128);
+ loadXmms(Coeffs2_2, coeffs, i*32 + 4*128, vector_len, _masm, 4, 128);
+
+ // level 5
+ loadXmms(Zetas3, zetas, 5 * 512, vector_len, _masm, 2, 128);
+ sub_add(Scratch1, Coeffs1_1, Coeffs1_1, Coeffs2_1, vector_len, _masm); // Coeffs2_1 freed
+ montMul64(Coeffs2_1, Scratch1, Zetas3, Scratch1, Scratch2);
+
+ loadXmms(Zetas3, zetas, 4*64 + 5 * 512, vector_len, _masm, 2, 128);
+ sub_add(Scratch1, Coeffs3_1, Coeffs3_1, Coeffs4_1, vector_len, _masm); // Coeffs4_1 freed
+ montMul64(Coeffs4_1, Scratch1, Zetas3, Scratch1, Scratch2);
+
+ // level 6
+ __ vmovdqu(Zetas1[0], Address(zetas, 6 * 512), vector_len);
+ sub_add(Scratch1, Coeffs1, Coeffs1, Coeffs2, vector_len, _masm); // Coeffs2 freed
+ montMul64(Coeffs2, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ __ vmovdqu(Zetas1[0], Address(zetas, 4*64 + 6 * 512), vector_len);
+ sub_add(Scratch1, Coeffs3, Coeffs3, Coeffs4, vector_len, _masm); // Coeffs4 freed
+ montMul64(Coeffs4, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ // level 7
+ __ vmovdqu(Zetas1[0], Address(zetas, 7 * 512), vector_len);
+ sub_add(Scratch1, Coeffs1, Coeffs1, Coeffs3, vector_len, _masm); // Coeffs3 freed
+ montMul64(Coeffs3, Scratch1, Zetas1, Scratch1, Scratch2);
+ sub_add(Scratch1, Coeffs2, Coeffs2, Coeffs4, vector_len, _masm); // Coeffs4 freed
+ montMul64(Coeffs4, Scratch1, Zetas1, Scratch1, Scratch2);
+
+ storeXmms(coeffs, i*32 + 0*128, Coeffs1_2, vector_len, _masm, 4, 128);
+ storeXmms(coeffs, i*32 + 4*128, Coeffs2_2, vector_len, _masm, 4, 128);
+ }
}
- load4Xmms(xmm4_7, zetas, 0, _masm);
- sub_add(xmm24_27, xmm0_3, xmm8_11, xmm12_15, _masm);
- montMul64(xmm4_7, xmm4_7, xmm24_27, xmm16_27, true, _masm);
-
- // level 1
- loadPerm(xmm8_11, perms, nttInvL1PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttInvL1PermsIdx + 64, _masm);
-
- for (int i = 0; i < 4; i++) {
- __ evpermi2d(xmm(i + 8), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- __ evpermi2d(xmm(i + 12), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- }
-
- load4Xmms(xmm4_7, zetas, 512, _masm);
- sub_add(xmm24_27, xmm0_3, xmm8_11, xmm12_15, _masm);
- montMul64(xmm4_7, xmm24_27, xmm4_7, xmm16_27, _masm);
-
- // level 2
- loadPerm(xmm8_11, perms, nttInvL2PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttInvL2PermsIdx + 64, _masm);
-
- for (int i = 0; i < 4; i++) {
- __ evpermi2d(xmm(i + 8), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- __ evpermi2d(xmm(i + 12), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- }
-
- load4Xmms(xmm4_7, zetas, 2 * 512, _masm);
- sub_add(xmm24_27, xmm0_3, xmm8_11, xmm12_15, _masm);
- montMul64(xmm4_7, xmm24_27, xmm4_7, xmm16_27, _masm);
-
- // level 3
- loadPerm(xmm8_11, perms, nttInvL3PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttInvL3PermsIdx + 64, _masm);
-
- for (int i = 0; i < 4; i++) {
- __ evpermi2d(xmm(i + 8), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- __ evpermi2d(xmm(i + 12), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- }
-
- load4Xmms(xmm4_7, zetas, 3 * 512, _masm);
- sub_add(xmm24_27, xmm0_3, xmm8_11, xmm12_15, _masm);
- montMul64(xmm4_7, xmm24_27, xmm4_7, xmm16_27, _masm);
-
- // level 4
- loadPerm(xmm8_11, perms, nttInvL4PermsIdx, _masm);
- loadPerm(xmm12_15, perms, nttInvL4PermsIdx + 64, _masm);
-
- for (int i = 0; i < 4; i++) {
- __ evpermi2d(xmm(i + 8), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- __ evpermi2d(xmm(i + 12), xmm(i), xmm(i + 4), Assembler::AVX_512bit);
- }
-
- load4Xmms(xmm4_7, zetas, 4 * 512, _masm);
- sub_add(xmm24_27, xmm0_3, xmm8_11, xmm12_15, _masm);
- montMul64(xmm4_7, xmm24_27, xmm4_7, xmm16_27, _masm);
-
- // level 5
- load4Xmms(xmm12_15, zetas, 5 * 512, _masm);
- sub_add(xmm8_11, xmm0_3, xmm0426, xmm1537, _masm);
- montMul64(xmm4_7, xmm8_11, xmm12_15, xmm16_27, _masm);
-
- // level 6
- load4Xmms(xmm12_15, zetas, 6 * 512, _masm);
- sub_add(xmm8_11, xmm0_3, xmm0145, xmm2367, _masm);
- montMul64(xmm4_7, xmm8_11, xmm12_15, xmm16_27, _masm);
-
- __ cmpl(iterations, 0);
- __ jcc(Assembler::equal, L_end);
-
- // save the coefficients of the first batch, adjust the zetas
- // and load the second batch of coefficients
- store4Xmms(coeffs, 0, xmm0_3, _masm);
- store4Xmms(coeffs, 4 * XMMBYTES, xmm4_7, _masm);
-
- __ addptr(zetas, 4 * XMMBYTES);
-
- load4Xmms(xmm0_3, coeffs, 8 * XMMBYTES, _masm);
- load4Xmms(xmm4_7, coeffs, 12 * XMMBYTES, _masm);
-
- __ jmp(L_loop);
-
- __ BIND(L_end);
-
- // load the coeffs of the first batch of coefficients that were saved after
- // level 6 into Zmm_8-Zmm_15 and do the last level entirely in the vector
- // registers
- load4Xmms(xmm8_11, coeffs, 0, _masm);
- load4Xmms(xmm12_15, coeffs, 4 * XMMBYTES, _masm);
-
- // level 7
-
- loadXmm29(zetas, 7 * 512, _masm);
-
- for (int i = 0; i < 8; i++) {
- __ evpaddd(xmm(i + 16), k0, xmm(i), xmm(i + 8), false, Assembler::AVX_512bit);
- }
-
- for (int i = 0; i < 8; i++) {
- __ evpsubd(xmm(i), k0, xmm(i + 8), xmm(i), false, Assembler::AVX_512bit);
- }
-
- store4Xmms(coeffs, 0, xmm16_19, _masm);
- store4Xmms(coeffs, 4 * XMMBYTES, xmm20_23, _masm);
- montMul64(xmm0_3, xmm0_3, xmm29_29, xmm16_27, _masm);
- montMul64(xmm4_7, xmm4_7, xmm29_29, xmm16_27, _masm);
- store4Xmms(coeffs, 8 * XMMBYTES, xmm0_3, _masm);
- store4Xmms(coeffs, 12 * XMMBYTES, xmm4_7, _masm);
-
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ mov64(rax, 0); // return 0
__ ret(0);
@@ -641,8 +897,8 @@ static address generate_dilithiumAlmostInverseNtt_avx512(StubGenerator *stubgen,
// result (int[256]) = c_rarg0
// poly1 (int[256]) = c_rarg1
// poly2 (int[256]) = c_rarg2
-static address generate_dilithiumNttMult_avx512(StubGenerator *stubgen,
- MacroAssembler *_masm) {
+static address generate_dilithiumNttMult_avx(StubGenerator *stubgen,
+ int vector_len, MacroAssembler *_masm) {
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumNttMult_id;
@@ -655,40 +911,60 @@ static address generate_dilithiumNttMult_avx512(StubGenerator *stubgen,
const Register result = c_rarg0;
const Register poly1 = c_rarg1;
const Register poly2 = c_rarg2;
-
- const Register perms = r10; // scratch reused after not needed any more
+ const Register scratch = r10;
const Register len = r11;
- const XMMRegister montRSquareModQ = xmm29;
+ const XMMRegister montQInvModR = xmm8;
+ const XMMRegister dilithium_q = xmm9;
+
+ const XMMRegister Poly1[] = {xmm0, xmm1, xmm16, xmm17};
+ const XMMRegister Poly2[] = {xmm2, xmm3, xmm18, xmm19};
+ const XMMRegister Scratch1[] = {xmm4, xmm5, xmm20, xmm21};
+ const XMMRegister Scratch2[] = {xmm6, xmm7, xmm22, xmm23};
+ const XMMRegister MontRSquareModQ[] = {xmm10, xmm10, xmm10, xmm10};
+ KRegister mergeMask = k1;
+ // lambda to hide repeated parameters
+ auto montMul64 = whole_montMul(montQInvModR, dilithium_q, mergeMask, vector_len, _masm);
__ vpbroadcastd(montQInvModR,
ExternalAddress(dilithiumAvx512ConstsAddr(montQInvModRIdx)),
- Assembler::AVX_512bit, scratch); // q^-1 mod 2^32
+ vector_len, scratch); // q^-1 mod 2^32
__ vpbroadcastd(dilithium_q,
ExternalAddress(dilithiumAvx512ConstsAddr(dilithium_qIdx)),
- Assembler::AVX_512bit, scratch); // q
- __ vpbroadcastd(montRSquareModQ,
+ vector_len, scratch); // q
+ __ vpbroadcastd(MontRSquareModQ[0],
ExternalAddress(dilithiumAvx512ConstsAddr(montRSquareModQIdx)),
- Assembler::AVX_512bit, scratch); // 2^64 mod q
+ vector_len, scratch); // 2^64 mod q
+ if (vector_len == Assembler::AVX_512bit) {
+ __ mov64(scratch, 0b0101010101010101);
+ __ kmovwl(mergeMask, scratch);
+ }
- __ lea(perms, ExternalAddress(dilithiumAvx512PermsAddr()));
- __ evmovdqul(montMulPerm, Address(perms, montMulPermsIdx), Assembler::AVX_512bit);
+ // Total payload is 256*int32s.
+ // - memStep is number of bytes one iteration processes.
+ // - loopCnt is number of iterations it will take to process entire payload.
+ int loopCnt = 4;
+ int memStep = 4 * 64;
+ if (vector_len == Assembler::AVX_256bit) {
+ loopCnt = 16;
+ memStep = 2 * 32;
+ }
- __ movl(len, 4);
+ __ movl(len, loopCnt);
__ align(OptoLoopAlignment);
__ BIND(L_loop);
- load4Xmms(xmm4_7, poly2, 0, _masm);
- load4Xmms(xmm0_3, poly1, 0, _masm);
- montMul64(xmm4_7, xmm4_7, xmm29_29, xmm16_27, _masm);
- montMul64(xmm0_3, xmm0_3, xmm4_7, xmm16_27, true, _masm);
- store4Xmms(result, 0, xmm0_3, _masm);
+ loadXmms(Poly2, poly2, 0, vector_len, _masm);
+ loadXmms(Poly1, poly1, 0, vector_len, _masm);
+ montMul64(Poly2, Poly2, MontRSquareModQ, Scratch1, Scratch2);
+ montMul64(Poly1, Poly1, Poly2, Scratch1, Scratch2, true);
+ storeXmms(result, 0, Poly1, vector_len, _masm);
__ subl(len, 1);
- __ addptr(poly1, 4 * XMMBYTES);
- __ addptr(poly2, 4 * XMMBYTES);
- __ addptr(result, 4 * XMMBYTES);
+ __ addptr(poly1, memStep);
+ __ addptr(poly2, memStep);
+ __ addptr(result, memStep);
__ cmpl(len, 0);
__ jcc(Assembler::notEqual, L_loop);
@@ -705,8 +981,8 @@ static address generate_dilithiumNttMult_avx512(StubGenerator *stubgen,
//
// coeffs (int[256]) = c_rarg0
// constant (int) = c_rarg1
-static address generate_dilithiumMontMulByConstant_avx512(StubGenerator *stubgen,
- MacroAssembler *_masm) {
+static address generate_dilithiumMontMulByConstant_avx(StubGenerator *stubgen,
+ int vector_len, MacroAssembler *_masm) {
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumMontMulByConstant_id;
@@ -718,38 +994,64 @@ static address generate_dilithiumMontMulByConstant_avx512(StubGenerator *stubgen
const Register coeffs = c_rarg0;
const Register rConstant = c_rarg1;
-
- const Register perms = c_rarg2; // not used for argument
+ const Register scratch = r10;
const Register len = r11;
- const XMMRegister constant = xmm29;
+ const XMMRegister montQInvModR = xmm8;
+ const XMMRegister dilithium_q = xmm9;
- __ lea(perms, ExternalAddress(dilithiumAvx512PermsAddr()));
+ const XMMRegister Coeffs1[] = {xmm0, xmm1, xmm16, xmm17};
+ const XMMRegister Coeffs2[] = {xmm2, xmm3, xmm18, xmm19};
+ const XMMRegister Scratch1[] = {xmm4, xmm5, xmm20, xmm21};
+ const XMMRegister Scratch2[] = {xmm6, xmm7, xmm22, xmm23};
+ const XMMRegister Constant[] = {xmm10, xmm10, xmm10, xmm10};
+ XMMRegister constant = Constant[0];
+ KRegister mergeMask = k1;
+ // lambda to hide repeated parameters
+ auto montMul64 = whole_montMul(montQInvModR, dilithium_q, mergeMask, vector_len, _masm);
- // the following four vector registers are used in montMul64
+ // load constants for montMul64
__ vpbroadcastd(montQInvModR,
ExternalAddress(dilithiumAvx512ConstsAddr(montQInvModRIdx)),
- Assembler::AVX_512bit, scratch); // q^-1 mod 2^32
+ vector_len, scratch); // q^-1 mod 2^32
__ vpbroadcastd(dilithium_q,
ExternalAddress(dilithiumAvx512ConstsAddr(dilithium_qIdx)),
- Assembler::AVX_512bit, scratch); // q
- __ evmovdqul(montMulPerm, Address(perms, montMulPermsIdx), Assembler::AVX_512bit);
- __ evpbroadcastd(constant, rConstant, Assembler::AVX_512bit); // constant multiplier
+ vector_len, scratch); // q
+ if (vector_len == Assembler::AVX_256bit) {
+ __ movdl(constant, rConstant);
+ __ vpbroadcastd(constant, constant, vector_len); // constant multiplier
+ } else {
+ __ evpbroadcastd(constant, rConstant, Assembler::AVX_512bit); // constant multiplier
- __ movl(len, 2);
+ __ mov64(scratch, 0b0101010101010101); //dw-mask
+ __ kmovwl(mergeMask, scratch);
+ }
+
+ // Total payload is 256*int32s.
+ // - memStep is number of bytes one montMul64 processes.
+ // - loopCnt is number of iterations it will take to process entire payload.
+ // - (two memSteps per loop)
+ int memStep = 4 * 64;
+ int loopCnt = 2;
+ if (vector_len == Assembler::AVX_256bit) {
+ memStep = 2 * 32;
+ loopCnt = 8;
+ }
+
+ __ movl(len, loopCnt);
__ align(OptoLoopAlignment);
__ BIND(L_loop);
- load4Xmms(xmm0_3, coeffs, 0, _masm);
- load4Xmms(xmm4_7, coeffs, 4 * XMMBYTES, _masm);
- montMul64(xmm0_3, xmm0_3, xmm29_29, xmm16_27, _masm);
- montMul64(xmm4_7, xmm4_7, xmm29_29, xmm16_27, _masm);
- store4Xmms(coeffs, 0, xmm0_3, _masm);
- store4Xmms(coeffs, 4 * XMMBYTES, xmm4_7, _masm);
+ loadXmms(Coeffs1, coeffs, 0, vector_len, _masm);
+ loadXmms(Coeffs2, coeffs, memStep, vector_len, _masm);
+ montMul64(Coeffs1, Coeffs1, Constant, Scratch1, Scratch2);
+ montMul64(Coeffs2, Coeffs2, Constant, Scratch1, Scratch2);
+ storeXmms(coeffs, 0, Coeffs1, vector_len, _masm);
+ storeXmms(coeffs, memStep, Coeffs2, vector_len, _masm);
__ subl(len, 1);
- __ addptr(coeffs, 512);
+ __ addptr(coeffs, 2 * memStep);
__ cmpl(len, 0);
__ jcc(Assembler::notEqual, L_loop);
@@ -769,9 +1071,8 @@ static address generate_dilithiumMontMulByConstant_avx512(StubGenerator *stubgen
// highPart (int[256]) = c_rarg2
// twoGamma2 (int) = c_rarg3
// multiplier (int) = c_rarg4
-static address generate_dilithiumDecomposePoly_avx512(StubGenerator *stubgen,
- MacroAssembler *_masm) {
-
+static address generate_dilithiumDecomposePoly_avx(StubGenerator *stubgen,
+ int vector_len, MacroAssembler *_masm) {
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumDecomposePoly_id;
StubCodeMark mark(stubgen, stub_id);
@@ -785,26 +1086,45 @@ static address generate_dilithiumDecomposePoly_avx512(StubGenerator *stubgen,
const Register highPart = c_rarg2;
const Register rTwoGamma2 = c_rarg3;
+ const Register scratch = r10;
const Register len = r11;
- const XMMRegister zero = xmm24;
- const XMMRegister one = xmm25;
- const XMMRegister qMinus1 = xmm26;
- const XMMRegister gamma2 = xmm27;
- const XMMRegister twoGamma2 = xmm28;
- const XMMRegister barrettMultiplier = xmm29;
- const XMMRegister barrettAddend = xmm30;
- __ vpxor(zero, zero, zero, Assembler::AVX_512bit); // 0
- __ vpternlogd(xmm0, 0xff, xmm0, xmm0, Assembler::AVX_512bit); // -1
- __ vpsubd(one, zero, xmm0, Assembler::AVX_512bit); // 1
+ const XMMRegister one = xmm0;
+ const XMMRegister gamma2 = xmm1;
+ const XMMRegister twoGamma2 = xmm2;
+ const XMMRegister barrettMultiplier = xmm3;
+ const XMMRegister barrettAddend = xmm4;
+ const XMMRegister dilithium_q = xmm5;
+ const XMMRegister zero = xmm29; // AVX512-only
+ const XMMRegister minusOne = xmm30; // AVX512-only
+ const XMMRegister qMinus1 = xmm31; // AVX512-only
+
+ XMMRegister RPlus[] = {xmm6, xmm7, xmm16, xmm17};
+ XMMRegister Quotient[] = {xmm8, xmm9, xmm18, xmm19};
+ XMMRegister R0[] = {xmm10, xmm11, xmm20, xmm21};
+ XMMRegister Mask[] = {xmm12, xmm13, xmm22, xmm23};
+ XMMRegister Tmp1[] = {xmm14, xmm15, xmm24, xmm25};
+
__ vpbroadcastd(dilithium_q,
ExternalAddress(dilithiumAvx512ConstsAddr(dilithium_qIdx)),
- Assembler::AVX_512bit, scratch); // q
+ vector_len, scratch); // q
__ vpbroadcastd(barrettAddend,
ExternalAddress(dilithiumAvx512ConstsAddr(barrettAddendIdx)),
- Assembler::AVX_512bit, scratch); // addend for Barrett reduction
+ vector_len, scratch); // addend for Barrett reduction
+ if (vector_len == Assembler::AVX_512bit) {
+ __ vpxor(zero, zero, zero, vector_len); // 0
+ __ vpternlogd(minusOne, 0xff, minusOne, minusOne, vector_len); // -1
+ __ vpsrld(one, minusOne, 31, vector_len);
+ __ vpsubd(qMinus1, dilithium_q, one, vector_len); // q - 1
+ __ evpbroadcastd(twoGamma2, rTwoGamma2, vector_len); // 2 * gamma2
+ } else {
+ __ vpcmpeqd(one, one, one, vector_len);
+ __ vpsrld(one, one, 31, vector_len);
+ __ movdl(twoGamma2, rTwoGamma2);
+ __ vpbroadcastd(twoGamma2, twoGamma2, vector_len); // 2 * gamma2
+ }
- __ evpbroadcastd(twoGamma2, rTwoGamma2, Assembler::AVX_512bit); // 2 * gamma2
+ __ vpsrad(gamma2, twoGamma2, 1, vector_len); // gamma2
#ifndef _WIN64
const Register rMultiplier = c_rarg4;
@@ -813,201 +1133,185 @@ static address generate_dilithiumDecomposePoly_avx512(StubGenerator *stubgen,
const Register rMultiplier = c_rarg3; // arg3 is already consumed, reused here
__ movptr(rMultiplier, multiplier_mem);
#endif
- __ evpbroadcastd(barrettMultiplier, rMultiplier,
- Assembler::AVX_512bit); // multiplier for mod 2 * gamma2 reduce
+ if (vector_len == Assembler::AVX_512bit) {
+ __ evpbroadcastd(barrettMultiplier, rMultiplier,
+ vector_len); // multiplier for mod 2 * gamma2 reduce
+ } else {
+ __ movdl(barrettMultiplier, rMultiplier);
+ __ vpbroadcastd(barrettMultiplier, barrettMultiplier, vector_len);
+ }
- __ evpsubd(qMinus1, k0, dilithium_q, one, false, Assembler::AVX_512bit); // q - 1
- __ evpsrad(gamma2, k0, twoGamma2, 1, false, Assembler::AVX_512bit); // gamma2
+ // Total payload is 1024 bytes
+ int memStep = 4 * 64; // Number of bytes per loop iteration
+ int regCnt = 4; // Register array length
+ if (vector_len == Assembler::AVX_256bit) {
+ memStep = 2 * 32;
+ regCnt = 2;
+ }
__ movl(len, 1024);
__ align(OptoLoopAlignment);
__ BIND(L_loop);
- load4Xmms(xmm0_3, input, 0, _masm);
+ loadXmms(RPlus, input, 0, vector_len, _masm);
- __ addptr(input, 4 * XMMBYTES);
+ __ addptr(input, memStep);
- // rplus in xmm0
// rplus = rplus - ((rplus + 5373807) >> 23) * dilithium_q;
- __ evpaddd(xmm4, k0, xmm0, barrettAddend, false, Assembler::AVX_512bit);
- __ evpaddd(xmm5, k0, xmm1, barrettAddend, false, Assembler::AVX_512bit);
- __ evpaddd(xmm6, k0, xmm2, barrettAddend, false, Assembler::AVX_512bit);
- __ evpaddd(xmm7, k0, xmm3, barrettAddend, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpaddd(Tmp1[i], RPlus[i], barrettAddend, vector_len);
+ }
- __ evpsrad(xmm4, k0, xmm4, 23, false, Assembler::AVX_512bit);
- __ evpsrad(xmm5, k0, xmm5, 23, false, Assembler::AVX_512bit);
- __ evpsrad(xmm6, k0, xmm6, 23, false, Assembler::AVX_512bit);
- __ evpsrad(xmm7, k0, xmm7, 23, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsrad(Tmp1[i], Tmp1[i], 23, vector_len);
+ }
- __ evpmulld(xmm4, k0, xmm4, dilithium_q, false, Assembler::AVX_512bit);
- __ evpmulld(xmm5, k0, xmm5, dilithium_q, false, Assembler::AVX_512bit);
- __ evpmulld(xmm6, k0, xmm6, dilithium_q, false, Assembler::AVX_512bit);
- __ evpmulld(xmm7, k0, xmm7, dilithium_q, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmulld(Tmp1[i], Tmp1[i], dilithium_q, vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(RPlus[i], RPlus[i], Tmp1[i], vector_len);
+ }
- __ evpsubd(xmm0, k0, xmm0, xmm4, false, Assembler::AVX_512bit);
- __ evpsubd(xmm1, k0, xmm1, xmm5, false, Assembler::AVX_512bit);
- __ evpsubd(xmm2, k0, xmm2, xmm6, false, Assembler::AVX_512bit);
- __ evpsubd(xmm3, k0, xmm3, xmm7, false, Assembler::AVX_512bit);
- // rplus in xmm0
// rplus = rplus + ((rplus >> 31) & dilithium_q);
- __ evpsrad(xmm4, k0, xmm0, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm5, k0, xmm1, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm6, k0, xmm2, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm7, k0, xmm3, 31, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsrad(Tmp1[i], RPlus[i], 31, vector_len);
+ }
- __ evpandd(xmm4, k0, xmm4, dilithium_q, false, Assembler::AVX_512bit);
- __ evpandd(xmm5, k0, xmm5, dilithium_q, false, Assembler::AVX_512bit);
- __ evpandd(xmm6, k0, xmm6, dilithium_q, false, Assembler::AVX_512bit);
- __ evpandd(xmm7, k0, xmm7, dilithium_q, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpand(Tmp1[i], Tmp1[i], dilithium_q, vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpaddd(RPlus[i], RPlus[i], Tmp1[i], vector_len);
+ }
- __ evpaddd(xmm0, k0, xmm0, xmm4, false, Assembler::AVX_512bit);
- __ evpaddd(xmm1, k0, xmm1, xmm5, false, Assembler::AVX_512bit);
- __ evpaddd(xmm2, k0, xmm2, xmm6, false, Assembler::AVX_512bit);
- __ evpaddd(xmm3, k0, xmm3, xmm7, false, Assembler::AVX_512bit);
- // rplus in xmm0
// int quotient = (rplus * barrettMultiplier) >> 22;
- __ evpmulld(xmm4, k0, xmm0, barrettMultiplier, false, Assembler::AVX_512bit);
- __ evpmulld(xmm5, k0, xmm1, barrettMultiplier, false, Assembler::AVX_512bit);
- __ evpmulld(xmm6, k0, xmm2, barrettMultiplier, false, Assembler::AVX_512bit);
- __ evpmulld(xmm7, k0, xmm3, barrettMultiplier, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmulld(Quotient[i], RPlus[i], barrettMultiplier, vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsrad(Quotient[i], Quotient[i], 22, vector_len);
+ }
- __ evpsrad(xmm4, k0, xmm4, 22, false, Assembler::AVX_512bit);
- __ evpsrad(xmm5, k0, xmm5, 22, false, Assembler::AVX_512bit);
- __ evpsrad(xmm6, k0, xmm6, 22, false, Assembler::AVX_512bit);
- __ evpsrad(xmm7, k0, xmm7, 22, false, Assembler::AVX_512bit);
- // quotient in xmm4
// int r0 = rplus - quotient * twoGamma2;
- __ evpmulld(xmm8, k0, xmm4, twoGamma2, false, Assembler::AVX_512bit);
- __ evpmulld(xmm9, k0, xmm5, twoGamma2, false, Assembler::AVX_512bit);
- __ evpmulld(xmm10, k0, xmm6, twoGamma2, false, Assembler::AVX_512bit);
- __ evpmulld(xmm11, k0, xmm7, twoGamma2, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpmulld(R0[i], Quotient[i], twoGamma2, vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(R0[i], RPlus[i], R0[i], vector_len);
+ }
- __ evpsubd(xmm8, k0, xmm0, xmm8, false, Assembler::AVX_512bit);
- __ evpsubd(xmm9, k0, xmm1, xmm9, false, Assembler::AVX_512bit);
- __ evpsubd(xmm10, k0, xmm2, xmm10, false, Assembler::AVX_512bit);
- __ evpsubd(xmm11, k0, xmm3, xmm11, false, Assembler::AVX_512bit);
- // r0 in xmm8
// int mask = (twoGamma2 - r0) >> 22;
- __ evpsubd(xmm12, k0, twoGamma2, xmm8, false, Assembler::AVX_512bit);
- __ evpsubd(xmm13, k0, twoGamma2, xmm9, false, Assembler::AVX_512bit);
- __ evpsubd(xmm14, k0, twoGamma2, xmm10, false, Assembler::AVX_512bit);
- __ evpsubd(xmm15, k0, twoGamma2, xmm11, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(Mask[i], twoGamma2, R0[i], vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsrad(Mask[i], Mask[i], 22, vector_len);
+ }
- __ evpsrad(xmm12, k0, xmm12, 22, false, Assembler::AVX_512bit);
- __ evpsrad(xmm13, k0, xmm13, 22, false, Assembler::AVX_512bit);
- __ evpsrad(xmm14, k0, xmm14, 22, false, Assembler::AVX_512bit);
- __ evpsrad(xmm15, k0, xmm15, 22, false, Assembler::AVX_512bit);
- // mask in xmm12
// r0 -= (mask & twoGamma2);
- __ evpandd(xmm16, k0, xmm12, twoGamma2, false, Assembler::AVX_512bit);
- __ evpandd(xmm17, k0, xmm13, twoGamma2, false, Assembler::AVX_512bit);
- __ evpandd(xmm18, k0, xmm14, twoGamma2, false, Assembler::AVX_512bit);
- __ evpandd(xmm19, k0, xmm15, twoGamma2, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpand(Tmp1[i], Mask[i], twoGamma2, vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(R0[i], R0[i], Tmp1[i], vector_len);
+ }
- __ evpsubd(xmm8, k0, xmm8, xmm16, false, Assembler::AVX_512bit);
- __ evpsubd(xmm9, k0, xmm9, xmm17, false, Assembler::AVX_512bit);
- __ evpsubd(xmm10, k0, xmm10, xmm18, false, Assembler::AVX_512bit);
- __ evpsubd(xmm11, k0, xmm11, xmm19, false, Assembler::AVX_512bit);
- // r0 in xmm8
// quotient += (mask & 1);
- __ evpandd(xmm16, k0, xmm12, one, false, Assembler::AVX_512bit);
- __ evpandd(xmm17, k0, xmm13, one, false, Assembler::AVX_512bit);
- __ evpandd(xmm18, k0, xmm14, one, false, Assembler::AVX_512bit);
- __ evpandd(xmm19, k0, xmm15, one, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpand(Tmp1[i], Mask[i], one, vector_len);
+ }
- __ evpaddd(xmm4, k0, xmm4, xmm16, false, Assembler::AVX_512bit);
- __ evpaddd(xmm5, k0, xmm5, xmm17, false, Assembler::AVX_512bit);
- __ evpaddd(xmm6, k0, xmm6, xmm18, false, Assembler::AVX_512bit);
- __ evpaddd(xmm7, k0, xmm7, xmm19, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpaddd(Quotient[i], Quotient[i], Tmp1[i], vector_len);
+ }
// mask = (twoGamma2 / 2 - r0) >> 31;
- __ evpsubd(xmm12, k0, gamma2, xmm8, false, Assembler::AVX_512bit);
- __ evpsubd(xmm13, k0, gamma2, xmm9, false, Assembler::AVX_512bit);
- __ evpsubd(xmm14, k0, gamma2, xmm10, false, Assembler::AVX_512bit);
- __ evpsubd(xmm15, k0, gamma2, xmm11, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(Mask[i], gamma2, R0[i], vector_len);
+ }
- __ evpsrad(xmm12, k0, xmm12, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm13, k0, xmm13, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm14, k0, xmm14, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm15, k0, xmm15, 31, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsrad(Mask[i], Mask[i], 31, vector_len);
+ }
// r0 -= (mask & twoGamma2);
- __ evpandd(xmm16, k0, xmm12, twoGamma2, false, Assembler::AVX_512bit);
- __ evpandd(xmm17, k0, xmm13, twoGamma2, false, Assembler::AVX_512bit);
- __ evpandd(xmm18, k0, xmm14, twoGamma2, false, Assembler::AVX_512bit);
- __ evpandd(xmm19, k0, xmm15, twoGamma2, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpand(Tmp1[i], Mask[i], twoGamma2, vector_len);
+ }
+
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(R0[i], R0[i], Tmp1[i], vector_len);
+ }
- __ evpsubd(xmm8, k0, xmm8, xmm16, false, Assembler::AVX_512bit);
- __ evpsubd(xmm9, k0, xmm9, xmm17, false, Assembler::AVX_512bit);
- __ evpsubd(xmm10, k0, xmm10, xmm18, false, Assembler::AVX_512bit);
- __ evpsubd(xmm11, k0, xmm11, xmm19, false, Assembler::AVX_512bit);
- // r0 in xmm8
// quotient += (mask & 1);
- __ evpandd(xmm16, k0, xmm12, one, false, Assembler::AVX_512bit);
- __ evpandd(xmm17, k0, xmm13, one, false, Assembler::AVX_512bit);
- __ evpandd(xmm18, k0, xmm14, one, false, Assembler::AVX_512bit);
- __ evpandd(xmm19, k0, xmm15, one, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpand(Tmp1[i], Mask[i], one, vector_len);
+ }
- __ evpaddd(xmm4, k0, xmm4, xmm16, false, Assembler::AVX_512bit);
- __ evpaddd(xmm5, k0, xmm5, xmm17, false, Assembler::AVX_512bit);
- __ evpaddd(xmm6, k0, xmm6, xmm18, false, Assembler::AVX_512bit);
- __ evpaddd(xmm7, k0, xmm7, xmm19, false, Assembler::AVX_512bit);
- // quotient in xmm4
+ for (int i = 0; i < regCnt; i++) {
+ __ vpaddd(Quotient[i], Quotient[i], Tmp1[i], vector_len);
+ }
+ // r1 in RPlus
// int r1 = rplus - r0 - (dilithium_q - 1);
- __ evpsubd(xmm16, k0, xmm0, xmm8, false, Assembler::AVX_512bit);
- __ evpsubd(xmm17, k0, xmm1, xmm9, false, Assembler::AVX_512bit);
- __ evpsubd(xmm18, k0, xmm2, xmm10, false, Assembler::AVX_512bit);
- __ evpsubd(xmm19, k0, xmm3, xmm11, false, Assembler::AVX_512bit);
-
- __ evpsubd(xmm16, k0, xmm16, xmm26, false, Assembler::AVX_512bit);
- __ evpsubd(xmm17, k0, xmm17, xmm26, false, Assembler::AVX_512bit);
- __ evpsubd(xmm18, k0, xmm18, xmm26, false, Assembler::AVX_512bit);
- __ evpsubd(xmm19, k0, xmm19, xmm26, false, Assembler::AVX_512bit);
- // r1 in xmm16
// r1 = (r1 | (-r1)) >> 31; // 0 if rplus - r0 == (dilithium_q - 1), -1 otherwise
- __ evpsubd(xmm20, k0, zero, xmm16, false, Assembler::AVX_512bit);
- __ evpsubd(xmm21, k0, zero, xmm17, false, Assembler::AVX_512bit);
- __ evpsubd(xmm22, k0, zero, xmm18, false, Assembler::AVX_512bit);
- __ evpsubd(xmm23, k0, zero, xmm19, false, Assembler::AVX_512bit);
+ for (int i = 0; i < regCnt; i++) {
+ __ vpsubd(RPlus[i], RPlus[i], R0[i], vector_len);
+ }
- __ evporq(xmm16, k0, xmm16, xmm20, false, Assembler::AVX_512bit);
- __ evporq(xmm17, k0, xmm17, xmm21, false, Assembler::AVX_512bit);
- __ evporq(xmm18, k0, xmm18, xmm22, false, Assembler::AVX_512bit);
- __ evporq(xmm19, k0, xmm19, xmm23, false, Assembler::AVX_512bit);
+ if (vector_len == Assembler::AVX_512bit) {
+ KRegister EqMsk[] = {k1, k2, k3, k4};
+ for (int i = 0; i < regCnt; i++) {
+ __ evpcmpeqd(EqMsk[i], k0, RPlus[i], qMinus1, vector_len);
+ }
- __ evpsubd(xmm12, k0, zero, one, false, Assembler::AVX_512bit); // -1
+ // r0 += ~r1; // add -1 or keep as is, using EqMsk as filter
+ for (int i = 0; i < regCnt; i++) {
+ __ evpaddd(R0[i], EqMsk[i], R0[i], minusOne, true, vector_len);
+ }
- __ evpsrad(xmm0, k0, xmm16, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm1, k0, xmm17, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm2, k0, xmm18, 31, false, Assembler::AVX_512bit);
- __ evpsrad(xmm3, k0, xmm19, 31, false, Assembler::AVX_512bit);
- // r1 in xmm0
- // r0 += ~r1;
- __ evpxorq(xmm20, k0, xmm0, xmm12, false, Assembler::AVX_512bit);
- __ evpxorq(xmm21, k0, xmm1, xmm12, false, Assembler::AVX_512bit);
- __ evpxorq(xmm22, k0, xmm2, xmm12, false, Assembler::AVX_512bit);
- __ evpxorq(xmm23, k0, xmm3, xmm12, false, Assembler::AVX_512bit);
+ // r1 in Quotient
+ // r1 = r1 & quotient; // copy 0 or keep as is, using EqMsk as filter
+ for (int i = 0; i < regCnt; i++) {
+ __ evpandd(Quotient[i], EqMsk[i], Quotient[i], zero, true, vector_len);
+ }
+ } else {
+ const XMMRegister qMinus1 = Tmp1[0];
+ __ vpsubd(qMinus1, dilithium_q, one, vector_len); // q - 1
- __ evpaddd(xmm8, k0, xmm8, xmm20, false, Assembler::AVX_512bit);
- __ evpaddd(xmm9, k0, xmm9, xmm21, false, Assembler::AVX_512bit);
- __ evpaddd(xmm10, k0, xmm10, xmm22, false, Assembler::AVX_512bit);
- __ evpaddd(xmm11, k0, xmm11, xmm23, false, Assembler::AVX_512bit);
- // r0 in xmm8
- // r1 = r1 & quotient;
- __ evpandd(xmm0, k0, xmm4, xmm0, false, Assembler::AVX_512bit);
- __ evpandd(xmm1, k0, xmm5, xmm1, false, Assembler::AVX_512bit);
- __ evpandd(xmm2, k0, xmm6, xmm2, false, Assembler::AVX_512bit);
- __ evpandd(xmm3, k0, xmm7, xmm3, false, Assembler::AVX_512bit);
- // r1 in xmm0
+ for (int i = 0; i < regCnt; i++) {
+ __ vpcmpeqd(Mask[i], RPlus[i], qMinus1, vector_len);
+ }
+
+ // r0 += ~r1;
+ // Mask already negated
+ for (int i = 0; i < regCnt; i++) {
+ __ vpaddd(R0[i], R0[i], Mask[i], vector_len);
+ }
+
+ // r1 in Quotient
+ // r1 = r1 & quotient;
+ for (int i = 0; i < regCnt; i++) {
+ __ vpandn(Quotient[i], Mask[i], Quotient[i], vector_len);
+ }
+ }
+
+ // r1 in Quotient
// lowPart[m] = r0;
// highPart[m] = r1;
- store4Xmms(highPart, 0, xmm0_3, _masm);
- store4Xmms(lowPart, 0, xmm8_11, _masm);
+ storeXmms(highPart, 0, Quotient, vector_len, _masm);
+ storeXmms(lowPart, 0, R0, vector_len, _masm);
- __ addptr(highPart, 4 * XMMBYTES);
- __ addptr(lowPart, 4 * XMMBYTES);
- __ subl(len, 4 * XMMBYTES);
+ __ addptr(highPart, memStep);
+ __ addptr(lowPart, memStep);
+ __ subl(len, memStep);
__ jcc(Assembler::notEqual, L_loop);
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -1018,17 +1322,21 @@ static address generate_dilithiumDecomposePoly_avx512(StubGenerator *stubgen,
}
void StubGenerator::generate_dilithium_stubs() {
+ int vector_len = Assembler::AVX_256bit;
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512bw()) {
+ vector_len = Assembler::AVX_512bit;
+ }
// Generate Dilithium intrinsics code
if (UseDilithiumIntrinsics) {
- StubRoutines::_dilithiumAlmostNtt =
- generate_dilithiumAlmostNtt_avx512(this, _masm);
- StubRoutines::_dilithiumAlmostInverseNtt =
- generate_dilithiumAlmostInverseNtt_avx512(this, _masm);
- StubRoutines::_dilithiumNttMult =
- generate_dilithiumNttMult_avx512(this, _masm);
- StubRoutines::_dilithiumMontMulByConstant =
- generate_dilithiumMontMulByConstant_avx512(this, _masm);
- StubRoutines::_dilithiumDecomposePoly =
- generate_dilithiumDecomposePoly_avx512(this, _masm);
+ StubRoutines::_dilithiumAlmostNtt =
+ generate_dilithiumAlmostNtt_avx(this, vector_len, _masm);
+ StubRoutines::_dilithiumAlmostInverseNtt =
+ generate_dilithiumAlmostInverseNtt_avx(this, vector_len, _masm);
+ StubRoutines::_dilithiumNttMult =
+ generate_dilithiumNttMult_avx(this, vector_len, _masm);
+ StubRoutines::_dilithiumMontMulByConstant =
+ generate_dilithiumMontMulByConstant_avx(this, vector_len, _masm);
+ StubRoutines::_dilithiumDecomposePoly =
+ generate_dilithiumDecomposePoly_avx(this, vector_len, _masm);
}
}
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 4961aed61c3..747daefd51d 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1271,8 +1271,7 @@ void VM_Version::get_processor_features() {
}
// Dilithium Intrinsics
- // Currently we only have them for AVX512
- if (supports_evex() && supports_avx512bw()) {
+ if (UseAVX > 1) {
if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
UseDilithiumIntrinsics = true;
}
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index a9748617e1f..1d393897bca 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -2633,6 +2633,70 @@ bool Matcher::supports_vector_calling_convention(void) {
return EnableVectorSupport;
}
+static bool is_ndd_demotable(const MachNode* mdef) {
+ return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
+}
+
+static bool is_ndd_demotable_commutative(const MachNode* mdef) {
+ return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
+}
+
+static bool is_demotion_candidate(const MachNode* mdef) {
+ return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
+}
+
+bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
+ int oper_index) {
+ if (mdef == nullptr) {
+ return false;
+ }
+
+ if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
+ mdef->in(mdef->operand_index(oper_index)) == nullptr) {
+ assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
+ assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
+ return false;
+ }
+
+ // Complex memory operand covers multiple incoming edges needed for
+ // address computation. Biasing def towards any address component will not
+ // result in NDD demotion by assembler.
+ if (mdef->operand_num_edges(oper_index) != 1) {
+ assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
+ return false;
+ }
+
+ // Demotion candidate must be register mask compatible with definition.
+ const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
+ if (!oper_mask.overlap(mdef->out_RegMask())) {
+ assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
+ return false;
+ }
+
+ switch (oper_index) {
+ // First operand of MachNode corresponding to Intel APX NDD selection
+ // pattern can share its assigned register with definition operand if
+ // their live ranges do not overlap. In such a scenario we can demote
+ // it to legacy map0/map1 instruction by replacing its 4-byte extended
+ // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
+ // are decorated with a special flag by instruction selector.
+ case 1:
+ return is_demotion_candidate(mdef);
+
+ // Definition operand of commutative operation can be biased towards second
+ // operand.
+ case 2:
+ return is_ndd_demotable_commutative(mdef);
+
+ // Current scheme only selects up to two biasing candidates
+ default:
+ assert(false, "unhandled operand index: %s", mdef->Name());
+ break;
+ }
+
+ return false;
+}
+
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
assert(EnableVectorSupport, "sanity");
int lo = XMM0_num;
@@ -2767,21 +2831,11 @@ class HandlerImpl {
public:
- static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);
- static uint size_exception_handler() {
- // NativeCall instruction size is the same as NativeJump.
- // exception handler starts out as jump and can be patched to
- // a call be deoptimization. (4932387)
- // Note that this value is also credited (in output.cpp) to
- // the size of the code section.
- return NativeJump::instruction_size;
- }
-
static uint size_deopt_handler() {
- // three 5 byte instructions plus one move for unreachable address.
- return 15+3;
+ // one call and one jmp.
+ return 7;
}
};
@@ -2822,7 +2876,7 @@ static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_byte
class Node::PD {
public:
- enum NodeFlags {
+ enum NodeFlags : uint64_t {
Flag_intel_jcc_erratum = Node::_last_flag << 1,
Flag_sets_carry_flag = Node::_last_flag << 2,
Flag_sets_parity_flag = Node::_last_flag << 3,
@@ -2834,7 +2888,9 @@ public:
Flag_clears_zero_flag = Node::_last_flag << 9,
Flag_clears_overflow_flag = Node::_last_flag << 10,
Flag_clears_sign_flag = Node::_last_flag << 11,
- _last_flag = Flag_clears_sign_flag
+ Flag_ndd_demotable = Node::_last_flag << 12,
+ Flag_ndd_demotable_commutative = Node::_last_flag << 13,
+ _last_flag = Flag_ndd_demotable_commutative
};
};
@@ -2873,24 +2929,6 @@ int MachNode::compute_padding(int current_offset) const {
}
}
-// Emit exception handler code.
-// Stuff framesize into a register and call a VM stub routine.
-int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
-
- // Note that the code buffer's insts_mark is always relative to insts.
- // That's why we must use the macroassembler to generate a handler.
- address base = __ start_a_stub(size_exception_handler());
- if (base == nullptr) {
- ciEnv::current()->record_failure("CodeCache is full");
- return 0; // CodeBuffer::expand failed
- }
- int offset = __ offset();
- __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
- __ end_a_stub();
- return offset;
-}
-
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
@@ -2903,21 +2941,20 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
}
int offset = __ offset();
- address the_pc = (address) __ pc();
- Label next;
- // push a "the_pc" on the stack without destroying any registers
- // as they all may be live.
+ Label start;
+ __ bind(start);
- // push address of "next"
- __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
- __ bind(next);
- // adjust it so it matches "the_pc"
- __ subptr(Address(rsp, 0), __ offset() - offset);
+ __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+ int entry_offset = __ offset();
+
+ __ jmp(start);
- __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
+ assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
+ "out of bounds read in post-call NOP check");
__ end_a_stub();
- return offset;
+ return entry_offset;
}
static Assembler::Width widthForType(BasicType bt) {
@@ -9830,7 +9867,7 @@ instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AddI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -9858,7 +9895,7 @@ instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AddI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -9901,7 +9938,7 @@ instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AddI src1 (LoadI src2)));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
@@ -9958,6 +9995,7 @@ instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddI src val));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "eincl $dst, $src\t# int ndd" %}
ins_encode %{
@@ -10012,6 +10050,7 @@ instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddI src val));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "edecl $dst, $src\t# int ndd" %}
ins_encode %{
@@ -10118,7 +10157,7 @@ instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AddL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -10146,7 +10185,7 @@ instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AddL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -10189,7 +10228,7 @@ instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AddL src1 (LoadL src2)));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
@@ -10245,6 +10284,7 @@ instruct incL_rReg_ndd(rRegL dst, rRegI src, immL1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddL src val));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "eincq $dst, $src\t# long ndd" %}
ins_encode %{
@@ -10299,6 +10339,7 @@ instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddL src val));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "edecq $dst, $src\t# long ndd" %}
ins_encode %{
@@ -11013,7 +11054,7 @@ instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -11027,7 +11068,7 @@ instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -11070,7 +11111,7 @@ instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (SubI src1 (LoadI src2)));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
@@ -11128,7 +11169,7 @@ instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -11142,7 +11183,7 @@ instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (SubL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -11185,7 +11226,7 @@ instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (SubL src1 (LoadL src2)));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
@@ -11257,7 +11298,7 @@ instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubI zero src));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegl $dst, $src\t# int ndd" %}
ins_encode %{
@@ -11285,7 +11326,7 @@ instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (NegI src));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegl $dst, $src\t# int ndd" %}
ins_encode %{
@@ -11326,7 +11367,7 @@ instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubL zero src));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegq $dst, $src\t# long ndd" %}
ins_encode %{
@@ -11354,7 +11395,7 @@ instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (NegL src));
effect(KILL cr);
- flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
+ flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegq $dst, $src\t# long ndd" %}
ins_encode %{
@@ -11399,6 +11440,7 @@ instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (MulI src1 src2));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable_commutative);
ins_cost(300);
format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
@@ -11440,6 +11482,7 @@ instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (MulI src1 (LoadI src2)));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
ins_cost(350);
format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
@@ -11491,6 +11534,7 @@ instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (MulL src1 src2));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable_commutative);
ins_cost(300);
format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
@@ -11532,6 +11576,7 @@ instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (MulL src1 (LoadL src2)));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable_commutative);
ins_cost(350);
format %{ "eimulq $dst, $src1, $src2 \t# long" %}
@@ -11806,6 +11851,7 @@ instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftI src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
ins_encode %{
@@ -11834,6 +11880,7 @@ instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftI src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
ins_encode %{
@@ -11940,6 +11987,7 @@ instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (RShiftI src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
ins_encode %{
@@ -12046,6 +12094,7 @@ instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (URShiftI src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
ins_encode %{
@@ -12153,6 +12202,7 @@ instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftL src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@@ -12181,6 +12231,7 @@ instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftL src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@@ -12287,6 +12338,7 @@ instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (RShiftL src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@@ -12393,6 +12445,7 @@ instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (URShiftL src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@@ -12564,6 +12617,7 @@ instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
match(Set dst (RotateLeft src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
ins_encode %{
@@ -12628,6 +12682,7 @@ instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
match(Set dst (RotateRight src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
ins_encode %{
@@ -12680,6 +12735,7 @@ instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
match(Set dst (RotateLeft dst shift));
effect(KILL cr);
+
format %{ "rolq $dst, $shift" %}
ins_encode %{
__ rolq($dst$$Register);
@@ -12693,6 +12749,7 @@ instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
match(Set dst (RotateLeft src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
ins_encode %{
@@ -12757,6 +12814,7 @@ instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
match(Set dst (RotateRight src shift));
effect(KILL cr);
+ flag(PD::Flag_ndd_demotable);
format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
ins_encode %{
@@ -12834,7 +12892,7 @@ instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AndI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -12927,7 +12985,7 @@ instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AndI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -12971,7 +13029,7 @@ instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AndI src1 (LoadI src2)));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
@@ -13171,7 +13229,7 @@ instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -13200,7 +13258,7 @@ instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -13214,7 +13272,7 @@ instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
ins_encode %{
@@ -13258,7 +13316,7 @@ instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 (LoadI src2)));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
@@ -13334,7 +13392,7 @@ instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (XorI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -13360,6 +13418,7 @@ instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
match(Set dst (XorI src imm));
predicate(UseAPX);
+ flag(PD::Flag_ndd_demotable);
format %{ "enotl $dst, $src" %}
ins_encode %{
@@ -13390,7 +13449,7 @@ instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
match(Set dst (XorI src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@@ -13436,7 +13495,7 @@ instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (XorI src1 (LoadI src2)));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
@@ -13515,7 +13574,7 @@ instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AndL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -13571,7 +13630,7 @@ instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AndL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -13615,7 +13674,7 @@ instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AndL src1 (LoadL src2)));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
@@ -13818,7 +13877,7 @@ instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -13873,7 +13932,7 @@ instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -13887,7 +13946,7 @@ instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
ins_encode %{
@@ -13932,7 +13991,7 @@ instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 (LoadL src2)));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
@@ -14011,7 +14070,7 @@ instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (XorL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -14037,6 +14096,7 @@ instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
%{
predicate(UseAPX);
match(Set dst (XorL src imm));
+ flag(PD::Flag_ndd_demotable);
format %{ "enotq $dst, $src" %}
ins_encode %{
@@ -14067,7 +14127,7 @@ instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
match(Set dst (XorL src1 src2));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@@ -14113,7 +14173,7 @@ instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (XorL src1 (LoadL src2)));
effect(KILL cr);
- flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
+ flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
@@ -16568,6 +16628,7 @@ instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
predicate(UseAPX);
match(Set dst (MinI src1 src2));
effect(DEF dst, USE src1, USE src2);
+ flag(PD::Flag_ndd_demotable);
ins_cost(200);
expand %{
@@ -16619,6 +16680,7 @@ instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
predicate(UseAPX);
match(Set dst (MaxI src1 src2));
effect(DEF dst, USE src1, USE src2);
+ flag(PD::Flag_ndd_demotable);
ins_cost(200);
expand %{
diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index 5f81912c0d6..48bd5e05816 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -1747,6 +1747,9 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
+void os::numa_set_thread_affinity(Thread *thread, int node) {
+}
+
void os::numa_make_global(char *addr, size_t bytes) {
}
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index 3e5fa8b84e1..0b37cb100f6 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -1581,6 +1581,9 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
+void os::numa_set_thread_affinity(Thread *thread, int node) {
+}
+
void os::numa_make_global(char *addr, size_t bytes) {
}
diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
index 8aeb64ef18c..f556bc57f26 100644
--- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
+++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
@@ -209,14 +209,14 @@ class CgroupV1Subsystem: public CgroupSubsystem {
bool pids_max(uint64_t& result) override;
bool pids_current(uint64_t& result) override;
- bool is_containerized();
+ bool is_containerized() override;
- const char * container_type() {
+ const char * container_type() override {
return "cgroupv1";
}
- CachingCgroupController* memory_controller() { return _memory; }
- CachingCgroupController* cpu_controller() { return _cpu; }
- CgroupCpuacctController* cpuacct_controller() { return _cpuacct; }
+ CachingCgroupController* memory_controller() override { return _memory; }
+ CachingCgroupController* cpu_controller() override { return _cpu; }
+ CgroupCpuacctController* cpuacct_controller() override { return _cpuacct; }
private:
/* controllers */
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index a345663dd5b..a1d957eb77d 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -159,9 +159,7 @@ physical_memory_size_type os::Linux::_physical_memory = 0;
address os::Linux::_initial_thread_stack_bottom = nullptr;
uintptr_t os::Linux::_initial_thread_stack_size = 0;
-int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = nullptr;
pthread_t os::Linux::_main_thread;
-bool os::Linux::_supports_fast_thread_cpu_time = false;
const char * os::Linux::_libc_version = nullptr;
const char * os::Linux::_libpthread_version = nullptr;
@@ -1475,29 +1473,6 @@ void os::Linux::capture_initial_stack(size_t max_size) {
////////////////////////////////////////////////////////////////////////////////
// time support
-void os::Linux::fast_thread_clock_init() {
- clockid_t clockid;
- struct timespec tp;
- int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
- (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");
-
- // Switch to using fast clocks for thread cpu time if
- // the clock_getres() returns 0 error code.
- // Note, that some kernels may support the current thread
- // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
- // returned by the pthread_getcpuclockid().
- // If the fast POSIX clocks are supported then the clock_getres()
- // must return at least tp.tv_sec == 0 which means a resolution
- // better than 1 sec. This is extra check for reliability.
-
- if (pthread_getcpuclockid_func &&
- pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
- clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
- _supports_fast_thread_cpu_time = true;
- _pthread_getcpuclockid = pthread_getcpuclockid_func;
- }
-}
-
// thread_id is kernel thread id (similar to Solaris LWP id)
intx os::current_thread_id() { return os::Linux::gettid(); }
int os::current_process_id() {
@@ -1770,7 +1745,9 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
{EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"},
};
-#if (defined AMD64)
+#if (defined IA32)
+ static Elf32_Half running_arch_code=EM_386;
+#elif (defined AMD64) || (defined X32)
static Elf32_Half running_arch_code=EM_X86_64;
#elif (defined __sparc) && (defined _LP64)
static Elf32_Half running_arch_code=EM_SPARCV9;
@@ -1804,7 +1781,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
static Elf32_Half running_arch_code=EM_LOONGARCH;
#else
#error Method os::dll_load requires that one of following is defined:\
- AARCH64, ALPHA, ARM, AMD64, LOONGARCH64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, RISCV, S390, SH, __sparc
+ AARCH64, ALPHA, ARM, AMD64, IA32, LOONGARCH64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, RISCV, S390, SH, __sparc
#endif
// Identify compatibility class for VM's architecture and library's architecture
@@ -1866,6 +1843,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
}
void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
+#ifndef IA32
bool ieee_handling = IEEE_subnormal_handling_OK();
if (!ieee_handling) {
Events::log_dll_message(nullptr, "IEEE subnormal handling check failed before loading %s", filename);
@@ -1888,9 +1866,14 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
// numerical "accuracy", but we need to protect Java semantics first
// and foremost. See JDK-8295159.
+ // This workaround is ineffective on IA32 systems because the MXCSR
+ // register (which controls flush-to-zero mode) is not stored in the
+ // legacy fenv.
+
fenv_t default_fenv;
int rtn = fegetenv(&default_fenv);
assert(rtn == 0, "fegetenv must succeed");
+#endif // IA32
void* result;
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)
@@ -1910,6 +1893,7 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
} else {
Events::log_dll_message(nullptr, "Loaded shared library %s", filename);
log_info(os)("shared library load of %s was successful", filename);
+#ifndef IA32
// Quickly test to make sure subnormals are correctly handled.
if (! IEEE_subnormal_handling_OK()) {
// We just dlopen()ed a library that mangled the floating-point flags.
@@ -1935,6 +1919,7 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
assert(false, "fesetenv didn't work");
}
}
+#endif // IA32
}
return result;
}
@@ -2433,6 +2418,7 @@ void os::Linux::print_uptime_info(outputStream* st) {
if (ret == 0) {
os::print_dhm(st, "OS uptime:", (long) sinfo.uptime);
}
+ assert(ret == 0, "sysinfo failed: %s", os::strerror(errno));
}
bool os::Linux::print_container_info(outputStream* st) {
@@ -2597,7 +2583,8 @@ void os::print_memory_info(outputStream* st) {
// values in struct sysinfo are "unsigned long"
struct sysinfo si;
- sysinfo(&si);
+ int ret = sysinfo(&si);
+ assert(ret == 0, "sysinfo failed: %s", os::strerror(errno));
physical_memory_size_type phys_mem = physical_memory();
st->print(", physical " PHYS_MEM_TYPE_FORMAT "k",
phys_mem >> 10);
@@ -2605,10 +2592,12 @@ void os::print_memory_info(outputStream* st) {
(void)os::available_memory(avail_mem);
st->print("(" PHYS_MEM_TYPE_FORMAT "k free)",
avail_mem >> 10);
- st->print(", swap " UINT64_FORMAT "k",
- ((jlong)si.totalswap * si.mem_unit) >> 10);
- st->print("(" UINT64_FORMAT "k free)",
- ((jlong)si.freeswap * si.mem_unit) >> 10);
+ if (ret == 0) {
+ st->print(", swap " UINT64_FORMAT "k",
+ ((jlong)si.totalswap * si.mem_unit) >> 10);
+ st->print("(" UINT64_FORMAT "k free)",
+ ((jlong)si.freeswap * si.mem_unit) >> 10);
+ }
st->cr();
st->print("Page Sizes: ");
_page_sizes.print_on(st);
@@ -2991,6 +2980,10 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
+void os::numa_set_thread_affinity(Thread* thread, int node) {
+ Linux::numa_set_thread_affinity(thread->osthread()->thread_id(), node);
+}
+
void os::numa_make_global(char *addr, size_t bytes) {
Linux::numa_interleave_memory(addr, bytes);
}
@@ -3173,6 +3166,8 @@ bool os::Linux::libnuma_init() {
libnuma_dlsym(handle, "numa_set_bind_policy")));
set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t,
libnuma_dlsym(handle, "numa_bitmask_isbitset")));
+ set_numa_bitmask_clearbit(CAST_TO_FN_PTR(numa_bitmask_clearbit_func_t,
+ libnuma_dlsym(handle, "numa_bitmask_clearbit")));
set_numa_bitmask_equal(CAST_TO_FN_PTR(numa_bitmask_equal_func_t,
libnuma_dlsym(handle, "numa_bitmask_equal")));
set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
@@ -3187,20 +3182,32 @@ bool os::Linux::libnuma_init() {
libnuma_dlsym(handle, "numa_set_preferred")));
set_numa_get_run_node_mask(CAST_TO_FN_PTR(numa_get_run_node_mask_func_t,
libnuma_v2_dlsym(handle, "numa_get_run_node_mask")));
+ set_numa_sched_setaffinity(CAST_TO_FN_PTR(numa_sched_setaffinity_func_t,
+ libnuma_v2_dlsym(handle, "numa_sched_setaffinity")));
+ set_numa_allocate_cpumask(CAST_TO_FN_PTR(numa_allocate_cpumask_func_t,
+ libnuma_v2_dlsym(handle, "numa_allocate_cpumask")));
if (numa_available() != -1) {
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
+ set_numa_all_cpus_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_cpus_ptr"));
set_numa_interleave_bitmask(_numa_get_interleave_mask());
set_numa_membind_bitmask(_numa_get_membind());
set_numa_cpunodebind_bitmask(_numa_get_run_node_mask());
+
// Create an index -> node mapping, since nodes are not always consecutive
_nindex_to_node = new (mtInternal) GrowableArray(0, mtInternal);
rebuild_nindex_to_node_map();
+
// Create a cpu -> node mapping
_cpu_to_node = new (mtInternal) GrowableArray(0, mtInternal);
rebuild_cpu_to_node_map();
+
+ // Create a node -> CPUs mapping
+ _numa_affinity_masks = new (mtInternal) GrowableArray(0, mtInternal);
+ build_numa_affinity_masks();
+
return true;
}
}
@@ -3236,6 +3243,42 @@ size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
return ((thr_type == java_thread || thr_type == compiler_thread) ? 0 : os::vm_page_size());
}
+void os::Linux::build_numa_affinity_masks() {
+ // We only build the affinity masks if running libnuma v2 (_numa_node_to_cpus_v2
+ // is available) and we have the affinity mask of the process when it started.
+ if (_numa_node_to_cpus_v2 == nullptr || _numa_all_cpus_ptr == nullptr) {
+ return;
+ }
+
+ // It's important that we respect any user configuration by removing the
+ // CPUs we're not allowed to run on from the affinity mask. For example,
+ // if the user runs the JVM with "numactl -C 0-1,4-5" on a machine with
+ // the following NUMA setup:
+ // NUMA 0: CPUs 0-3, NUMA 1: CPUs 4-7
+ // We expect to get the following affinity masks:
+ // Affinity masks: idx 0 = (0, 1), idx 1 = (4, 5)
+
+ const int num_nodes = get_existing_num_nodes();
+ const unsigned num_cpus = (unsigned)os::processor_count();
+
+ for (int i = 0; i < num_nodes; i++) {
+ struct bitmask* affinity_mask = _numa_allocate_cpumask();
+
+ // Fill the affinity mask with all CPUs belonging to NUMA node i
+ _numa_node_to_cpus_v2(i, affinity_mask);
+
+ // Clear the bits of all CPUs that the process is not allowed to
+ // execute tasks on
+ for (unsigned j = 0; j < num_cpus; j++) {
+ if (!_numa_bitmask_isbitset(_numa_all_cpus_ptr, j)) {
+ _numa_bitmask_clearbit(affinity_mask, j);
+ }
+ }
+
+ _numa_affinity_masks->push(affinity_mask);
+ }
+}
+
void os::Linux::rebuild_nindex_to_node_map() {
int highest_node_number = Linux::numa_max_node();
@@ -3351,6 +3394,25 @@ int os::Linux::numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen)
return -1;
}
+void os::Linux::numa_set_thread_affinity(pid_t tid, int node) {
+ // We only set affinity if running libnuma v2 (_numa_sched_setaffinity
+ // is available) and we have the affinity masks
+ if (_numa_sched_setaffinity == nullptr ||
+ _numa_all_cpus_ptr == nullptr ||
+ _numa_affinity_masks->is_empty()) {
+ return;
+ }
+
+ if (node == -1) {
+ // If the node is -1, the affinity is reverted to the original affinity
+ // of the thread when the VM was started
+ _numa_sched_setaffinity(tid, _numa_all_cpus_ptr);
+ } else {
+ // Normal case, set the affinity to the corresponding affinity mask
+ _numa_sched_setaffinity(tid, _numa_affinity_masks->at(node));
+ }
+}
+
int os::Linux::get_node_by_cpu(int cpu_id) {
if (cpu_to_node() != nullptr && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
return cpu_to_node()->at(cpu_id);
@@ -3360,6 +3422,7 @@ int os::Linux::get_node_by_cpu(int cpu_id) {
GrowableArray* os::Linux::_cpu_to_node;
GrowableArray* os::Linux::_nindex_to_node;
+GrowableArray* os::Linux::_numa_affinity_masks;
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_node_to_cpus_v2_func_t os::Linux::_numa_node_to_cpus_v2;
@@ -3371,17 +3434,21 @@ os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
+os::Linux::numa_bitmask_clearbit_func_t os::Linux::_numa_bitmask_clearbit;
os::Linux::numa_bitmask_equal_func_t os::Linux::_numa_bitmask_equal;
os::Linux::numa_distance_func_t os::Linux::_numa_distance;
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
os::Linux::numa_get_run_node_mask_func_t os::Linux::_numa_get_run_node_mask;
+os::Linux::numa_sched_setaffinity_func_t os::Linux::_numa_sched_setaffinity;
+os::Linux::numa_allocate_cpumask_func_t os::Linux::_numa_allocate_cpumask;
os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages;
os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred;
os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
unsigned long* os::Linux::_numa_all_nodes;
struct bitmask* os::Linux::_numa_all_nodes_ptr;
struct bitmask* os::Linux::_numa_nodes_ptr;
+struct bitmask* os::Linux::_numa_all_cpus_ptr;
struct bitmask* os::Linux::_numa_interleave_bitmask;
struct bitmask* os::Linux::_numa_membind_bitmask;
struct bitmask* os::Linux::_numa_cpunodebind_bitmask;
@@ -4236,7 +4303,7 @@ OSReturn os::get_native_priority(const Thread* const thread,
// For reference, please, see IEEE Std 1003.1-2004:
// http://www.unix.org/single_unix_specification
-jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
+jlong os::Linux::total_thread_cpu_time(clockid_t clockid) {
struct timespec tp;
int status = clock_gettime(clockid, &tp);
assert(status == 0, "clock_gettime error: %s", os::strerror(errno));
@@ -4464,8 +4531,6 @@ jint os::init_2(void) {
os::Posix::init_2();
- Linux::fast_thread_clock_init();
-
if (PosixSignals::init() == JNI_ERR) {
return JNI_ERR;
}
@@ -4893,14 +4958,14 @@ int os::open(const char *path, int oflag, int mode) {
return fd;
}
-static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
+static jlong user_thread_cpu_time(Thread *thread);
-static jlong fast_cpu_time(Thread *thread) {
+static jlong total_thread_cpu_time(Thread *thread) {
clockid_t clockid;
- int rc = os::Linux::pthread_getcpuclockid(thread->osthread()->pthread_id(),
+ int rc = pthread_getcpuclockid(thread->osthread()->pthread_id(),
&clockid);
if (rc == 0) {
- return os::Linux::fast_thread_cpu_time(clockid);
+ return os::Linux::total_thread_cpu_time(clockid);
} else {
// It's possible to encounter a terminated native thread that failed
// to detach itself from the VM - which should result in ESRCH.
@@ -4917,41 +4982,31 @@ static jlong fast_cpu_time(Thread *thread) {
// the fast estimate available on the platform.
jlong os::current_thread_cpu_time() {
- if (os::Linux::supports_fast_thread_cpu_time()) {
- return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
- } else {
- // return user + sys since the cost is the same
- return slow_thread_cpu_time(Thread::current(), true /* user + sys */);
- }
+ return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
}
jlong os::thread_cpu_time(Thread* thread) {
- // consistent with what current_thread_cpu_time() returns
- if (os::Linux::supports_fast_thread_cpu_time()) {
- return fast_cpu_time(thread);
- } else {
- return slow_thread_cpu_time(thread, true /* user + sys */);
- }
+ return total_thread_cpu_time(thread);
}
jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
- if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
- return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
+ if (user_sys_cpu_time) {
+ return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
} else {
- return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time);
+ return user_thread_cpu_time(Thread::current());
}
}
jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
- if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
- return fast_cpu_time(thread);
+ if (user_sys_cpu_time) {
+ return total_thread_cpu_time(thread);
} else {
- return slow_thread_cpu_time(thread, user_sys_cpu_time);
+ return user_thread_cpu_time(thread);
}
}
// -1 on error.
-static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
+static jlong user_thread_cpu_time(Thread *thread) {
pid_t tid = thread->osthread()->thread_id();
char *s;
char stat[2048];
@@ -4988,11 +5043,8 @@ static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
&ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
&user_time, &sys_time);
if (count != 13) return -1;
- if (user_sys_cpu_time) {
- return ((jlong)sys_time + (jlong)user_time) * (1000000000 / os::Posix::clock_tics_per_second());
- } else {
- return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
- }
+
+ return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
}
void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
@@ -5071,7 +5123,7 @@ int os::get_core_path(char* buffer, size_t bufferSize) {
if (core_pattern[0] == '|') {
written = jio_snprintf(buffer, bufferSize,
- "\"%s\" (or dumping to %s/core.%d)",
+ "\"%s\" (alternatively, falling back to %s/core.%d)",
&core_pattern[1], p, current_process_id());
} else if (pid_pos != nullptr) {
*pid_pos = '\0';
diff --git a/src/hotspot/os/linux/os_linux.hpp b/src/hotspot/os/linux/os_linux.hpp
index df96a17d8e9..dd07cb600b9 100644
--- a/src/hotspot/os/linux/os_linux.hpp
+++ b/src/hotspot/os/linux/os_linux.hpp
@@ -32,19 +32,19 @@
class os::Linux {
friend class os;
- static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
-
static address _initial_thread_stack_bottom;
static uintptr_t _initial_thread_stack_size;
static const char *_libc_version;
static const char *_libpthread_version;
- static bool _supports_fast_thread_cpu_time;
-
static GrowableArray* _cpu_to_node;
static GrowableArray* _nindex_to_node;
+ static GrowableArray* _numa_affinity_masks;
+
+ static void build_numa_affinity_masks();
+
protected:
static physical_memory_size_type _physical_memory;
@@ -142,18 +142,7 @@ class os::Linux {
static bool manually_expand_stack(JavaThread * t, address addr);
static void expand_stack_to(address bottom);
- // fast POSIX clocks support
- static void fast_thread_clock_init(void);
-
- static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
- return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
- }
-
- static bool supports_fast_thread_cpu_time() {
- return _supports_fast_thread_cpu_time;
- }
-
- static jlong fast_thread_cpu_time(clockid_t clockid);
+ static jlong total_thread_cpu_time(clockid_t clockid);
static jlong sendfile(int out_fd, int in_fd, jlong* offset, jlong count);
@@ -230,8 +219,11 @@ class os::Linux {
typedef void (*numa_set_preferred_func_t)(int node);
typedef void (*numa_set_bind_policy_func_t)(int policy);
typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
+ typedef int (*numa_bitmask_clearbit_func_t)(struct bitmask *bmp, unsigned int n);
typedef int (*numa_bitmask_equal_func_t)(struct bitmask *bmp1, struct bitmask *bmp2);
typedef int (*numa_distance_func_t)(int node1, int node2);
+ typedef int (*numa_sched_setaffinity_func_t)(pid_t pid, struct bitmask* mask);
+ typedef struct bitmask* (*numa_allocate_cpumask_func_t)(void);
static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus;
@@ -244,6 +236,7 @@ class os::Linux {
static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
static numa_set_bind_policy_func_t _numa_set_bind_policy;
static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
+ static numa_bitmask_clearbit_func_t _numa_bitmask_clearbit;
static numa_bitmask_equal_func_t _numa_bitmask_equal;
static numa_distance_func_t _numa_distance;
static numa_get_membind_func_t _numa_get_membind;
@@ -251,9 +244,12 @@ class os::Linux {
static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
static numa_move_pages_func_t _numa_move_pages;
static numa_set_preferred_func_t _numa_set_preferred;
+ static numa_sched_setaffinity_func_t _numa_sched_setaffinity;
+ static numa_allocate_cpumask_func_t _numa_allocate_cpumask;
static unsigned long* _numa_all_nodes;
static struct bitmask* _numa_all_nodes_ptr;
static struct bitmask* _numa_nodes_ptr;
+ static struct bitmask* _numa_all_cpus_ptr;
static struct bitmask* _numa_interleave_bitmask;
static struct bitmask* _numa_membind_bitmask;
static struct bitmask* _numa_cpunodebind_bitmask;
@@ -269,6 +265,7 @@ class os::Linux {
static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
+ static void set_numa_bitmask_clearbit(numa_bitmask_clearbit_func_t func) { _numa_bitmask_clearbit = func; }
static void set_numa_bitmask_equal(numa_bitmask_equal_func_t func) { _numa_bitmask_equal = func; }
static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
@@ -279,9 +276,12 @@ class os::Linux {
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
+ static void set_numa_all_cpus_ptr(struct bitmask **ptr) { _numa_all_cpus_ptr = (ptr == nullptr ? nullptr : *ptr); }
static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; }
static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; }
static void set_numa_cpunodebind_bitmask(struct bitmask* ptr) { _numa_cpunodebind_bitmask = ptr ; }
+ static void set_numa_sched_setaffinity(numa_sched_setaffinity_func_t func) { _numa_sched_setaffinity = func; }
+ static void set_numa_allocate_cpumask(numa_allocate_cpumask_func_t func) { _numa_allocate_cpumask = func; }
static int sched_getcpu_syscall(void);
enum NumaAllocationPolicy{
@@ -292,6 +292,8 @@ class os::Linux {
static NumaAllocationPolicy _current_numa_policy;
public:
+ static void numa_set_thread_affinity(pid_t tid, int node);
+
static int sched_getcpu() { return _sched_getcpu != nullptr ? _sched_getcpu() : -1; }
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
static int numa_max_node() { return _numa_max_node != nullptr ? _numa_max_node() : -1; }
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index 1a04cbba0de..8f1f07dd055 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -108,41 +108,60 @@ size_t os::_os_min_stack_allowed = PTHREAD_STACK_MIN;
// Check core dump limit and report possible place where core can be found
void os::check_core_dump_prerequisites(char* buffer, size_t bufferSize, bool check_only) {
+ stringStream buf(buffer, bufferSize);
if (!FLAG_IS_DEFAULT(CreateCoredumpOnCrash) && !CreateCoredumpOnCrash) {
- jio_snprintf(buffer, bufferSize, "CreateCoredumpOnCrash is disabled from command line");
- VMError::record_coredump_status(buffer, false);
+ buf.print("CreateCoredumpOnCrash is disabled from command line");
+ VMError::record_coredump_status(buf.freeze(), false);
} else {
struct rlimit rlim;
bool success = true;
bool warn = true;
char core_path[PATH_MAX];
if (get_core_path(core_path, PATH_MAX) <= 0) {
- jio_snprintf(buffer, bufferSize, "core.%d (may not exist)", current_process_id());
+ // In the warning message, let the user know.
+ if (check_only) {
+ buf.print("the core path couldn't be determined. It commonly defaults to ");
+ }
+ buf.print("core.%d%s", current_process_id(), check_only ? "" : " (may not exist)");
#ifdef LINUX
} else if (core_path[0] == '"') { // redirect to user process
- jio_snprintf(buffer, bufferSize, "Core dumps may be processed with %s", core_path);
+ if (check_only) {
+ buf.print("core dumps may be further processed by the following: ");
+ } else {
+ buf.print("Determined by the following: ");
+ }
+ buf.print("%s", core_path);
#endif
} else if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
- jio_snprintf(buffer, bufferSize, "%s (may not exist)", core_path);
+ if (check_only) {
+ buf.print("the rlimit couldn't be determined. If resource limits permit, the core dump will be located at ");
+ }
+ buf.print("%s%s", core_path, check_only ? "" : " (may not exist)");
} else {
switch(rlim.rlim_cur) {
case RLIM_INFINITY:
- jio_snprintf(buffer, bufferSize, "%s", core_path);
+ buf.print("%s", core_path);
warn = false;
break;
case 0:
- jio_snprintf(buffer, bufferSize, "Core dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again");
+ buf.print("%s dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again", check_only ? "core" : "Core");
success = false;
break;
default:
- jio_snprintf(buffer, bufferSize, "%s (max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", core_path, uint64_t(rlim.rlim_cur) / K);
+ if (check_only) {
+ buf.print("core dumps are constrained ");
+ } else {
+ buf.print( "%s ", core_path);
+ }
+ buf.print( "(max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", uint64_t(rlim.rlim_cur) / K);
break;
}
}
+ const char* result = buf.freeze();
if (!check_only) {
- VMError::record_coredump_status(buffer, success);
+ VMError::record_coredump_status(result, success);
} else if (warn) {
- warning("CreateCoredumpOnCrash specified, but %s", buffer);
+ warning("CreateCoredumpOnCrash specified, but %s", result);
}
}
}
diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp
index 5833e324070..625eb63445a 100644
--- a/src/hotspot/os/posix/signals_posix.cpp
+++ b/src/hotspot/os/posix/signals_posix.cpp
@@ -621,7 +621,7 @@ int JVM_HANDLE_XXX_SIGNAL(int sig, siginfo_t* info,
if (cb != nullptr && cb->is_nmethod()) {
nmethod* nm = cb->as_nmethod();
assert(nm->insts_contains_inclusive(pc), "");
- address deopt = nm->deopt_handler_begin();
+ address deopt = nm->deopt_handler_entry();
assert(deopt != nullptr, "");
frame fr = os::fetch_frame_from_context(uc);
diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp
index ce2baeaf46c..8a450a291d3 100644
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@@ -2795,7 +2795,7 @@ LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo) {
if (cb != nullptr && cb->is_nmethod()) {
nmethod* nm = cb->as_nmethod();
frame fr = os::fetch_frame_from_context((void*)exceptionInfo->ContextRecord);
- address deopt = nm->deopt_handler_begin();
+ address deopt = nm->deopt_handler_entry();
assert(nm->insts_contains_inclusive(pc), "");
nm->set_original_pc(&fr, pc);
// Set pc to handler
@@ -3752,6 +3752,7 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
return page_size;
}
+void os::numa_set_thread_affinity(Thread *thread, int node) { }
void os::numa_make_global(char *addr, size_t bytes) { }
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { }
size_t os::numa_get_groups_num() { return MAX2(numa_node_list_holder.get_count(), 1); }
diff --git a/src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp b/src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp
index 3d2c632ace8..67701775f94 100644
--- a/src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp
+++ b/src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp
@@ -52,12 +52,16 @@ struct AtomicAccess::PlatformAdd {
}
};
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template
template
inline T AtomicAccess::PlatformXchg::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(byte_size == sizeof(T));
+ STATIC_ASSERT(byte_size == 4 || byte_size == 8);
T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
FULL_MEM_BARRIER;
return res;
diff --git a/src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp
index 1024c6b1418..29471300f3d 100644
--- a/src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp
+++ b/src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp
@@ -52,6 +52,9 @@ inline D AtomicAccess::PlatformAdd<4>::fetch_then_add(D volatile* dest, I add_va
return old_value;
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp b/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
index 6a720dac54e..6c8684718fc 100644
--- a/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
@@ -66,6 +66,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return res;
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp
index 6e5f53edfa3..4ddb2b758b4 100644
--- a/src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp
@@ -113,6 +113,9 @@ inline D AtomicAccess::PlatformAdd<8>::fetch_then_add(D volatile* dest, I add_va
return atomic_fastcall(stub, dest, add_value);
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
index 5b5f9da51a6..390207f9e5e 100644
--- a/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
@@ -118,6 +118,8 @@ inline D AtomicAccess::PlatformAdd<4>::add_then_fetch(D volatile* dest, I add_va
return add_using_helper(ARMAtomicFuncs::_add_func, dest, add_value);
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template
diff --git a/src/hotspot/os_cpu/linux_arm/macroAssembler_linux_arm_32.cpp b/src/hotspot/os_cpu/linux_arm/macroAssembler_linux_arm_32.cpp
index e74daaa6d66..e4737191cfc 100644
--- a/src/hotspot/os_cpu/linux_arm/macroAssembler_linux_arm_32.cpp
+++ b/src/hotspot/os_cpu/linux_arm/macroAssembler_linux_arm_32.cpp
@@ -246,9 +246,9 @@ void MacroAssembler::atomic_cas64(Register memval_lo, Register memval_hi, Regist
Label loop;
assert_different_registers(memval_lo, memval_hi, result, oldval_lo,
oldval_hi, newval_lo, newval_hi, base);
- assert(memval_hi == memval_lo + 1 && memval_lo < R9, "cmpxchg_long: illegal registers");
- assert(oldval_hi == oldval_lo + 1 && oldval_lo < R9, "cmpxchg_long: illegal registers");
- assert(newval_hi == newval_lo + 1 && newval_lo < R9, "cmpxchg_long: illegal registers");
+ assert(memval_hi == as_Register(memval_lo->encoding() + 1) && memval_lo->encoding() < R9->encoding(), "cmpxchg_long: illegal registers");
+ assert(oldval_hi == as_Register(oldval_lo->encoding() + 1) && oldval_lo->encoding() < R9->encoding(), "cmpxchg_long: illegal registers");
+ assert(newval_hi == as_Register(newval_lo->encoding() + 1) && newval_lo->encoding() < R9->encoding(), "cmpxchg_long: illegal registers");
assert(result != R10, "cmpxchg_long: illegal registers");
assert(base != R10, "cmpxchg_long: illegal registers");
diff --git a/src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp
index 6d57ea55a83..bdbc0b8ac7f 100644
--- a/src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp
+++ b/src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp
@@ -152,6 +152,9 @@ inline T AtomicAccess::PlatformCmpxchg<4>::operator()(T volatile* dest __attribu
}
#endif
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template
template
inline T AtomicAccess::PlatformXchg::operator()(T volatile* dest,
@@ -164,6 +167,7 @@ inline T AtomicAccess::PlatformXchg::operator()(T volatile* dest,
#endif
STATIC_ASSERT(byte_size == sizeof(T));
+ STATIC_ASSERT(byte_size == 4 || byte_size == 8);
if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
index 0799de014a9..35cbb75e8ff 100644
--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -104,11 +104,15 @@ uint32_t VM_Version::cpu_vector_length() {
}
void VM_Version::RVExtFeatureValue::log_enabled() {
- log_debug(os, cpu)("Enabled RV64 feature \"%s\"", pretty());
+ log_info(os, cpu)("Enabled RV64 feature \"%s\"", pretty());
+}
+
+void VM_Version::RVExtFeatureValue::log_disabled(const char* reason) {
+ log_info(os, cpu)("Disabled RV64 feature \"%s\" (%s)", pretty(), reason);
}
void VM_Version::RVNonExtFeatureValue::log_enabled() {
- log_debug(os, cpu)("Enabled RV64 feature \"%s\" (%ld)", pretty(), value());
+ log_info(os, cpu)("Enabled RV64 feature \"%s\" (%ld)", pretty(), value());
}
void VM_Version::setup_cpu_available_features() {
@@ -193,7 +197,7 @@ void VM_Version::setup_cpu_available_features() {
// via PR_RISCV_SCOPE_PER_THREAD, i.e. on VM attach/deattach.
int ret = prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
if (ret == 0) {
- log_debug(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) enabled.");
+ log_info(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) enabled.");
} else {
FLAG_SET_ERGO(UseCtxFencei, false);
log_info(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) disabled, unsupported by kernel.");
diff --git a/src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp b/src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp
index 5849d69ae2f..f3c1e8f1a2c 100644
--- a/src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp
+++ b/src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp
@@ -209,6 +209,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I inc,
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp
index dd91444d0a3..6b43b5e8e09 100644
--- a/src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp
+++ b/src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp
@@ -52,6 +52,9 @@ inline D AtomicAccess::PlatformAdd<4>::fetch_then_add(D volatile* dest, I add_va
return old_value;
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp b/src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp
index 376ef7a9dc9..96c46c6f59a 100644
--- a/src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp
+++ b/src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp
@@ -65,6 +65,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return res;
}
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
template<>
template
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
diff --git a/src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp b/src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp
index 62b6e3f87ec..f8119654c50 100644
--- a/src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp
+++ b/src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp
@@ -68,6 +68,9 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
#undef DEFINE_INTRINSIC_ADD
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType) \
template<> \
template \
@@ -75,6 +78,8 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
T exchange_value, \
atomic_memory_order order) const { \
STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T)); \
+ STATIC_ASSERT(sizeof(IntrinsicType) == 4 || \
+ sizeof(IntrinsicType) == 8); \
return PrimitiveConversions::cast( \
IntrinsicName(reinterpret_cast(dest), \
PrimitiveConversions::cast(exchange_value))); \
diff --git a/src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp
index a95da151688..aa78a401235 100644
--- a/src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp
@@ -70,6 +70,9 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
#undef DEFINE_INTRINSIC_ADD
+template<>
+struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
+
#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType) \
template<> \
template \
@@ -77,6 +80,8 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
T exchange_value, \
atomic_memory_order order) const { \
STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T)); \
+ STATIC_ASSERT(sizeof(IntrinsicType) == 4 || \
+ sizeof(IntrinsicType) == 8); \
return PrimitiveConversions::cast( \
IntrinsicName(reinterpret_cast(dest), \
PrimitiveConversions::cast(exchange_value))); \
diff --git a/src/hotspot/share/cds/aotConstantPoolResolver.cpp b/src/hotspot/share/cds/aotConstantPoolResolver.cpp
index ddf7d32ed70..c4bb26f6fb1 100644
--- a/src/hotspot/share/cds/aotConstantPoolResolver.cpp
+++ b/src/hotspot/share/cds/aotConstantPoolResolver.cpp
@@ -449,7 +449,7 @@ bool AOTConstantPoolResolver::check_lambda_metafactory_signature(ConstantPool* c
}
bool AOTConstantPoolResolver::check_lambda_metafactory_methodtype_arg(ConstantPool* cp, int bsms_attribute_index, int arg_i) {
- int mt_index = cp->bsm_attribute_entry(bsms_attribute_index)->argument_index(arg_i);
+ int mt_index = cp->bsm_attribute_entry(bsms_attribute_index)->argument(arg_i);
if (!cp->tag_at(mt_index).is_method_type()) {
// malformed class?
return false;
@@ -465,7 +465,7 @@ bool AOTConstantPoolResolver::check_lambda_metafactory_methodtype_arg(ConstantPo
}
bool AOTConstantPoolResolver::check_lambda_metafactory_methodhandle_arg(ConstantPool* cp, int bsms_attribute_index, int arg_i) {
- int mh_index = cp->bsm_attribute_entry(bsms_attribute_index)->argument_index(arg_i);
+ int mh_index = cp->bsm_attribute_entry(bsms_attribute_index)->argument(arg_i);
if (!cp->tag_at(mh_index).is_method_handle()) {
// malformed class?
return false;
diff --git a/src/hotspot/share/cds/aotMetaspace.cpp b/src/hotspot/share/cds/aotMetaspace.cpp
index 42d41e6ae89..f56050d4d31 100644
--- a/src/hotspot/share/cds/aotMetaspace.cpp
+++ b/src/hotspot/share/cds/aotMetaspace.cpp
@@ -114,6 +114,7 @@ intx AOTMetaspace::_relocation_delta;
char* AOTMetaspace::_requested_base_address;
Array* AOTMetaspace::_archived_method_handle_intrinsics = nullptr;
bool AOTMetaspace::_use_optimized_module_handling = true;
+int volatile AOTMetaspace::_preimage_static_archive_dumped = 0;
FileMapInfo* AOTMetaspace::_output_mapinfo = nullptr;
// The CDS archive is divided into the following regions:
@@ -1056,7 +1057,21 @@ void AOTMetaspace::exercise_runtime_cds_code(TRAPS) {
CDSProtectionDomain::to_file_URL("dummy.jar", Handle(), CHECK);
}
+bool AOTMetaspace::preimage_static_archive_dumped() {
+ assert(CDSConfig::is_dumping_preimage_static_archive(), "Required");
+ return AtomicAccess::load_acquire(&_preimage_static_archive_dumped) == 1;
+}
+
void AOTMetaspace::dump_static_archive_impl(StaticArchiveBuilder& builder, TRAPS) {
+ if (CDSConfig::is_dumping_preimage_static_archive()) {
+ // When dumping to the AOT configuration file ensure this function is only executed once.
+ // Multiple invocations may happen via JCmd, during VM exit or other means (in the future)
+ // from different threads and possibly concurrently.
+ if (AtomicAccess::cmpxchg(&_preimage_static_archive_dumped, 0, 1) != 0) {
+ return;
+ }
+ }
+
if (CDSConfig::is_dumping_classic_static_archive()) {
// We are running with -Xshare:dump
load_classes(CHECK);
@@ -1355,8 +1370,11 @@ bool AOTMetaspace::try_link_class(JavaThread* current, InstanceKlass* ik) {
ik->link_class(THREAD);
if (HAS_PENDING_EXCEPTION) {
ResourceMark rm(THREAD);
- aot_log_warning(aot)("Preload Warning: Verification failed for %s",
- ik->external_name());
+ oop message = java_lang_Throwable::message(current->pending_exception());
+ aot_log_warning(aot)("Preload Warning: Verification failed for %s because a %s was thrown: %s",
+ ik->external_name(),
+ current->pending_exception()->klass()->external_name(),
+ message == nullptr ? "(no message)" : java_lang_String::as_utf8_string(message));
CLEAR_PENDING_EXCEPTION;
SystemDictionaryShared::set_class_has_failed_verification(ik);
} else {
diff --git a/src/hotspot/share/cds/aotMetaspace.hpp b/src/hotspot/share/cds/aotMetaspace.hpp
index 1712a7865ad..ab78787288f 100644
--- a/src/hotspot/share/cds/aotMetaspace.hpp
+++ b/src/hotspot/share/cds/aotMetaspace.hpp
@@ -60,6 +60,7 @@ class AOTMetaspace : AllStatic {
static char* _requested_base_address;
static bool _use_optimized_module_handling;
static Array* _archived_method_handle_intrinsics;
+ static int volatile _preimage_static_archive_dumped;
static FileMapInfo* _output_mapinfo;
public:
@@ -115,6 +116,8 @@ public:
// inside the metaspace of the dynamic static CDS archive
static bool in_aot_cache_dynamic_region(void* p) NOT_CDS_RETURN_(false);
+ static bool preimage_static_archive_dumped() NOT_CDS_RETURN_(false);
+
static void unrecoverable_loading_error(const char* message = "unrecoverable error");
static void report_loading_error(const char* format, ...) ATTRIBUTE_PRINTF(1, 0);
static void unrecoverable_writing_error(const char* message = nullptr);
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index 79ab881e7f6..92bacc4c2c3 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -1057,7 +1057,9 @@ void ciEnv::register_method(ciMethod* target,
}
assert(offsets->value(CodeOffsets::Deopt) != -1, "must have deopt entry");
- assert(offsets->value(CodeOffsets::Exceptions) != -1, "must have exception entry");
+
+ assert(compiler->type() == compiler_c2 ||
+ offsets->value(CodeOffsets::Exceptions) != -1, "must have exception entry");
nm = nmethod::new_nmethod(method,
compile_id(),
diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp
index eb8a2a389b9..68890775051 100644
--- a/src/hotspot/share/classfile/classFileParser.cpp
+++ b/src/hotspot/share/classfile/classFileParser.cpp
@@ -47,6 +47,7 @@
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/annotations.hpp"
+#include "oops/bsmAttribute.inline.hpp"
#include "oops/constantPool.inline.hpp"
#include "oops/fieldInfo.hpp"
#include "oops/fieldStreams.inline.hpp"
@@ -3298,8 +3299,9 @@ void ClassFileParser::parse_classfile_bootstrap_methods_attribute(const ClassFil
TRAPS) {
assert(cfs != nullptr, "invariant");
assert(cp != nullptr, "invariant");
+ const int cp_size = cp->length();
- const u1* const current_start = cfs->current();
+ const u1* const current_before_parsing = cfs->current();
guarantee_property(attribute_byte_length >= sizeof(u2),
"Invalid BootstrapMethods attribute length %u in class file %s",
@@ -3308,57 +3310,40 @@ void ClassFileParser::parse_classfile_bootstrap_methods_attribute(const ClassFil
cfs->guarantee_more(attribute_byte_length, CHECK);
- const int attribute_array_length = cfs->get_u2_fast();
+ const int num_bootstrap_methods = cfs->get_u2_fast();
- guarantee_property(_max_bootstrap_specifier_index < attribute_array_length,
+ guarantee_property(_max_bootstrap_specifier_index < num_bootstrap_methods,
"Short length on BootstrapMethods in class file %s",
CHECK);
+ const u4 bootstrap_methods_u2_len = (attribute_byte_length - sizeof(u2)) / sizeof(u2);
- // The attribute contains a counted array of counted tuples of shorts,
- // represending bootstrap specifiers:
- // length*{bootstrap_method_index, argument_count*{argument_index}}
- const unsigned int operand_count = (attribute_byte_length - (unsigned)sizeof(u2)) / (unsigned)sizeof(u2);
- // operand_count = number of shorts in attr, except for leading length
-
- // The attribute is copied into a short[] array.
- // The array begins with a series of short[2] pairs, one for each tuple.
- const int index_size = (attribute_array_length * 2);
-
- Array* const operands =
- MetadataFactory::new_array(_loader_data, index_size + operand_count, CHECK);
-
- // Eagerly assign operands so they will be deallocated with the constant
+ // Eagerly assign the arrays so that they will be deallocated with the constant
// pool if there is an error.
- cp->set_operands(operands);
+ BSMAttributeEntries::InsertionIterator iter =
+ cp->bsm_entries().start_extension(num_bootstrap_methods,
+ bootstrap_methods_u2_len,
+ _loader_data,
+ CHECK);
- int operand_fill_index = index_size;
- const int cp_size = cp->length();
-
- for (int n = 0; n < attribute_array_length; n++) {
- // Store a 32-bit offset into the header of the operand array.
- ConstantPool::operand_offset_at_put(operands, n, operand_fill_index);
-
- // Read a bootstrap specifier.
+ for (int i = 0; i < num_bootstrap_methods; i++) {
cfs->guarantee_more(sizeof(u2) * 2, CHECK); // bsm, argc
- const u2 bootstrap_method_index = cfs->get_u2_fast();
- const u2 argument_count = cfs->get_u2_fast();
+ u2 bootstrap_method_ref = cfs->get_u2_fast();
+ u2 num_bootstrap_arguments = cfs->get_u2_fast();
guarantee_property(
- valid_cp_range(bootstrap_method_index, cp_size) &&
- cp->tag_at(bootstrap_method_index).is_method_handle(),
- "bootstrap_method_index %u has bad constant type in class file %s",
- bootstrap_method_index,
- CHECK);
+ valid_cp_range(bootstrap_method_ref, cp_size) &&
+ cp->tag_at(bootstrap_method_ref).is_method_handle(),
+ "bootstrap_method_index %u has bad constant type in class file %s",
+ bootstrap_method_ref,
+ CHECK);
+ cfs->guarantee_more(sizeof(u2) * num_bootstrap_arguments, CHECK); // argv[argc]
- guarantee_property((operand_fill_index + 1 + argument_count) < operands->length(),
- "Invalid BootstrapMethods num_bootstrap_methods or num_bootstrap_arguments value in class file %s",
- CHECK);
+ BSMAttributeEntry* entry = iter.reserve_new_entry(bootstrap_method_ref, num_bootstrap_arguments);
+ guarantee_property(entry != nullptr,
+ "Invalid BootstrapMethods num_bootstrap_methods."
+ " The total amount of space reserved for the BootstrapMethod attribute was not sufficient", CHECK);
- operands->at_put(operand_fill_index++, bootstrap_method_index);
- operands->at_put(operand_fill_index++, argument_count);
-
- cfs->guarantee_more(sizeof(u2) * argument_count, CHECK); // argv[argc]
- for (int j = 0; j < argument_count; j++) {
+ for (int argi = 0; argi < num_bootstrap_arguments; argi++) {
const u2 argument_index = cfs->get_u2_fast();
guarantee_property(
valid_cp_range(argument_index, cp_size) &&
@@ -3366,10 +3351,11 @@ void ClassFileParser::parse_classfile_bootstrap_methods_attribute(const ClassFil
"argument_index %u has bad constant type in class file %s",
argument_index,
CHECK);
- operands->at_put(operand_fill_index++, argument_index);
+ entry->set_argument(argi, argument_index);
}
}
- guarantee_property(current_start + attribute_byte_length == cfs->current(),
+ cp->bsm_entries().end_extension(iter, _loader_data, CHECK);
+ guarantee_property(current_before_parsing + attribute_byte_length == cfs->current(),
"Bad length on BootstrapMethods in class file %s",
CHECK);
}
diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp
index d91af9b4991..c2f8b46f00e 100644
--- a/src/hotspot/share/code/nmethod.cpp
+++ b/src/hotspot/share/code/nmethod.cpp
@@ -1302,7 +1302,7 @@ nmethod::nmethod(
}
// Native wrappers do not have deopt handlers. Make the values
// something that will never match a pc like the nmethod vtable entry
- _deopt_handler_offset = 0;
+ _deopt_handler_entry_offset = 0;
_unwind_handler_offset = 0;
CHECKED_CAST(_oops_size, uint16_t, align_up(code_buffer->total_oop_size(), oopSize));
@@ -1442,7 +1442,7 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
_skipped_instructions_size = nm._skipped_instructions_size;
_stub_offset = nm._stub_offset;
_exception_offset = nm._exception_offset;
- _deopt_handler_offset = nm._deopt_handler_offset;
+ _deopt_handler_entry_offset = nm._deopt_handler_entry_offset;
_unwind_handler_offset = nm._unwind_handler_offset;
_num_stack_arg_slots = nm._num_stack_arg_slots;
_oops_size = nm._oops_size;
@@ -1704,19 +1704,26 @@ nmethod::nmethod(
_exception_offset = -1;
}
if (offsets->value(CodeOffsets::Deopt) != -1) {
- _deopt_handler_offset = code_offset() + offsets->value(CodeOffsets::Deopt);
+ _deopt_handler_entry_offset = code_offset() + offsets->value(CodeOffsets::Deopt);
} else {
- _deopt_handler_offset = -1;
+ _deopt_handler_entry_offset = -1;
}
} else
#endif
{
// Exception handler and deopt handler are in the stub section
- assert(offsets->value(CodeOffsets::Exceptions) != -1, "must be set");
assert(offsets->value(CodeOffsets::Deopt ) != -1, "must be set");
- _exception_offset = _stub_offset + offsets->value(CodeOffsets::Exceptions);
- _deopt_handler_offset = _stub_offset + offsets->value(CodeOffsets::Deopt);
+ bool has_exception_handler = (offsets->value(CodeOffsets::Exceptions) != -1);
+ assert(has_exception_handler == (compiler->type() != compiler_c2),
+ "C2 compiler doesn't provide exception handler stub code.");
+ if (has_exception_handler) {
+ _exception_offset = _stub_offset + offsets->value(CodeOffsets::Exceptions);
+ } else {
+ _exception_offset = -1;
+ }
+
+ _deopt_handler_entry_offset = _stub_offset + offsets->value(CodeOffsets::Deopt);
}
if (offsets->value(CodeOffsets::UnwindHandler) != -1) {
// C1 generates UnwindHandler at the end of instructions section.
@@ -4024,7 +4031,7 @@ const char* nmethod::nmethod_section_label(address pos) const {
// Check stub_code before checking exception_handler or deopt_handler.
if (pos == this->stub_begin()) label = "[Stub Code]";
if (JVMCI_ONLY(_exception_offset >= 0 &&) pos == exception_begin()) label = "[Exception Handler]";
- if (JVMCI_ONLY(_deopt_handler_offset != -1 &&) pos == deopt_handler_begin()) label = "[Deopt Handler Code]";
+ if (JVMCI_ONLY(_deopt_handler_entry_offset != -1 &&) pos == deopt_handler_entry()) label = "[Deopt Handler Entry Point]";
return label;
}
diff --git a/src/hotspot/share/code/nmethod.hpp b/src/hotspot/share/code/nmethod.hpp
index 34accf428b6..0fa9d7fda9e 100644
--- a/src/hotspot/share/code/nmethod.hpp
+++ b/src/hotspot/share/code/nmethod.hpp
@@ -229,7 +229,7 @@ class nmethod : public CodeBlob {
int _exception_offset;
// All deoptee's will resume execution at this location described by
// this offset.
- int _deopt_handler_offset;
+ int _deopt_handler_entry_offset;
// Offset (from insts_end) of the unwind handler if it exists
int16_t _unwind_handler_offset;
// Number of arguments passed on the stack
@@ -617,7 +617,7 @@ public:
address stub_begin () const { return header_begin() + _stub_offset ; }
address stub_end () const { return code_end() ; }
address exception_begin () const { return header_begin() + _exception_offset ; }
- address deopt_handler_begin () const { return header_begin() + _deopt_handler_offset ; }
+ address deopt_handler_entry () const { return header_begin() + _deopt_handler_entry_offset ; }
address unwind_handler_begin () const { return _unwind_handler_offset != -1 ? (insts_end() - _unwind_handler_offset) : nullptr; }
oop* oops_begin () const { return (oop*) data_begin(); }
oop* oops_end () const { return (oop*) data_end(); }
diff --git a/src/hotspot/share/code/nmethod.inline.hpp b/src/hotspot/share/code/nmethod.inline.hpp
index 44331db669c..ecee3c0c31a 100644
--- a/src/hotspot/share/code/nmethod.inline.hpp
+++ b/src/hotspot/share/code/nmethod.inline.hpp
@@ -34,7 +34,7 @@
inline bool nmethod::is_deopt_pc(address pc) { return is_deopt_entry(pc); }
inline bool nmethod::is_deopt_entry(address pc) {
- return pc == deopt_handler_begin();
+ return pc == deopt_handler_entry();
}
// class ExceptionCache methods
diff --git a/src/hotspot/share/compiler/compilationMemoryStatistic.cpp b/src/hotspot/share/compiler/compilationMemoryStatistic.cpp
index d1e2f6f34a0..1951fd066fc 100644
--- a/src/hotspot/share/compiler/compilationMemoryStatistic.cpp
+++ b/src/hotspot/share/compiler/compilationMemoryStatistic.cpp
@@ -1010,8 +1010,10 @@ void CompilationMemoryStatistic::print_error_report(outputStream* st) {
oom_stats->print_peak_state_on(st);
st->cr();
}
- st->print_cr("Compiler Memory Statistic, 10 most expensive compilations:");
- print_all_by_size(st, false, false, 0, 10);
+ if (Thread::current_or_null_safe() != nullptr) {
+ st->print_cr("Compiler Memory Statistic, 10 most expensive compilations:");
+ print_all_by_size(st, false, false, 0, 10);
+ }
}
void CompilationMemoryStatistic::print_final_report(outputStream* st) {
diff --git a/src/hotspot/share/cppstdlib/new.hpp b/src/hotspot/share/cppstdlib/new.hpp
index 3536ac13288..ea9d6c88c87 100644
--- a/src/hotspot/share/cppstdlib/new.hpp
+++ b/src/hotspot/share/cppstdlib/new.hpp
@@ -79,11 +79,10 @@ class [[deprecated]] bad_array_new_length;
// version to decide whether to redeclare deprecated.
#if defined(__clang__)
-#if __clang_major__ >= 19
-// clang18 and earlier may accept the declaration but go wrong with uses.
-// Different warnings and link-time failures are both possible.
-#define CAN_DEPRECATE_HARDWARE_INTERFERENCE_SIZES 1
-#endif // restrict clang version
+// Some versions of clang with some stdlibs reject the declaration. Others may
+// accept the declaration but go wrong with uses. Different warnings and
+// link-time failures are both possible.
+// Known to have problems at least through clang19.
#elif defined(__GNUC__)
#if (__GNUC__ > 13) || (__GNUC__ == 13 && __GNUC_MINOR__ >= 2)
diff --git a/src/hotspot/share/gc/g1/g1AllocRegion.cpp b/src/hotspot/share/gc/g1/g1AllocRegion.cpp
index 7e748cf7e9f..1af7638102a 100644
--- a/src/hotspot/share/gc/g1/g1AllocRegion.cpp
+++ b/src/hotspot/share/gc/g1/g1AllocRegion.cpp
@@ -33,10 +33,10 @@
#include "utilities/align.hpp"
G1CollectedHeap* G1AllocRegion::_g1h = nullptr;
-G1HeapRegion* G1AllocRegion::_dummy_region = nullptr;
+Atomic G1AllocRegion::_dummy_region;
void G1AllocRegion::setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region) {
- assert(_dummy_region == nullptr, "should be set once");
+ assert(_dummy_region.load_relaxed() == nullptr, "should be set once");
assert(dummy_region != nullptr, "pre-condition");
assert(dummy_region->free() == 0, "pre-condition");
@@ -46,11 +46,11 @@ void G1AllocRegion::setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region) {
assert(dummy_region->par_allocate(1, 1, &assert_tmp) == nullptr, "should fail");
_g1h = g1h;
- _dummy_region = dummy_region;
+ _dummy_region.release_store(dummy_region);
}
size_t G1AllocRegion::fill_up_remaining_space(G1HeapRegion* alloc_region) {
- assert(alloc_region != nullptr && alloc_region != _dummy_region,
+ assert(alloc_region != nullptr && alloc_region != _dummy_region.load_relaxed(),
"pre-condition");
size_t result = 0;
@@ -111,13 +111,13 @@ size_t G1AllocRegion::retire_internal(G1HeapRegion* alloc_region, bool fill_up)
}
size_t G1AllocRegion::retire(bool fill_up) {
- assert_alloc_region(_alloc_region != nullptr, "not initialized properly");
+ assert_alloc_region(_alloc_region.load_relaxed() != nullptr, "not initialized properly");
size_t waste = 0;
trace("retiring");
- G1HeapRegion* alloc_region = _alloc_region;
- if (alloc_region != _dummy_region) {
+ G1HeapRegion* alloc_region = _alloc_region.load_acquire();
+ if (alloc_region != _dummy_region.load_relaxed()) {
waste = retire_internal(alloc_region, fill_up);
reset_alloc_region();
}
@@ -127,7 +127,7 @@ size_t G1AllocRegion::retire(bool fill_up) {
}
HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
- assert_alloc_region(_alloc_region == _dummy_region, "pre-condition");
+ assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed(), "pre-condition");
trace("attempting region allocation");
G1HeapRegion* new_alloc_region = allocate_new_region(word_size);
@@ -138,7 +138,6 @@ HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
HeapWord* result = new_alloc_region->allocate(word_size);
assert_alloc_region(result != nullptr, "the allocation should succeeded");
- OrderAccess::storestore();
// Note that we first perform the allocation and then we store the
// region in _alloc_region. This is the reason why an active region
// can never be empty.
@@ -154,16 +153,16 @@ HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
void G1AllocRegion::init() {
trace("initializing");
- assert_alloc_region(_alloc_region == nullptr, "pre-condition");
- assert_alloc_region(_dummy_region != nullptr, "should have been set");
- _alloc_region = _dummy_region;
+ assert_alloc_region(_alloc_region.load_relaxed() == nullptr, "pre-condition");
+ assert_alloc_region(_dummy_region.load_relaxed() != nullptr, "should have been set");
+ _alloc_region.release_store(_dummy_region.load_relaxed());
_count = 0;
trace("initialized");
}
void G1AllocRegion::set(G1HeapRegion* alloc_region) {
trace("setting");
- assert_alloc_region(_alloc_region == _dummy_region && _count == 0, "pre-condition");
+ assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed() && _count == 0, "pre-condition");
update_alloc_region(alloc_region);
trace("set");
@@ -175,19 +174,19 @@ void G1AllocRegion::update_alloc_region(G1HeapRegion* alloc_region) {
// maintain the "the alloc region cannot be empty" invariant.
assert_alloc_region(alloc_region != nullptr && !alloc_region->is_empty(), "pre-condition");
- _alloc_region = alloc_region;
+ _alloc_region.release_store(alloc_region);
_count += 1;
trace("updated");
}
G1HeapRegion* G1AllocRegion::release() {
trace("releasing");
- G1HeapRegion* alloc_region = _alloc_region;
+ G1HeapRegion* alloc_region = _alloc_region.load_acquire();
retire(false /* fill_up */);
- assert_alloc_region(_alloc_region == _dummy_region, "post-condition of retire()");
- _alloc_region = nullptr;
+ assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed(), "post-condition of retire()");
+ _alloc_region.store_relaxed(nullptr);
trace("released");
- return (alloc_region == _dummy_region) ? nullptr : alloc_region;
+ return (alloc_region == _dummy_region.load_relaxed()) ? nullptr : alloc_region;
}
#ifndef PRODUCT
@@ -211,12 +210,13 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
out->print("%s: %u ", _name, _count);
- if (_alloc_region == nullptr) {
+ G1HeapRegion* alloc_region = _alloc_region.load_acquire();
+ if (alloc_region == nullptr) {
out->print("null");
- } else if (_alloc_region == _dummy_region) {
+ } else if (alloc_region == _dummy_region.load_relaxed()) {
out->print("DUMMY");
} else {
- out->print(HR_FORMAT, HR_FORMAT_PARAMS(_alloc_region));
+ out->print(HR_FORMAT, HR_FORMAT_PARAMS(alloc_region));
}
out->print(" : %s", str);
@@ -235,7 +235,7 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
#endif // PRODUCT
G1AllocRegion::G1AllocRegion(const char* name, uint node_index)
- : _alloc_region(nullptr),
+ : _alloc_region(),
_count(0),
_name(name),
_node_index(node_index)
@@ -250,7 +250,7 @@ void MutatorAllocRegion::retire_region(G1HeapRegion* alloc_region) {
}
void MutatorAllocRegion::init() {
- assert(_retained_alloc_region == nullptr, "Pre-condition");
+ assert(_retained_alloc_region.load_relaxed() == nullptr, "Pre-condition");
G1AllocRegion::init();
_wasted_bytes = 0;
}
@@ -261,8 +261,9 @@ bool MutatorAllocRegion::should_retain(G1HeapRegion* region) {
return false;
}
- if (_retained_alloc_region != nullptr &&
- free_bytes < _retained_alloc_region->free()) {
+ G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
+ if (retained_alloc_region != nullptr &&
+ free_bytes < retained_alloc_region->free()) {
return false;
}
@@ -278,10 +279,11 @@ size_t MutatorAllocRegion::retire(bool fill_up) {
// free than the currently retained region.
if (should_retain(current_region)) {
trace("mutator retained");
- if (_retained_alloc_region != nullptr) {
- waste = retire_internal(_retained_alloc_region, true);
+ G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
+ if (retained_alloc_region != nullptr) {
+ waste = retire_internal(retained_alloc_region, true);
}
- _retained_alloc_region = current_region;
+ _retained_alloc_region.release_store(current_region);
} else {
waste = retire_internal(current_region, fill_up);
}
@@ -300,7 +302,7 @@ size_t MutatorAllocRegion::used_in_alloc_regions() {
used += hr->used();
}
- hr = _retained_alloc_region;
+ hr = _retained_alloc_region.load_acquire();
if (hr != nullptr) {
used += hr->used();
}
@@ -313,9 +315,10 @@ G1HeapRegion* MutatorAllocRegion::release() {
// The retained alloc region must be retired and this must be
// done after the above call to release the mutator alloc region,
// since it might update the _retained_alloc_region member.
- if (_retained_alloc_region != nullptr) {
- _wasted_bytes += retire_internal(_retained_alloc_region, false);
- _retained_alloc_region = nullptr;
+ G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
+ if (retained_alloc_region != nullptr) {
+ _wasted_bytes += retire_internal(retained_alloc_region, false);
+ _retained_alloc_region.store_relaxed(nullptr);
}
log_debug(gc, alloc, region)("Mutator Allocation stats, regions: %u, wasted size: %zu%s (%4.1f%%)",
count(),
diff --git a/src/hotspot/share/gc/g1/g1AllocRegion.hpp b/src/hotspot/share/gc/g1/g1AllocRegion.hpp
index 3e38332ee6f..248aa0a9da0 100644
--- a/src/hotspot/share/gc/g1/g1AllocRegion.hpp
+++ b/src/hotspot/share/gc/g1/g1AllocRegion.hpp
@@ -29,6 +29,7 @@
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1HeapRegionAttr.hpp"
#include "gc/g1/g1NUMA.hpp"
+#include "runtime/atomic.hpp"
class G1CollectedHeap;
@@ -40,8 +41,6 @@ class G1CollectedHeap;
// replaced.
class G1AllocRegion : public CHeapObj {
-
-private:
// The active allocating region we are currently allocating out
// of. The invariant is that if this object is initialized (i.e.,
// init() has been called and release() has not) then _alloc_region
@@ -52,7 +51,7 @@ private:
// then _alloc_region is null and this object should not be used to
// satisfy allocation requests (it was done this way to force the
// correct use of init() and release()).
- G1HeapRegion* volatile _alloc_region;
+ Atomic _alloc_region;
// It keeps track of the distinct number of regions that are used
// for allocation in the active interval of this object, i.e.,
@@ -71,7 +70,7 @@ private:
// == end()). When we don't have a valid active region we make
// _alloc_region point to this. This allows us to skip checking
// whether the _alloc_region is null or not.
- static G1HeapRegion* _dummy_region;
+ static Atomic _dummy_region;
// After a region is allocated by alloc_new_region, this
// method is used to set it as the active alloc_region
@@ -124,9 +123,9 @@ public:
static void setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region);
G1HeapRegion* get() const {
- G1HeapRegion * hr = _alloc_region;
+ G1HeapRegion * hr = _alloc_region.load_acquire();
// Make sure that the dummy region does not escape this class.
- return (hr == _dummy_region) ? nullptr : hr;
+ return (hr == _dummy_region.load_relaxed()) ? nullptr : hr;
}
uint count() { return _count; }
@@ -177,7 +176,7 @@ private:
// Retained allocation region. Used to lower the waste generated
// during mutation by having two active regions if the free space
// in a region about to be retired still could fit a TLAB.
- G1HeapRegion* volatile _retained_alloc_region;
+ Atomic _retained_alloc_region;
// Decide if the region should be retained, based on the free size
// in it and the free size in the currently retained region, if any.
diff --git a/src/hotspot/share/gc/g1/g1AllocRegion.inline.hpp b/src/hotspot/share/gc/g1/g1AllocRegion.inline.hpp
index af9156163ac..e1d23867ea3 100644
--- a/src/hotspot/share/gc/g1/g1AllocRegion.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1AllocRegion.inline.hpp
@@ -32,13 +32,13 @@
#define assert_alloc_region(p, message) \
do { \
assert((p), "[%s] %s c: %u r: " PTR_FORMAT, \
- _name, (message), _count, p2i(_alloc_region) \
+ _name, (message), _count, p2i(_alloc_region.load_relaxed()) \
); \
} while (0)
inline void G1AllocRegion::reset_alloc_region() {
- _alloc_region = _dummy_region;
+ _alloc_region.store_relaxed(_dummy_region.load_relaxed());
}
inline HeapWord* G1AllocRegion::par_allocate(G1HeapRegion* alloc_region, size_t word_size) {
@@ -51,7 +51,7 @@ inline HeapWord* G1AllocRegion::par_allocate(G1HeapRegion* alloc_region, size_t
inline HeapWord* G1AllocRegion::attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
- G1HeapRegion* alloc_region = _alloc_region;
+ G1HeapRegion* alloc_region = _alloc_region.load_acquire();
assert_alloc_region(alloc_region != nullptr && !alloc_region->is_empty(), "not initialized properly");
HeapWord* result = alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
@@ -97,8 +97,9 @@ inline HeapWord* G1AllocRegion::attempt_allocation_using_new_region(size_t min_w
inline HeapWord* MutatorAllocRegion::attempt_retained_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
- if (_retained_alloc_region != nullptr) {
- HeapWord* result = _retained_alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
+ G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
+ if (retained_alloc_region != nullptr) {
+ HeapWord* result = retained_alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
if (result != nullptr) {
trace("alloc retained", min_word_size, desired_word_size, *actual_word_size, result);
return result;
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
index d18f61ff507..061241c24e2 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
@@ -478,11 +478,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_
log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating %zu words",
Thread::current()->name(), word_size);
- if (is_shutting_down()) {
- stall_for_vm_shutdown();
- return nullptr;
- }
-
// Has the gc overhead limit been reached in the meantime? If so, this mutator
// should receive null even when unsuccessfully scheduling a collection as well
// for global consistency.
@@ -738,11 +733,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size) {
log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating %zu",
Thread::current()->name(), word_size);
- if (is_shutting_down()) {
- stall_for_vm_shutdown();
- return nullptr;
- }
-
// Has the gc overhead limit been reached in the meantime? If so, this mutator
// should receive null even when unsuccessfully scheduling a collection as well
// for global consistency.
@@ -1645,6 +1635,10 @@ jint G1CollectedHeap::initialize() {
return JNI_OK;
}
+bool G1CollectedHeap::concurrent_mark_is_terminating() const {
+ return _cm_thread->should_terminate();
+}
+
void G1CollectedHeap::stop() {
// Stop all concurrent threads. We do this to make sure these threads
// do not continue to execute and access resources (e.g. logging)
@@ -1965,8 +1959,8 @@ bool G1CollectedHeap::try_collect_concurrently(size_t allocation_word_size,
}
// If VMOp skipped initiating concurrent marking cycle because
- // we're terminating, then we're done.
- if (is_shutting_down()) {
+ // we're shutting down, then we're done.
+ if (op.is_shutting_down()) {
LOG_COLLECT_CONCURRENTLY(cause, "skipped: terminating");
return false;
}
@@ -2361,7 +2355,8 @@ static void print_region_type(outputStream* st, const char* type, uint count, bo
}
void G1CollectedHeap::print_heap_on(outputStream* st) const {
- size_t heap_used = Heap_lock->owned_by_self() ? used() : used_unlocked();
+ size_t heap_used = (Thread::current_or_null_safe() != nullptr &&
+ Heap_lock->owned_by_self()) ? used() : used_unlocked();
st->print("%-20s", "garbage-first heap");
st->print(" total reserved %zuK, committed %zuK, used %zuK",
_hrm.reserved().byte_size()/K, capacity()/K, heap_used/K);
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
index 5dccf41e909..aff7166d391 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
@@ -917,6 +917,9 @@ public:
// specified by the policy object.
jint initialize() override;
+ // Returns whether concurrent mark threads (and the VM) are about to terminate.
+ bool concurrent_mark_is_terminating() const;
+
void safepoint_synchronize_begin() override;
void safepoint_synchronize_end() override;
diff --git a/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp b/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp
index 47340fad768..d71108d4d0e 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp
@@ -267,8 +267,6 @@ void G1CollectionSetCandidates::set_candidates_from_marking(G1HeapRegion** candi
// the same MixedGC.
uint group_limit = p->calc_min_old_cset_length(num_candidates);
- uint num_added_to_group = 0;
-
G1CSetCandidateGroup::reset_next_group_id();
G1CSetCandidateGroup* current = nullptr;
@@ -279,7 +277,7 @@ void G1CollectionSetCandidates::set_candidates_from_marking(G1HeapRegion** candi
assert(!contains(r), "must not contain region %u", r->hrm_index());
_contains_map[r->hrm_index()] = CandidateOrigin::Marking;
- if (num_added_to_group == group_limit) {
+ if (current->length() == group_limit) {
if (group_limit != G1OldCSetGroupSize) {
group_limit = G1OldCSetGroupSize;
}
@@ -287,10 +285,8 @@ void G1CollectionSetCandidates::set_candidates_from_marking(G1HeapRegion** candi
_from_marking_groups.append(current);
current = new G1CSetCandidateGroup();
- num_added_to_group = 0;
}
current->add(r);
- num_added_to_group++;
}
_from_marking_groups.append(current);
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
index d37fe9ea7ba..456d543fa10 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
@@ -1883,7 +1883,7 @@ bool G1ConcurrentMark::concurrent_cycle_abort() {
// nothing, but this situation should be extremely rare (a full gc after shutdown
// has been signalled is already rare), and this work should be negligible compared
// to actual full gc work.
- if (!cm_thread()->in_progress() && !_g1h->is_shutting_down()) {
+ if (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating()) {
return false;
}
diff --git a/src/hotspot/share/gc/g1/g1HeapRegion.cpp b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
index b1eeb333d8d..361e19d4be5 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegion.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
@@ -307,10 +307,6 @@ void G1HeapRegion::add_code_root(nmethod* nm) {
rem_set()->add_code_root(nm);
}
-void G1HeapRegion::remove_code_root(nmethod* nm) {
- rem_set()->remove_code_root(nm);
-}
-
void G1HeapRegion::code_roots_do(NMethodClosure* blk) const {
rem_set()->code_roots_do(blk);
}
diff --git a/src/hotspot/share/gc/g1/g1HeapRegion.hpp b/src/hotspot/share/gc/g1/g1HeapRegion.hpp
index 17ec3055b52..fe915b0dafe 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegion.hpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegion.hpp
@@ -543,7 +543,6 @@ public:
// Routines for managing a list of code roots (attached to the
// this region's RSet) that point into this heap region.
void add_code_root(nmethod* nm);
- void remove_code_root(nmethod* nm);
// Applies blk->do_nmethod() to each of the entries in
// the code roots list for this region
diff --git a/src/hotspot/share/gc/g1/g1IHOPControl.cpp b/src/hotspot/share/gc/g1/g1IHOPControl.cpp
index 34c8cd0366b..43698e9f12b 100644
--- a/src/hotspot/share/gc/g1/g1IHOPControl.cpp
+++ b/src/hotspot/share/gc/g1/g1IHOPControl.cpp
@@ -28,14 +28,63 @@
#include "gc/g1/g1Trace.hpp"
#include "logging/log.hpp"
-G1IHOPControl::G1IHOPControl(double initial_ihop_percent,
- G1OldGenAllocationTracker const* old_gen_alloc_tracker) :
- _initial_ihop_percent(initial_ihop_percent),
- _target_occupancy(0),
- _last_allocation_time_s(0.0),
- _old_gen_alloc_tracker(old_gen_alloc_tracker)
-{
- assert(_initial_ihop_percent >= 0.0 && _initial_ihop_percent <= 100.0, "Initial IHOP value must be between 0 and 100 but is %.3f", initial_ihop_percent);
+double G1IHOPControl::predict(const TruncatedSeq* seq) const {
+ assert(_is_adaptive, "precondition");
+ assert(_predictor != nullptr, "precondition");
+
+ return _predictor->predict_zero_bounded(seq);
+}
+
+bool G1IHOPControl::have_enough_data_for_prediction() const {
+ assert(_is_adaptive, "precondition");
+
+ return ((size_t)_marking_times_s.num() >= G1AdaptiveIHOPNumInitialSamples) &&
+ ((size_t)_allocation_rate_s.num() >= G1AdaptiveIHOPNumInitialSamples);
+}
+
+double G1IHOPControl::last_marking_length_s() const {
+ return _marking_times_s.last();
+}
+
+size_t G1IHOPControl::actual_target_threshold() const {
+ assert(_is_adaptive, "precondition");
+
+ // The actual target threshold takes the heap reserve and the expected waste in
+ // free space into account.
+ // _heap_reserve is that part of the total heap capacity that is reserved for
+ // eventual promotion failure.
+  // _heap_waste is the amount of space that will never be reclaimed in any
+ // heap, so can not be used for allocation during marking and must always be
+ // considered.
+ double safe_total_heap_percentage =
+ MIN2((double)(_heap_reserve_percent + _heap_waste_percent), 100.0);
+
+ return (size_t)MIN2(
+ G1CollectedHeap::heap()->max_capacity() * (100.0 - safe_total_heap_percentage) / 100.0,
+ _target_occupancy * (100.0 - _heap_waste_percent) / 100.0
+ );
+}
+
+G1IHOPControl::G1IHOPControl(double ihop_percent,
+ const G1OldGenAllocationTracker* old_gen_alloc_tracker,
+ bool adaptive,
+ const G1Predictions* predictor,
+ size_t heap_reserve_percent,
+ size_t heap_waste_percent)
+ : _is_adaptive(adaptive),
+ _initial_ihop_percent(ihop_percent),
+ _target_occupancy(0),
+ _heap_reserve_percent(heap_reserve_percent),
+ _heap_waste_percent(heap_waste_percent),
+ _last_allocation_time_s(0.0),
+ _old_gen_alloc_tracker(old_gen_alloc_tracker),
+ _predictor(predictor),
+ _marking_times_s(10, 0.05),
+ _allocation_rate_s(10, 0.05),
+ _last_unrestrained_young_size(0) {
+ assert(_initial_ihop_percent >= 0.0 && _initial_ihop_percent <= 100.0,
+ "IHOP percent out of range: %.3f", ihop_percent);
+ assert(!_is_adaptive || _predictor != nullptr, "precondition");
}
void G1IHOPControl::update_target_occupancy(size_t new_target_occupancy) {
@@ -50,9 +99,34 @@ void G1IHOPControl::report_statistics(G1NewTracer* new_tracer, size_t non_young_
}
void G1IHOPControl::update_allocation_info(double allocation_time_s, size_t additional_buffer_size) {
- assert(allocation_time_s >= 0.0, "Allocation time must be positive but is %.3f", allocation_time_s);
-
+ assert(allocation_time_s > 0, "Invalid allocation time: %.3f", allocation_time_s);
_last_allocation_time_s = allocation_time_s;
+ double alloc_rate = _old_gen_alloc_tracker->last_period_old_gen_growth() / allocation_time_s;
+ _allocation_rate_s.add(alloc_rate);
+ _last_unrestrained_young_size = additional_buffer_size;
+}
+
+void G1IHOPControl::update_marking_length(double marking_length_s) {
+ assert(marking_length_s >= 0.0, "Invalid marking length: %.3f", marking_length_s);
+ _marking_times_s.add(marking_length_s);
+}
+
+size_t G1IHOPControl::get_conc_mark_start_threshold() {
+ guarantee(_target_occupancy > 0, "Target occupancy must be initialized");
+
+ if (!_is_adaptive || !have_enough_data_for_prediction()) {
+ return (size_t)(_initial_ihop_percent * _target_occupancy / 100.0);
+ }
+
+ double pred_marking_time = predict(&_marking_times_s);
+ double pred_rate = predict(&_allocation_rate_s);
+ size_t pred_bytes = (size_t)(pred_marking_time * pred_rate);
+ size_t predicted_needed = pred_bytes + _last_unrestrained_young_size;
+ size_t internal_threshold = actual_target_threshold();
+
+ return predicted_needed < internal_threshold
+ ? internal_threshold - predicted_needed
+ : 0;
}
void G1IHOPControl::print_log(size_t non_young_occupancy) {
@@ -68,6 +142,23 @@ void G1IHOPControl::print_log(size_t non_young_occupancy) {
_last_allocation_time_s * 1000.0,
_last_allocation_time_s > 0.0 ? _old_gen_alloc_tracker->last_period_old_gen_bytes() / _last_allocation_time_s : 0.0,
last_marking_length_s() * 1000.0);
+
+ if (!_is_adaptive) {
+ return;
+ }
+
+ size_t actual_threshold = actual_target_threshold();
+ log_debug(gc, ihop)("Adaptive IHOP information (value update), threshold: %zuB (%1.2f), internal target threshold: %zuB, "
+ "non-young occupancy: %zuB, additional buffer size: %zuB, predicted old gen allocation rate: %1.2fB/s, "
+ "predicted marking phase length: %1.2fms, prediction active: %s",
+ cur_conc_mark_start_threshold,
+ percent_of(cur_conc_mark_start_threshold, actual_threshold),
+ actual_threshold,
+ non_young_occupancy,
+ _last_unrestrained_young_size,
+ predict(&_allocation_rate_s),
+ predict(&_marking_times_s) * 1000.0,
+ have_enough_data_for_prediction() ? "true" : "false");
}
void G1IHOPControl::send_trace_event(G1NewTracer* tracer, size_t non_young_occupancy) {
@@ -78,121 +169,14 @@ void G1IHOPControl::send_trace_event(G1NewTracer* tracer, size_t non_young_occup
_old_gen_alloc_tracker->last_period_old_gen_bytes(),
_last_allocation_time_s,
last_marking_length_s());
-}
-G1StaticIHOPControl::G1StaticIHOPControl(double ihop_percent,
- G1OldGenAllocationTracker const* old_gen_alloc_tracker) :
- G1IHOPControl(ihop_percent, old_gen_alloc_tracker),
- _last_marking_length_s(0.0) {
-}
-
-G1AdaptiveIHOPControl::G1AdaptiveIHOPControl(double ihop_percent,
- G1OldGenAllocationTracker const* old_gen_alloc_tracker,
- G1Predictions const* predictor,
- size_t heap_reserve_percent,
- size_t heap_waste_percent) :
- G1IHOPControl(ihop_percent, old_gen_alloc_tracker),
- _heap_reserve_percent(heap_reserve_percent),
- _heap_waste_percent(heap_waste_percent),
- _predictor(predictor),
- _marking_times_s(10, 0.05),
- _allocation_rate_s(10, 0.05),
- _last_unrestrained_young_size(0)
-{
-}
-
-size_t G1AdaptiveIHOPControl::actual_target_threshold() const {
- guarantee(_target_occupancy > 0, "Target occupancy still not updated yet.");
- // The actual target threshold takes the heap reserve and the expected waste in
- // free space into account.
- // _heap_reserve is that part of the total heap capacity that is reserved for
- // eventual promotion failure.
- // _heap_waste is the amount of space will never be reclaimed in any
- // heap, so can not be used for allocation during marking and must always be
- // considered.
-
- double safe_total_heap_percentage = MIN2((double)(_heap_reserve_percent + _heap_waste_percent), 100.0);
-
- return (size_t)MIN2(
- G1CollectedHeap::heap()->max_capacity() * (100.0 - safe_total_heap_percentage) / 100.0,
- _target_occupancy * (100.0 - _heap_waste_percent) / 100.0
- );
-}
-
-double G1AdaptiveIHOPControl::predict(TruncatedSeq const* seq) const {
- return _predictor->predict_zero_bounded(seq);
-}
-
-bool G1AdaptiveIHOPControl::have_enough_data_for_prediction() const {
- return ((size_t)_marking_times_s.num() >= G1AdaptiveIHOPNumInitialSamples) &&
- ((size_t)_allocation_rate_s.num() >= G1AdaptiveIHOPNumInitialSamples);
-}
-
-size_t G1AdaptiveIHOPControl::get_conc_mark_start_threshold() {
- if (have_enough_data_for_prediction()) {
- double pred_marking_time = predict(&_marking_times_s);
- double pred_promotion_rate = predict(&_allocation_rate_s);
- size_t pred_promotion_size = (size_t)(pred_marking_time * pred_promotion_rate);
-
- size_t predicted_needed_bytes_during_marking =
- pred_promotion_size +
- // In reality we would need the maximum size of the young gen during
- // marking. This is a conservative estimate.
- _last_unrestrained_young_size;
-
- size_t internal_threshold = actual_target_threshold();
- size_t predicted_initiating_threshold = predicted_needed_bytes_during_marking < internal_threshold ?
- internal_threshold - predicted_needed_bytes_during_marking :
- 0;
- return predicted_initiating_threshold;
- } else {
- // Use the initial value.
- return (size_t)(_initial_ihop_percent * _target_occupancy / 100.0);
+ if (_is_adaptive) {
+ tracer->report_adaptive_ihop_statistics(get_conc_mark_start_threshold(),
+ actual_target_threshold(),
+ non_young_occupancy,
+ _last_unrestrained_young_size,
+ predict(&_allocation_rate_s),
+ predict(&_marking_times_s),
+ have_enough_data_for_prediction());
}
}
-
-double G1AdaptiveIHOPControl::last_mutator_period_old_allocation_rate() const {
- assert(_last_allocation_time_s > 0, "This should not be called when the last GC is full");
-
- return _old_gen_alloc_tracker->last_period_old_gen_growth() / _last_allocation_time_s;
-}
-
-void G1AdaptiveIHOPControl::update_allocation_info(double allocation_time_s,
- size_t additional_buffer_size) {
- G1IHOPControl::update_allocation_info(allocation_time_s, additional_buffer_size);
- _allocation_rate_s.add(last_mutator_period_old_allocation_rate());
-
- _last_unrestrained_young_size = additional_buffer_size;
-}
-
-void G1AdaptiveIHOPControl::update_marking_length(double marking_length_s) {
- assert(marking_length_s >= 0.0, "Marking length must be larger than zero but is %.3f", marking_length_s);
- _marking_times_s.add(marking_length_s);
-}
-
-void G1AdaptiveIHOPControl::print_log(size_t non_young_occupancy) {
- G1IHOPControl::print_log(non_young_occupancy);
- size_t actual_threshold = actual_target_threshold();
- log_debug(gc, ihop)("Adaptive IHOP information (value update), threshold: %zuB (%1.2f), internal target threshold: %zuB, "
- "non-young occupancy: %zuB, additional buffer size: %zuB, predicted old gen allocation rate: %1.2fB/s, "
- "predicted marking phase length: %1.2fms, prediction active: %s",
- get_conc_mark_start_threshold(),
- percent_of(get_conc_mark_start_threshold(), actual_threshold),
- actual_threshold,
- non_young_occupancy,
- _last_unrestrained_young_size,
- predict(&_allocation_rate_s),
- predict(&_marking_times_s) * 1000.0,
- have_enough_data_for_prediction() ? "true" : "false");
-}
-
-void G1AdaptiveIHOPControl::send_trace_event(G1NewTracer* tracer, size_t non_young_occupancy) {
- G1IHOPControl::send_trace_event(tracer, non_young_occupancy);
- tracer->report_adaptive_ihop_statistics(get_conc_mark_start_threshold(),
- actual_target_threshold(),
- non_young_occupancy,
- _last_unrestrained_young_size,
- predict(&_allocation_rate_s),
- predict(&_marking_times_s),
- have_enough_data_for_prediction());
-}
diff --git a/src/hotspot/share/gc/g1/g1IHOPControl.hpp b/src/hotspot/share/gc/g1/g1IHOPControl.hpp
index 392a12a785a..b6e80d9b422 100644
--- a/src/hotspot/share/gc/g1/g1IHOPControl.hpp
+++ b/src/hotspot/share/gc/g1/g1IHOPControl.hpp
@@ -32,89 +32,32 @@
class G1Predictions;
class G1NewTracer;
-// Base class for algorithms that calculate the heap occupancy at which
-// concurrent marking should start. This heap usage threshold should be relative
-// to old gen size.
+// Implements two strategies for calculating the concurrent mark starting occupancy threshold:
+// - Static mode: Uses a fixed percentage of the target heap occupancy.
+// - Adaptive mode: Predicts a threshold based on allocation rates and marking durations
+// to ensure the target occupancy is never exceeded during marking.
class G1IHOPControl : public CHeapObj {
- protected:
+ private:
+ const bool _is_adaptive;
+
// The initial IHOP value relative to the target occupancy.
double _initial_ihop_percent;
+
// The target maximum occupancy of the heap. The target occupancy is the number
// of bytes when marking should be finished and reclaim started.
size_t _target_occupancy;
+  // Percentage of maximum heap capacity we should avoid touching.
+ const size_t _heap_reserve_percent;
+
+ // Percentage of free heap that should be considered as waste.
+ const size_t _heap_waste_percent;
+
// Most recent complete mutator allocation period in seconds.
double _last_allocation_time_s;
-
const G1OldGenAllocationTracker* _old_gen_alloc_tracker;
- // Initialize an instance with the old gen allocation tracker and the
- // initial IHOP value in percent. The target occupancy will be updated
- // at the first heap expansion.
- G1IHOPControl(double ihop_percent, G1OldGenAllocationTracker const* old_gen_alloc_tracker);
-
- // Most recent time from the end of the concurrent start to the start of the first
- // mixed gc.
- virtual double last_marking_length_s() const = 0;
-
- virtual void print_log(size_t non_young_occupancy);
- virtual void send_trace_event(G1NewTracer* tracer, size_t non_young_occupancy);
-
-public:
- virtual ~G1IHOPControl() { }
-
- // Get the current non-young occupancy at which concurrent marking should start.
- virtual size_t get_conc_mark_start_threshold() = 0;
-
- // Adjust target occupancy.
- virtual void update_target_occupancy(size_t new_target_occupancy);
- // Update information about time during which allocations in the Java heap occurred,
- // how large these allocations were in bytes, and an additional buffer.
- // The allocations should contain any amount of space made unusable for further
- // allocation, e.g. any waste caused by TLAB allocation, space at the end of
- // humongous objects that can not be used for allocation, etc.
- // Together with the target occupancy, this additional buffer should contain the
- // difference between old gen size and total heap size at the start of reclamation,
- // and space required for that reclamation.
- virtual void update_allocation_info(double allocation_time_s, size_t additional_buffer_size);
- // Update the time spent in the mutator beginning from the end of concurrent start to
- // the first mixed gc.
- virtual void update_marking_length(double marking_length_s) = 0;
-
- void report_statistics(G1NewTracer* tracer, size_t non_young_occupancy);
-};
-
-// The returned concurrent mark starting occupancy threshold is a fixed value
-// relative to the maximum heap size.
-class G1StaticIHOPControl : public G1IHOPControl {
- // Most recent mutator time between the end of concurrent mark to the start of the
- // first mixed gc.
- double _last_marking_length_s;
- protected:
- double last_marking_length_s() const { return _last_marking_length_s; }
- public:
- G1StaticIHOPControl(double ihop_percent, G1OldGenAllocationTracker const* old_gen_alloc_tracker);
-
- size_t get_conc_mark_start_threshold() {
- guarantee(_target_occupancy > 0, "Target occupancy must have been initialized.");
- return (size_t) (_initial_ihop_percent * _target_occupancy / 100.0);
- }
-
- virtual void update_marking_length(double marking_length_s) {
- assert(marking_length_s > 0.0, "Marking length must be larger than zero but is %.3f", marking_length_s);
- _last_marking_length_s = marking_length_s;
- }
-};
-
-// This algorithm tries to return a concurrent mark starting occupancy value that
-// makes sure that during marking the given target occupancy is never exceeded,
-// based on predictions of current allocation rate and time periods between
-// concurrent start and the first mixed gc.
-class G1AdaptiveIHOPControl : public G1IHOPControl {
- size_t _heap_reserve_percent; // Percentage of maximum heap capacity we should avoid to touch
- size_t _heap_waste_percent; // Percentage of free heap that should be considered as waste.
-
- const G1Predictions * _predictor;
+ const G1Predictions* _predictor;
TruncatedSeq _marking_times_s;
TruncatedSeq _allocation_rate_s;
@@ -128,35 +71,48 @@ class G1AdaptiveIHOPControl : public G1IHOPControl {
size_t _last_unrestrained_young_size;
// Get a new prediction bounded below by zero from the given sequence.
- double predict(TruncatedSeq const* seq) const;
+ double predict(const TruncatedSeq* seq) const;
bool have_enough_data_for_prediction() const;
+ double last_marking_length_s() const;
// The "actual" target threshold the algorithm wants to keep during and at the
// end of marking. This is typically lower than the requested threshold, as the
// algorithm needs to consider restrictions by the environment.
size_t actual_target_threshold() const;
- // This method calculates the old gen allocation rate based on the net survived
- // bytes that are allocated in the old generation in the last mutator period.
- double last_mutator_period_old_allocation_rate() const;
- protected:
- virtual double last_marking_length_s() const { return _marking_times_s.last(); }
-
- virtual void print_log(size_t non_young_occupancy);
- virtual void send_trace_event(G1NewTracer* tracer, size_t non_young_occupancy);
+ void print_log(size_t non_young_occupancy);
+ void send_trace_event(G1NewTracer* tracer, size_t non_young_occupancy);
public:
- G1AdaptiveIHOPControl(double ihop_percent,
- G1OldGenAllocationTracker const* old_gen_alloc_tracker,
- G1Predictions const* predictor,
- size_t heap_reserve_percent, // The percentage of total heap capacity that should not be tapped into.
- size_t heap_waste_percent); // The percentage of the free space in the heap that we think is not usable for allocation.
+ G1IHOPControl(double ihop_percent,
+ const G1OldGenAllocationTracker* old_gen_alloc_tracker,
+ bool adaptive,
+ const G1Predictions* predictor,
+ size_t heap_reserve_percent,
+ size_t heap_waste_percent);
- virtual size_t get_conc_mark_start_threshold();
+ // Adjust target occupancy.
+ void update_target_occupancy(size_t new_target_occupancy);
- virtual void update_allocation_info(double allocation_time_s, size_t additional_buffer_size);
- virtual void update_marking_length(double marking_length_s);
+ // Update information about time during which allocations in the Java heap occurred,
+ // how large these allocations were in bytes, and an additional buffer.
+ // The allocations should contain any amount of space made unusable for further
+ // allocation, e.g. any waste caused by TLAB allocation, space at the end of
+ // humongous objects that can not be used for allocation, etc.
+ // Together with the target occupancy, this additional buffer should contain the
+ // difference between old gen size and total heap size at the start of reclamation,
+ // and space required for that reclamation.
+ void update_allocation_info(double allocation_time_s, size_t additional_buffer_size);
+
+ // Update the time spent in the mutator beginning from the end of concurrent start to
+ // the first mixed gc.
+ void update_marking_length(double marking_length_s);
+
+ // Get the current non-young occupancy at which concurrent marking should start.
+ size_t get_conc_mark_start_threshold();
+
+ void report_statistics(G1NewTracer* tracer, size_t non_young_occupancy);
};
#endif // SHARE_GC_G1_G1IHOPCONTROL_HPP
diff --git a/src/hotspot/share/gc/g1/g1OldGenAllocationTracker.hpp b/src/hotspot/share/gc/g1/g1OldGenAllocationTracker.hpp
index 265c7029e14..aa5e3c6c942 100644
--- a/src/hotspot/share/gc/g1/g1OldGenAllocationTracker.hpp
+++ b/src/hotspot/share/gc/g1/g1OldGenAllocationTracker.hpp
@@ -28,8 +28,6 @@
#include "gc/g1/g1HeapRegion.hpp"
#include "memory/allocation.hpp"
-class G1AdaptiveIHOPControl;
-
// Track allocation details in the old generation.
class G1OldGenAllocationTracker : public CHeapObj {
// Total number of bytes allocated in the old generation at the end
diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp
index 19573e11cd7..6eef6cbfa87 100644
--- a/src/hotspot/share/gc/g1/g1Policy.cpp
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp
@@ -669,7 +669,6 @@ bool G1Policy::should_retain_evac_failed_region(uint index) const {
}
void G1Policy::record_pause_start_time() {
- assert(!_g1h->is_shutting_down(), "Invariant!");
Ticks now = Ticks::now();
_cur_pause_start_sec = now.seconds();
@@ -1026,15 +1025,12 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
G1IHOPControl* G1Policy::create_ihop_control(const G1OldGenAllocationTracker* old_gen_alloc_tracker,
const G1Predictions* predictor) {
- if (G1UseAdaptiveIHOP) {
- return new G1AdaptiveIHOPControl(InitiatingHeapOccupancyPercent,
- old_gen_alloc_tracker,
- predictor,
- G1ReservePercent,
- G1HeapWastePercent);
- } else {
- return new G1StaticIHOPControl(InitiatingHeapOccupancyPercent, old_gen_alloc_tracker);
- }
+ return new G1IHOPControl(InitiatingHeapOccupancyPercent,
+ old_gen_alloc_tracker,
+ G1UseAdaptiveIHOP,
+ predictor,
+ G1ReservePercent,
+ G1HeapWastePercent);
}
bool G1Policy::update_ihop_prediction(double mutator_time_s,
@@ -1280,12 +1276,6 @@ void G1Policy::decide_on_concurrent_start_pause() {
// concurrent start pause).
assert(!collector_state()->in_concurrent_start_gc(), "pre-condition");
- // We should not be starting a concurrent start pause if the concurrent mark
- // thread is terminating.
- if (_g1h->is_shutting_down()) {
- return;
- }
-
if (collector_state()->initiate_conc_mark_if_possible()) {
// We had noticed on a previous pause that the heap occupancy has
// gone over the initiating threshold and we should start a
diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp
index f0bacefd71c..d0633466f37 100644
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp
@@ -992,10 +992,11 @@ class G1MergeHeapRootsTask : public WorkerTask {
}
};
- // Closure to make sure that the marking bitmap is clear for any old region in
- // the collection set.
- // This is needed to be able to use the bitmap for evacuation failure handling.
- class G1ClearBitmapClosure : public G1HeapRegionClosure {
+ // Closure to prepare the collection set regions for evacuation failure, i.e. make
+ // sure that the mark bitmap is clear for any old region in the collection set.
+ //
+ // These mark bitmaps record the evacuation failed objects.
+ class G1PrepareRegionsForEvacFailClosure : public G1HeapRegionClosure {
G1CollectedHeap* _g1h;
G1RemSetScanState* _scan_state;
bool _initial_evacuation;
@@ -1018,18 +1019,12 @@ class G1MergeHeapRootsTask : public WorkerTask {
// the pause occurs during the Concurrent Cleanup for Next Mark phase.
// Only at that point the region's bitmap may contain marks while being in the collection
// set at the same time.
- //
- // There is one exception: shutdown might have aborted the Concurrent Cleanup for Next
- // Mark phase midway, which might have also left stale marks in old generation regions.
- // There might actually have been scheduled multiple collections, but at that point we do
- // not care that much about performance and just do the work multiple times if needed.
- return (_g1h->collector_state()->clear_bitmap_in_progress() ||
- _g1h->is_shutting_down()) &&
- hr->is_old();
+ return _g1h->collector_state()->clear_bitmap_in_progress() &&
+ hr->is_old();
}
public:
- G1ClearBitmapClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state, bool initial_evacuation) :
+ G1PrepareRegionsForEvacFailClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state, bool initial_evacuation) :
_g1h(g1h),
_scan_state(scan_state),
_initial_evacuation(initial_evacuation)
@@ -1178,8 +1173,8 @@ public:
// Preparation for evacuation failure handling.
{
- G1ClearBitmapClosure clear(g1h, _scan_state, _initial_evacuation);
- g1h->collection_set_iterate_increment_from(&clear, &_hr_claimer, worker_id);
+ G1PrepareRegionsForEvacFailClosure prepare_evac_failure(g1h, _scan_state, _initial_evacuation);
+ g1h->collection_set_iterate_increment_from(&prepare_evac_failure, &_hr_claimer, worker_id);
}
}
};
diff --git a/src/hotspot/share/gc/parallel/parallelArguments.cpp b/src/hotspot/share/gc/parallel/parallelArguments.cpp
index 629690a6258..be9673224f5 100644
--- a/src/hotspot/share/gc/parallel/parallelArguments.cpp
+++ b/src/hotspot/share/gc/parallel/parallelArguments.cpp
@@ -37,8 +37,45 @@
#include "utilities/defaultStream.hpp"
#include "utilities/powerOfTwo.hpp"
-size_t ParallelArguments::conservative_max_heap_alignment() {
- return compute_heap_alignment();
+static size_t num_young_spaces() {
+ // When using NUMA, we create one MutableNUMASpace for each NUMA node
+ const size_t num_eden_spaces = UseNUMA ? os::numa_get_groups_num() : 1;
+
+ // The young generation must have room for eden + two survivors
+ return num_eden_spaces + 2;
+}
+
+static size_t num_old_spaces() {
+ return 1;
+}
+
+void ParallelArguments::initialize_alignments() {
+ // Initialize card size before initializing alignments
+ CardTable::initialize_card_size();
+ const size_t card_table_alignment = CardTable::ct_max_alignment_constraint();
+ SpaceAlignment = ParallelScavengeHeap::default_space_alignment();
+
+ if (UseLargePages) {
+ const size_t total_spaces = num_young_spaces() + num_old_spaces();
+ const size_t page_size = os::page_size_for_region_unaligned(MaxHeapSize, total_spaces);
+ ParallelScavengeHeap::set_desired_page_size(page_size);
+
+ if (page_size == os::vm_page_size()) {
+ log_warning(gc, heap)("MaxHeapSize (%zu) must be large enough for %zu * page-size; Disabling UseLargePages for heap",
+ MaxHeapSize, total_spaces);
+ }
+
+ if (page_size > SpaceAlignment) {
+ SpaceAlignment = page_size;
+ }
+
+ HeapAlignment = lcm(page_size, card_table_alignment);
+
+ } else {
+ assert(is_aligned(SpaceAlignment, os::vm_page_size()), "");
+ ParallelScavengeHeap::set_desired_page_size(os::vm_page_size());
+ HeapAlignment = card_table_alignment;
+ }
}
void ParallelArguments::initialize() {
@@ -98,49 +135,36 @@ void ParallelArguments::initialize() {
FullGCForwarding::initialize_flags(heap_reserved_size_bytes());
}
-void ParallelArguments::initialize_alignments() {
- // Initialize card size before initializing alignments
- CardTable::initialize_card_size();
- SpaceAlignment = ParallelScavengeHeap::default_space_alignment();
- HeapAlignment = compute_heap_alignment();
-}
+size_t ParallelArguments::conservative_max_heap_alignment() {
+ // The card marking array and the offset arrays for old generations are
+ // committed in os pages as well. Make sure they are entirely full (to
+ // avoid partial page problems), e.g. if 512 bytes heap corresponds to 1
+ // byte entry and the os page size is 4096, the maximum heap size should
+ // be 512*4096 = 2MB aligned.
-void ParallelArguments::initialize_heap_flags_and_sizes_one_pass() {
- // Do basic sizing work
- GenArguments::initialize_heap_flags_and_sizes();
-}
+ size_t alignment = CardTable::ct_max_alignment_constraint();
-void ParallelArguments::initialize_heap_flags_and_sizes() {
- initialize_heap_flags_and_sizes_one_pass();
-
- if (!UseLargePages) {
- ParallelScavengeHeap::set_desired_page_size(os::vm_page_size());
- return;
+ if (UseLargePages) {
+ // In presence of large pages we have to make sure that our
+ // alignment is large page aware.
+ alignment = lcm(os::large_page_size(), alignment);
}
- // If using large-page, need to update SpaceAlignment so that spaces are page-size aligned.
- const size_t min_pages = 4; // 1 for eden + 1 for each survivor + 1 for old
- const size_t page_sz = os::page_size_for_region_aligned(MinHeapSize, min_pages);
- ParallelScavengeHeap::set_desired_page_size(page_sz);
-
- if (page_sz == os::vm_page_size()) {
- log_warning(gc, heap)("MinHeapSize (%zu) must be large enough for 4 * page-size; Disabling UseLargePages for heap", MinHeapSize);
- return;
- }
-
- // Space is largepage-aligned.
- size_t new_alignment = page_sz;
- if (new_alignment != SpaceAlignment) {
- SpaceAlignment = new_alignment;
- // Redo everything from the start
- initialize_heap_flags_and_sizes_one_pass();
- }
-}
-
-size_t ParallelArguments::heap_reserved_size_bytes() {
- return MaxHeapSize;
+ return alignment;
}
CollectedHeap* ParallelArguments::create_heap() {
return new ParallelScavengeHeap();
}
+
+size_t ParallelArguments::young_gen_size_lower_bound() {
+ return num_young_spaces() * SpaceAlignment;
+}
+
+size_t ParallelArguments::old_gen_size_lower_bound() {
+ return num_old_spaces() * SpaceAlignment;
+}
+
+size_t ParallelArguments::heap_reserved_size_bytes() {
+ return MaxHeapSize;
+}
diff --git a/src/hotspot/share/gc/parallel/parallelArguments.hpp b/src/hotspot/share/gc/parallel/parallelArguments.hpp
index 159441be792..729fe43b879 100644
--- a/src/hotspot/share/gc/parallel/parallelArguments.hpp
+++ b/src/hotspot/share/gc/parallel/parallelArguments.hpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,21 +26,16 @@
#ifndef SHARE_GC_PARALLEL_PARALLELARGUMENTS_HPP
#define SHARE_GC_PARALLEL_PARALLELARGUMENTS_HPP
-#include "gc/shared/gcArguments.hpp"
#include "gc/shared/genArguments.hpp"
-class CollectedHeap;
-
class ParallelArguments : public GenArguments {
private:
virtual void initialize_alignments();
- virtual void initialize_heap_flags_and_sizes();
-
- void initialize_heap_flags_and_sizes_one_pass();
-
virtual void initialize();
virtual size_t conservative_max_heap_alignment();
virtual CollectedHeap* create_heap();
+ virtual size_t young_gen_size_lower_bound();
+ virtual size_t old_gen_size_lower_bound();
public:
static size_t heap_reserved_size_bytes();
diff --git a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
index 747e2f3228c..3a13d0d0535 100644
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
@@ -344,11 +344,6 @@ HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
assert(is_in_or_null(op.result()), "result not in heap");
return op.result();
}
-
- if (is_shutting_down()) {
- stall_for_vm_shutdown();
- return nullptr;
- }
}
// Was the gc-overhead reached inside the safepoint? If so, this mutator
diff --git a/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp b/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
index 0221fd2a90e..5d8ddbcaaed 100644
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
@@ -202,7 +202,6 @@ public:
bool requires_barriers(stackChunkOop obj) const override;
MemRegion reserved_region() const { return _reserved; }
- HeapWord* base() const { return _reserved.start(); }
// Memory allocation.
HeapWord* mem_allocate(size_t size) override;
diff --git a/src/hotspot/share/gc/serial/serialArguments.cpp b/src/hotspot/share/gc/serial/serialArguments.cpp
index aed1c2353b4..efebec4fa38 100644
--- a/src/hotspot/share/gc/serial/serialArguments.cpp
+++ b/src/hotspot/share/gc/serial/serialArguments.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,11 +28,49 @@
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/gcArguments.hpp"
+static size_t compute_heap_alignment() {
+ // The card marking array and the offset arrays for old generations are
+ // committed in os pages as well. Make sure they are entirely full (to
+ // avoid partial page problems), e.g. if 512 bytes heap corresponds to 1
+ // byte entry and the os page size is 4096, the maximum heap size should
+ // be 512*4096 = 2MB aligned.
+
+ size_t alignment = CardTable::ct_max_alignment_constraint();
+
+ if (UseLargePages) {
+ // In presence of large pages we have to make sure that our
+ // alignment is large page aware.
+ alignment = lcm(os::large_page_size(), alignment);
+ }
+
+ return alignment;
+}
+
+void SerialArguments::initialize_alignments() {
+ // Initialize card size before initializing alignments
+ CardTable::initialize_card_size();
+ SpaceAlignment = (size_t)Generation::GenGrain;
+ HeapAlignment = compute_heap_alignment();
+}
+
void SerialArguments::initialize() {
GCArguments::initialize();
FullGCForwarding::initialize_flags(MaxHeapSize);
}
+size_t SerialArguments::conservative_max_heap_alignment() {
+ return MAX2((size_t)Generation::GenGrain, compute_heap_alignment());
+}
+
CollectedHeap* SerialArguments::create_heap() {
return new SerialHeap();
}
+
+size_t SerialArguments::young_gen_size_lower_bound() {
+ // The young generation must be aligned and have room for eden + two survivors
+ return 3 * SpaceAlignment;
+}
+
+size_t SerialArguments::old_gen_size_lower_bound() {
+ return SpaceAlignment;
+}
diff --git a/src/hotspot/share/gc/serial/serialArguments.hpp b/src/hotspot/share/gc/serial/serialArguments.hpp
index 90c3225ff8d..774168eb626 100644
--- a/src/hotspot/share/gc/serial/serialArguments.hpp
+++ b/src/hotspot/share/gc/serial/serialArguments.hpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,12 +28,14 @@
#include "gc/shared/genArguments.hpp"
-class CollectedHeap;
-
class SerialArguments : public GenArguments {
private:
+ virtual void initialize_alignments();
virtual void initialize();
+ virtual size_t conservative_max_heap_alignment();
virtual CollectedHeap* create_heap();
+ virtual size_t young_gen_size_lower_bound();
+ virtual size_t old_gen_size_lower_bound();
};
#endif // SHARE_GC_SERIAL_SERIALARGUMENTS_HPP
diff --git a/src/hotspot/share/gc/serial/serialHeap.cpp b/src/hotspot/share/gc/serial/serialHeap.cpp
index 00d74e691eb..03ad1282f5f 100644
--- a/src/hotspot/share/gc/serial/serialHeap.cpp
+++ b/src/hotspot/share/gc/serial/serialHeap.cpp
@@ -337,11 +337,6 @@ HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
break;
}
- if (is_shutting_down()) {
- stall_for_vm_shutdown();
- return nullptr;
- }
-
// Give a warning if we seem to be looping forever.
if ((QueuedAllocationWarningCount > 0) &&
(try_count % QueuedAllocationWarningCount == 0)) {
diff --git a/src/hotspot/share/gc/shared/bufferNode.hpp b/src/hotspot/share/gc/shared/bufferNode.hpp
index a453bbc964b..e4e2ff23fb1 100644
--- a/src/hotspot/share/gc/shared/bufferNode.hpp
+++ b/src/hotspot/share/gc/shared/bufferNode.hpp
@@ -27,6 +27,7 @@
#include "cppstdlib/limits.hpp"
#include "gc/shared/freeListAllocator.hpp"
+#include "runtime/atomic.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
@@ -38,7 +39,7 @@ class BufferNode {
InternalSizeType _index;
InternalSizeType _capacity;
- BufferNode* volatile _next;
+ Atomic _next;
void* _buffer[1]; // Pseudo flexible array member.
BufferNode(InternalSizeType capacity)
@@ -58,11 +59,11 @@ public:
return std::numeric_limits::max();
}
- static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; }
+ static Atomic* next_ptr(BufferNode& bn) { return &bn._next; }
typedef LockFreeStack Stack;
- BufferNode* next() const { return _next; }
- void set_next(BufferNode* n) { _next = n; }
+ BufferNode* next() const { return _next.load_relaxed(); }
+ void set_next(BufferNode* n) { _next.store_relaxed(n); }
size_t index() const { return _index; }
void set_index(size_t i) {
diff --git a/src/hotspot/share/gc/shared/collectedHeap.cpp b/src/hotspot/share/gc/shared/collectedHeap.cpp
index c8dd39e72be..a59ea3745ab 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.cpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.cpp
@@ -62,12 +62,14 @@
class ClassLoaderData;
+bool CollectedHeap::_is_shutting_down = false;
+
size_t CollectedHeap::_lab_alignment_reserve = SIZE_MAX;
Klass* CollectedHeap::_filler_object_klass = nullptr;
size_t CollectedHeap::_filler_array_max_size = 0;
size_t CollectedHeap::_stack_chunk_max_size = 0;
-class GCLogMessage : public FormatBuffer<512> {};
+class GCLogMessage : public FormatBuffer<1024> {};
template <>
void EventLogBase::print(outputStream* st, GCLogMessage& m) {
@@ -377,8 +379,7 @@ MetaWord* CollectedHeap::satisfy_failed_metadata_allocation(ClassLoaderData* loa
word_size,
mdtype,
gc_count,
- full_gc_count,
- GCCause::_metadata_GC_threshold);
+ full_gc_count);
VMThread::execute(&op);
@@ -386,11 +387,6 @@ MetaWord* CollectedHeap::satisfy_failed_metadata_allocation(ClassLoaderData* loa
return op.result();
}
- if (is_shutting_down()) {
- stall_for_vm_shutdown();
- return nullptr;
- }
-
loop_count++;
if ((QueuedAllocationWarningCount > 0) &&
(loop_count % QueuedAllocationWarningCount == 0)) {
@@ -605,30 +601,20 @@ void CollectedHeap::post_initialize() {
initialize_serviceability();
}
-bool CollectedHeap::is_shutting_down() const {
- return Universe::is_shutting_down();
+bool CollectedHeap::is_shutting_down() {
+ assert(Heap_lock->owned_by_self(), "Protected by this lock");
+ return _is_shutting_down;
}
-void CollectedHeap::stall_for_vm_shutdown() {
- assert(is_shutting_down(), "Precondition");
- // Stall the thread (2 seconds) instead of an indefinite wait to avoid deadlock
- // if the VM shutdown triggers a GC.
- // The 2-seconds sleep is:
- // - long enough to keep daemon threads stalled, while the shutdown
- // sequence completes in the common case.
- // - short enough to avoid excessive stall time if the shutdown itself
- // triggers a GC.
- JavaThread::current()->sleep(2 * MILLIUNITS);
+void CollectedHeap::initiate_shutdown() {
+ {
+ // Acquire the Heap_lock to synchronize with VM_Heap_Sync_Operations,
+ // which may depend on the value of _is_shutting_down flag.
+ MutexLocker hl(Heap_lock);
+ _is_shutting_down = true;
+ }
- ResourceMark rm;
- log_warning(gc, alloc)("%s: Stall for VM-Shutdown timed out; allocation may fail with OOME", Thread::current()->name());
-}
-
-void CollectedHeap::before_exit() {
print_tracing_info();
-
- // Stop any on-going concurrent work and prepare for exit.
- stop();
}
size_t CollectedHeap::bootstrap_max_memory() const {
diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp
index 6be0057480d..6f335b1cdf4 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.hpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.hpp
@@ -96,6 +96,8 @@ class CollectedHeap : public CHeapObj {
friend class MemAllocator;
private:
+ static bool _is_shutting_down;
+
GCHeapLog* _heap_log;
GCMetaspaceLog* _metaspace_log;
@@ -209,11 +211,10 @@ protected:
// Default implementation does nothing.
virtual void print_tracing_info() const = 0;
+ public:
// Stop any onging concurrent work and prepare for exit.
virtual void stop() = 0;
- public:
-
static inline size_t filler_array_max_size() {
return _filler_array_max_size;
}
@@ -245,14 +246,9 @@ protected:
// This is the correct place to place such initialization methods.
virtual void post_initialize();
- bool is_shutting_down() const;
+ static bool is_shutting_down();
- // If the VM is shutting down, we may have skipped VM_CollectForAllocation.
- // In this case, stall the allocation request briefly in the hope that
- // the VM shutdown completes before the allocation request returns.
- void stall_for_vm_shutdown();
-
- void before_exit();
+ void initiate_shutdown();
// Stop and resume concurrent GC threads interfering with safepoint operations
virtual void safepoint_synchronize_begin() {}
diff --git a/src/hotspot/share/gc/shared/freeListAllocator.cpp b/src/hotspot/share/gc/shared/freeListAllocator.cpp
index c6801c2be18..990bf88aade 100644
--- a/src/hotspot/share/gc/shared/freeListAllocator.cpp
+++ b/src/hotspot/share/gc/shared/freeListAllocator.cpp
@@ -41,26 +41,26 @@ FreeListAllocator::PendingList::PendingList() :
size_t FreeListAllocator::PendingList::add(FreeNode* node) {
assert(node->next() == nullptr, "precondition");
- FreeNode* old_head = AtomicAccess::xchg(&_head, node);
+ FreeNode* old_head = _head.exchange(node);
if (old_head != nullptr) {
node->set_next(old_head);
} else {
assert(_tail == nullptr, "invariant");
_tail = node;
}
- return AtomicAccess::add(&_count, size_t(1));
+ return _count.add_then_fetch(1u);
}
typename FreeListAllocator::NodeList FreeListAllocator::PendingList::take_all() {
- NodeList result{AtomicAccess::load(&_head), _tail, AtomicAccess::load(&_count)};
- AtomicAccess::store(&_head, (FreeNode*)nullptr);
+ NodeList result{_head.load_relaxed(), _tail, _count.load_relaxed()};
+ _head.store_relaxed(nullptr);
_tail = nullptr;
- AtomicAccess::store(&_count, size_t(0));
+ _count.store_relaxed(0u);
return result;
}
size_t FreeListAllocator::PendingList::count() const {
- return AtomicAccess::load(&_count);
+ return _count.load_relaxed();
}
FreeListAllocator::FreeListAllocator(const char* name, FreeListConfig* config) :
@@ -85,7 +85,7 @@ void FreeListAllocator::delete_list(FreeNode* list) {
}
FreeListAllocator::~FreeListAllocator() {
- uint index = AtomicAccess::load(&_active_pending_list);
+ uint index = _active_pending_list.load_relaxed();
NodeList pending_list = _pending_lists[index].take_all();
delete_list(pending_list._head);
delete_list(_free_list.pop_all());
@@ -93,18 +93,18 @@ FreeListAllocator::~FreeListAllocator() {
// Drop existing nodes and reset all counters
void FreeListAllocator::reset() {
- uint index = AtomicAccess::load(&_active_pending_list);
+ uint index = _active_pending_list.load_relaxed();
_pending_lists[index].take_all();
_free_list.pop_all();
- _free_count = 0;
+ _free_count.store_relaxed(0u);
}
size_t FreeListAllocator::free_count() const {
- return AtomicAccess::load(&_free_count);
+ return _free_count.load_relaxed();
}
size_t FreeListAllocator::pending_count() const {
- uint index = AtomicAccess::load(&_active_pending_list);
+ uint index = _active_pending_list.load_relaxed();
return _pending_lists[index].count();
}
@@ -124,7 +124,7 @@ void* FreeListAllocator::allocate() {
// Decrement count after getting buffer from free list. This, along
// with incrementing count before adding to free list, ensures count
// never underflows.
- size_t count = AtomicAccess::sub(&_free_count, 1u);
+ size_t count = _free_count.sub_then_fetch(1u);
assert((count + 1) != 0, "_free_count underflow");
return node;
} else {
@@ -149,7 +149,7 @@ void FreeListAllocator::release(void* free_node) {
// we're done with what might be the pending list to be transferred.
{
GlobalCounter::CriticalSection cs(Thread::current());
- uint index = AtomicAccess::load_acquire(&_active_pending_list);
+ uint index = _active_pending_list.load_acquire();
size_t count = _pending_lists[index].add(node);
if (count <= _config->transfer_threshold()) return;
}
@@ -164,17 +164,17 @@ void FreeListAllocator::release(void* free_node) {
// in-progress transfer.
bool FreeListAllocator::try_transfer_pending() {
// Attempt to claim the lock.
- if (AtomicAccess::load(&_transfer_lock) || // Skip CAS if likely to fail.
- AtomicAccess::cmpxchg(&_transfer_lock, false, true)) {
+ if (_transfer_lock.load_relaxed() || // Skip CAS if likely to fail.
+ _transfer_lock.compare_exchange(false, true)) {
return false;
}
// Have the lock; perform the transfer.
// Change which pending list is active. Don't need an atomic RMW since
// we have the lock and we're the only writer.
- uint index = AtomicAccess::load(&_active_pending_list);
+ uint index = _active_pending_list.load_relaxed();
uint new_active = (index + 1) % ARRAY_SIZE(_pending_lists);
- AtomicAccess::release_store(&_active_pending_list, new_active);
+ _active_pending_list.release_store(new_active);
// Wait for all critical sections in the buffer life-cycle to complete.
// This includes _free_list pops and adding to the now inactive pending
@@ -186,11 +186,11 @@ bool FreeListAllocator::try_transfer_pending() {
size_t count = transfer_list._entry_count;
if (count > 0) {
// Update count first so no underflow in allocate().
- AtomicAccess::add(&_free_count, count);
+ _free_count.add_then_fetch(count);
_free_list.prepend(*transfer_list._head, *transfer_list._tail);
log_trace(gc, freelist)
("Transferred %s pending to free: %zu", name(), count);
}
- AtomicAccess::release_store(&_transfer_lock, false);
+ _transfer_lock.release_store(false);
return true;
}
diff --git a/src/hotspot/share/gc/shared/freeListAllocator.hpp b/src/hotspot/share/gc/shared/freeListAllocator.hpp
index 07e075a6725..dd163f0fe67 100644
--- a/src/hotspot/share/gc/shared/freeListAllocator.hpp
+++ b/src/hotspot/share/gc/shared/freeListAllocator.hpp
@@ -27,7 +27,7 @@
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
-#include "runtime/atomicAccess.hpp"
+#include "runtime/atomic.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
@@ -62,15 +62,15 @@ public:
// to the free list making them available for re-allocation.
class FreeListAllocator {
struct FreeNode {
- FreeNode* volatile _next;
+ Atomic _next;
FreeNode() : _next (nullptr) { }
- FreeNode* next() { return AtomicAccess::load(&_next); }
+ FreeNode* next() { return _next.load_relaxed(); }
- FreeNode* volatile* next_addr() { return &_next; }
+ Atomic* next_addr() { return &_next; }
- void set_next(FreeNode* next) { AtomicAccess::store(&_next, next); }
+ void set_next(FreeNode* next) { _next.store_relaxed(next); }
};
struct NodeList {
@@ -85,8 +85,8 @@ class FreeListAllocator {
class PendingList {
FreeNode* _tail;
- FreeNode* volatile _head;
- volatile size_t _count;
+ Atomic _head;
+ Atomic _count;
NONCOPYABLE(PendingList);
@@ -105,20 +105,20 @@ class FreeListAllocator {
NodeList take_all();
};
- static FreeNode* volatile* next_ptr(FreeNode& node) { return node.next_addr(); }
- typedef LockFreeStack Stack;
+ static Atomic* next_ptr(FreeNode& node) { return node.next_addr(); }
+ using Stack = LockFreeStack;
FreeListConfig* _config;
char _name[DEFAULT_PADDING_SIZE - sizeof(FreeListConfig*)]; // Use name as padding.
#define DECLARE_PADDED_MEMBER(Id, Type, Name) \
Type Name; DEFINE_PAD_MINUS_SIZE(Id, DEFAULT_PADDING_SIZE, sizeof(Type))
- DECLARE_PADDED_MEMBER(1, volatile size_t, _free_count);
+ DECLARE_PADDED_MEMBER(1, Atomic, _free_count);
DECLARE_PADDED_MEMBER(2, Stack, _free_list);
- DECLARE_PADDED_MEMBER(3, volatile bool, _transfer_lock);
+ DECLARE_PADDED_MEMBER(3, Atomic, _transfer_lock);
#undef DECLARE_PADDED_MEMBER
- volatile uint _active_pending_list;
+ Atomic _active_pending_list;
PendingList _pending_lists[2];
void delete_list(FreeNode* list);
diff --git a/src/hotspot/share/gc/shared/gcArguments.cpp b/src/hotspot/share/gc/shared/gcArguments.cpp
index d45e6a9c7dd..424427c12b6 100644
--- a/src/hotspot/share/gc/shared/gcArguments.cpp
+++ b/src/hotspot/share/gc/shared/gcArguments.cpp
@@ -62,24 +62,6 @@ void GCArguments::initialize_heap_sizes() {
initialize_size_info();
}
-size_t GCArguments::compute_heap_alignment() {
- // The card marking array and the offset arrays for old generations are
- // committed in os pages as well. Make sure they are entirely full (to
- // avoid partial page problems), e.g. if 512 bytes heap corresponds to 1
- // byte entry and the os page size is 4096, the maximum heap size should
- // be 512*4096 = 2MB aligned.
-
- size_t alignment = CardTable::ct_max_alignment_constraint();
-
- if (UseLargePages) {
- // In presence of large pages we have to make sure that our
- // alignment is large page aware.
- alignment = lcm(os::large_page_size(), alignment);
- }
-
- return alignment;
-}
-
#ifdef ASSERT
void GCArguments::assert_flags() {
assert(InitialHeapSize <= MaxHeapSize, "Ergonomics decided on incompatible initial and maximum heap sizes");
diff --git a/src/hotspot/share/gc/shared/gcArguments.hpp b/src/hotspot/share/gc/shared/gcArguments.hpp
index fff41e85d8c..d8a4901f887 100644
--- a/src/hotspot/share/gc/shared/gcArguments.hpp
+++ b/src/hotspot/share/gc/shared/gcArguments.hpp
@@ -45,6 +45,8 @@ protected:
public:
virtual void initialize();
+
+ // Return the (conservative) maximum heap alignment
virtual size_t conservative_max_heap_alignment() = 0;
// Used by heap size heuristics to determine max
@@ -59,8 +61,6 @@ public:
}
void initialize_heap_sizes();
-
- static size_t compute_heap_alignment();
};
#endif // SHARE_GC_SHARED_GCARGUMENTS_HPP
diff --git a/src/hotspot/share/gc/shared/gcLogPrecious.cpp b/src/hotspot/share/gc/shared/gcLogPrecious.cpp
index 43bd58db1aa..d556eed1b69 100644
--- a/src/hotspot/share/gc/shared/gcLogPrecious.cpp
+++ b/src/hotspot/share/gc/shared/gcLogPrecious.cpp
@@ -25,6 +25,7 @@
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
+#include "runtime/thread.hpp"
#include "utilities/ostream.hpp"
stringStream* GCLogPrecious::_lines = nullptr;
@@ -83,7 +84,8 @@ void GCLogPrecious::print_on_error(outputStream* st) {
return;
}
- if (!_lock->try_lock_without_rank_check()) {
+ if (Thread::current_or_null_safe() == nullptr ||
+ !_lock->try_lock_without_rank_check()) {
st->print_cr("\n");
return;
}
diff --git a/src/hotspot/share/gc/shared/gcVMOperations.cpp b/src/hotspot/share/gc/shared/gcVMOperations.cpp
index 36aa0c9843d..6dbfd56b4e9 100644
--- a/src/hotspot/share/gc/shared/gcVMOperations.cpp
+++ b/src/hotspot/share/gc/shared/gcVMOperations.cpp
@@ -92,6 +92,22 @@ static bool should_use_gclocker() {
return UseSerialGC || UseParallelGC;
}
+static void block_if_java_thread() {
+ Thread* thread = Thread::current();
+ if (thread->is_Java_thread()) {
+ // Block here and allow the shutdown to complete
+ while (true) {
+ // The call to wait has a few important effects:
+ // 1) Block forever (minus spurious wake-ups, hence the loop)
+ // 2) Release the Heap_lock, which is taken by the shutdown code
+ // 3) Transition to blocked state so that the final VM_Exit operation can be scheduled
+ Heap_lock->wait();
+ }
+ } else {
+ assert(thread->is_ConcurrentGC_thread(), "Unexpected thread type");
+ }
+}
+
bool VM_GC_Operation::doit_prologue() {
assert(_gc_cause != GCCause::_no_gc, "Illegal GCCause");
@@ -110,8 +126,15 @@ bool VM_GC_Operation::doit_prologue() {
}
VM_Heap_Sync_Operation::doit_prologue();
+ _is_shutting_down = CollectedHeap::is_shutting_down();
+ if (_is_shutting_down) {
+ // Block forever if a Java thread is triggering a GC after
+ // the GC has started to shut down.
+ block_if_java_thread();
+ }
+
// Check invocations
- if (skip_operation() || Universe::is_shutting_down()) {
+ if (skip_operation() || _is_shutting_down) {
// skip collection
Heap_lock->unlock();
if (should_use_gclocker()) {
@@ -197,9 +220,8 @@ VM_CollectForMetadataAllocation::VM_CollectForMetadataAllocation(ClassLoaderData
size_t size,
Metaspace::MetadataType mdtype,
uint gc_count_before,
- uint full_gc_count_before,
- GCCause::Cause gc_cause)
- : VM_GC_Collect_Operation(gc_count_before, gc_cause, full_gc_count_before, true),
+ uint full_gc_count_before)
+ : VM_GC_Collect_Operation(gc_count_before, GCCause::_metadata_GC_threshold, full_gc_count_before, true),
_result(nullptr), _size(size), _mdtype(mdtype), _loader_data(loader_data) {
assert(_size != 0, "An allocation should always be requested with this operation.");
AllocTracer::send_allocation_requiring_gc_event(_size * HeapWordSize, GCId::peek());
@@ -208,8 +230,11 @@ VM_CollectForMetadataAllocation::VM_CollectForMetadataAllocation(ClassLoaderData
void VM_CollectForMetadataAllocation::doit() {
SvcGCMarker sgcm(SvcGCMarker::FULL);
- CollectedHeap* heap = Universe::heap();
- GCCauseSetter gccs(heap, _gc_cause);
+ // Note: GCCauseSetter is intentionally not used here.
+ // The specific GC cause is set directly in downstream calls that initiate
+ // collections, allowing us to accurately reflect different situations:
+ // - A typical metadata allocation failure triggers a collection.
+ // - As a last resort, a collection clears soft references if prior attempts fail.
// Check again if the space is available. Another thread
// may have similarly failed a metadata allocation and induced
@@ -232,8 +257,10 @@ void VM_CollectForMetadataAllocation::doit() {
}
#endif
+ CollectedHeap* heap = Universe::heap();
+
// Don't clear the soft refs yet.
- heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
+ heap->collect_as_vm_thread(_gc_cause);
// After a GC try to allocate without expanding. Could fail
// and expansion will be tried below.
_result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
diff --git a/src/hotspot/share/gc/shared/gcVMOperations.hpp b/src/hotspot/share/gc/shared/gcVMOperations.hpp
index 5048bc3c1ed..a9aee2faf5d 100644
--- a/src/hotspot/share/gc/shared/gcVMOperations.hpp
+++ b/src/hotspot/share/gc/shared/gcVMOperations.hpp
@@ -110,23 +110,23 @@ class VM_GC_Operation: public VM_Heap_Sync_Operation {
uint _full_gc_count_before; // full gc count before acquiring the Heap_lock
bool _full; // whether a "full" collection
bool _prologue_succeeded; // whether doit_prologue succeeded
+ bool _is_shutting_down; // whether the operation found that the GC is shutting down
GCCause::Cause _gc_cause; // the putative cause for this gc op
virtual bool skip_operation() const;
public:
VM_GC_Operation(uint gc_count_before,
- GCCause::Cause _cause,
+ GCCause::Cause cause,
uint full_gc_count_before,
- bool full) : VM_Heap_Sync_Operation() {
- _full = full;
- _prologue_succeeded = false;
- _gc_count_before = gc_count_before;
-
- _gc_cause = _cause;
-
- _full_gc_count_before = full_gc_count_before;
- }
+ bool full)
+ : VM_Heap_Sync_Operation(),
+ _gc_count_before(gc_count_before),
+ _full_gc_count_before(full_gc_count_before),
+ _full(full),
+ _prologue_succeeded(false),
+ _is_shutting_down(false),
+ _gc_cause(cause) {}
virtual const char* cause() const;
@@ -139,6 +139,14 @@ class VM_GC_Operation: public VM_Heap_Sync_Operation {
virtual bool allow_nested_vm_operations() const { return true; }
virtual bool gc_succeeded() const { return _prologue_succeeded; }
+ // This function returns the value of CollectedHeap::is_shutting_down() that
+ // was recorded in the prologue. Unlike CollectedHeap::is_shutting_down(),
+ // this function can be called without acquiring the Heap_lock.
+ //
+ // This function exists so that code that tries to schedule a GC operation
+ // can check if it was refused because the JVM is about to shut down.
+ bool is_shutting_down() const { return _is_shutting_down; }
+
static void notify_gc_begin(bool full = false);
static void notify_gc_end();
};
@@ -214,8 +222,7 @@ class VM_CollectForMetadataAllocation: public VM_GC_Collect_Operation {
size_t size,
Metaspace::MetadataType mdtype,
uint gc_count_before,
- uint full_gc_count_before,
- GCCause::Cause gc_cause);
+ uint full_gc_count_before);
virtual VMOp_Type type() const { return VMOp_CollectForMetadataAllocation; }
virtual void doit();
diff --git a/src/hotspot/share/gc/shared/genArguments.cpp b/src/hotspot/share/gc/shared/genArguments.cpp
index 9618c515b7d..5d5003f8d9f 100644
--- a/src/hotspot/share/gc/shared/genArguments.cpp
+++ b/src/hotspot/share/gc/shared/genArguments.cpp
@@ -42,17 +42,6 @@ size_t MaxOldSize = 0;
// See more in JDK-8346005
size_t OldSize = ScaleForWordSize(4*M);
-size_t GenArguments::conservative_max_heap_alignment() { return (size_t)Generation::GenGrain; }
-
-static size_t young_gen_size_lower_bound() {
- // The young generation must be aligned and have room for eden + two survivors
- return 3 * SpaceAlignment;
-}
-
-static size_t old_gen_size_lower_bound() {
- return SpaceAlignment;
-}
-
size_t GenArguments::scale_by_NewRatio_aligned(size_t base_size, size_t alignment) {
return align_down_bounded(base_size / (NewRatio + 1), alignment);
}
@@ -64,13 +53,6 @@ static size_t bound_minus_alignment(size_t desired_size,
return MIN2(desired_size, max_minus);
}
-void GenArguments::initialize_alignments() {
- // Initialize card size before initializing alignments
- CardTable::initialize_card_size();
- SpaceAlignment = (size_t)Generation::GenGrain;
- HeapAlignment = compute_heap_alignment();
-}
-
void GenArguments::initialize_heap_flags_and_sizes() {
GCArguments::initialize_heap_flags_and_sizes();
diff --git a/src/hotspot/share/gc/shared/genArguments.hpp b/src/hotspot/share/gc/shared/genArguments.hpp
index 80133bd1ec1..0ff9568575d 100644
--- a/src/hotspot/share/gc/shared/genArguments.hpp
+++ b/src/hotspot/share/gc/shared/genArguments.hpp
@@ -38,17 +38,16 @@ extern size_t OldSize;
class GenArguments : public GCArguments {
friend class TestGenCollectorPolicy; // Testing
private:
- virtual void initialize_alignments();
virtual void initialize_size_info();
- // Return the (conservative) maximum heap alignment
- virtual size_t conservative_max_heap_alignment();
-
DEBUG_ONLY(void assert_flags();)
DEBUG_ONLY(void assert_size_info();)
static size_t scale_by_NewRatio_aligned(size_t base_size, size_t alignment);
+ virtual size_t young_gen_size_lower_bound() = 0;
+ virtual size_t old_gen_size_lower_bound() = 0;
+
protected:
virtual void initialize_heap_flags_and_sizes();
};
diff --git a/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp b/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp
index 1ed3701fdab..ea3d644d105 100644
--- a/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp
+++ b/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp
@@ -250,7 +250,7 @@ static JVMFlag::Error MaxSizeForHeapAlignment(const char* name, size_t value, bo
} else
#endif
{
- heap_alignment = GCArguments::compute_heap_alignment();
+ heap_alignment = Arguments::conservative_max_heap_alignment();
}
return MaxSizeForAlignment(name, value, heap_alignment, verbose);
@@ -285,7 +285,7 @@ JVMFlag::Error SoftMaxHeapSizeConstraintFunc(size_t value, bool verbose) {
JVMFlag::Error HeapBaseMinAddressConstraintFunc(size_t value, bool verbose) {
// If an overflow happened in Arguments::set_heap_size(), MaxHeapSize will have too large a value.
// Check for this by ensuring that MaxHeapSize plus the requested min base address still fit within max_uintx.
- if (UseCompressedOops && FLAG_IS_ERGO(MaxHeapSize) && (value > (max_uintx - MaxHeapSize))) {
+ if (value > (max_uintx - MaxHeapSize)) {
JVMFlag::printError(verbose,
"HeapBaseMinAddress (%zu) or MaxHeapSize (%zu) is too large. "
"Sum of them must be less than or equal to maximum of size_t (%zu)\n",
diff --git a/src/hotspot/share/gc/shared/partialArrayState.cpp b/src/hotspot/share/gc/shared/partialArrayState.cpp
index 39c1fe4fc78..6f714d48a35 100644
--- a/src/hotspot/share/gc/shared/partialArrayState.cpp
+++ b/src/hotspot/share/gc/shared/partialArrayState.cpp
@@ -47,7 +47,7 @@ PartialArrayState::PartialArrayState(oop src, oop dst,
}
void PartialArrayState::add_references(size_t count) {
- size_t new_count = AtomicAccess::add(&_refcount, count, memory_order_relaxed);
+ size_t new_count = _refcount.add_then_fetch(count, memory_order_relaxed);
assert(new_count >= count, "reference count overflow");
}
@@ -92,7 +92,7 @@ PartialArrayState* PartialArrayStateAllocator::allocate(oop src, oop dst,
}
void PartialArrayStateAllocator::release(PartialArrayState* state) {
- size_t refcount = AtomicAccess::sub(&state->_refcount, size_t(1), memory_order_release);
+ size_t refcount = state->_refcount.sub_then_fetch(1u, memory_order_release);
if (refcount != 0) {
assert(refcount + 1 != 0, "refcount underflow");
} else {
@@ -116,25 +116,25 @@ PartialArrayStateManager::~PartialArrayStateManager() {
}
Arena* PartialArrayStateManager::register_allocator() {
- uint idx = AtomicAccess::fetch_then_add(&_registered_allocators, 1u, memory_order_relaxed);
+ uint idx = _registered_allocators.fetch_then_add(1u, memory_order_relaxed);
assert(idx < _max_allocators, "exceeded configured max number of allocators");
return ::new (&_arenas[idx]) Arena(mtGC);
}
#ifdef ASSERT
void PartialArrayStateManager::release_allocator() {
- uint old = AtomicAccess::fetch_then_add(&_released_allocators, 1u, memory_order_relaxed);
- assert(old < AtomicAccess::load(&_registered_allocators), "too many releases");
+ uint old = _released_allocators.fetch_then_add(1u, memory_order_relaxed);
+ assert(old < _registered_allocators.load_relaxed(), "too many releases");
}
#endif // ASSERT
void PartialArrayStateManager::reset() {
- uint count = AtomicAccess::load(&_registered_allocators);
- assert(count == AtomicAccess::load(&_released_allocators),
+ uint count = _registered_allocators.load_relaxed();
+ assert(count == _released_allocators.load_relaxed(),
"some allocators still active");
for (uint i = 0; i < count; ++i) {
_arenas[i].~Arena();
}
- AtomicAccess::store(&_registered_allocators, 0u);
- DEBUG_ONLY(AtomicAccess::store(&_released_allocators, 0u);)
+ _registered_allocators.store_relaxed(0u);
+ DEBUG_ONLY(_released_allocators.store_relaxed(0u);)
}
diff --git a/src/hotspot/share/gc/shared/partialArrayState.hpp b/src/hotspot/share/gc/shared/partialArrayState.hpp
index 3208c6d6807..3dafeb0f14c 100644
--- a/src/hotspot/share/gc/shared/partialArrayState.hpp
+++ b/src/hotspot/share/gc/shared/partialArrayState.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
#include "memory/allocation.hpp"
#include "oops/oopsHierarchy.hpp"
+#include "runtime/atomic.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
@@ -60,8 +61,8 @@ class PartialArrayState {
oop _source;
oop _destination;
size_t _length;
- volatile size_t _index;
- volatile size_t _refcount;
+ Atomic _index;
+ Atomic _refcount;
friend class PartialArrayStateAllocator;
@@ -90,7 +91,7 @@ public:
// A pointer to the start index for the next segment to process, for atomic
// update.
- volatile size_t* index_addr() { return &_index; }
+ Atomic* index_addr() { return &_index; }
};
// This class provides memory management for PartialArrayStates.
@@ -178,8 +179,8 @@ class PartialArrayStateManager : public CHeapObj {
// The number of allocators that have been registered/released.
// Atomic to support concurrent registration, and concurrent release.
// Phasing restriction forbids registration concurrent with release.
- volatile uint _registered_allocators;
- DEBUG_ONLY(volatile uint _released_allocators;)
+ Atomic _registered_allocators;
+ DEBUG_ONLY(Atomic _released_allocators;)
// These are all for sole use of the befriended allocator class.
Arena* register_allocator();
diff --git a/src/hotspot/share/gc/shared/partialArrayTaskStepper.hpp b/src/hotspot/share/gc/shared/partialArrayTaskStepper.hpp
index a68d9bd3612..11499ca2ffe 100644
--- a/src/hotspot/share/gc/shared/partialArrayTaskStepper.hpp
+++ b/src/hotspot/share/gc/shared/partialArrayTaskStepper.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
#define SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
#include "oops/arrayOop.hpp"
+#include "runtime/atomic.hpp"
#include "utilities/globalDefinitions.hpp"
class PartialArrayState;
@@ -73,7 +74,7 @@ private:
uint _task_fanout;
// For unit tests.
- inline Step next_impl(size_t length, volatile size_t* index_addr) const;
+ inline Step next_impl(size_t length, Atomic* index_addr) const;
};
#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
diff --git a/src/hotspot/share/gc/shared/partialArrayTaskStepper.inline.hpp b/src/hotspot/share/gc/shared/partialArrayTaskStepper.inline.hpp
index 3693abaf8cf..aaa86e2de16 100644
--- a/src/hotspot/share/gc/shared/partialArrayTaskStepper.inline.hpp
+++ b/src/hotspot/share/gc/shared/partialArrayTaskStepper.inline.hpp
@@ -46,15 +46,13 @@ PartialArrayTaskStepper::start(size_t length) const {
}
PartialArrayTaskStepper::Step
-PartialArrayTaskStepper::next_impl(size_t length, volatile size_t* index_addr) const {
+PartialArrayTaskStepper::next_impl(size_t length, Atomic* index_addr) const {
// The start of the next task is in the state's index.
// Atomically increment by the chunk size to claim the associated chunk.
// Because we limit the number of enqueued tasks to being no more than the
// number of remaining chunks to process, we can use an atomic add for the
// claim, rather than a CAS loop.
- size_t start = AtomicAccess::fetch_then_add(index_addr,
- _chunk_size,
- memory_order_relaxed);
+ size_t start = index_addr->fetch_then_add(_chunk_size, memory_order_relaxed);
assert(start < length, "invariant: start %zu, length %zu", start, length);
assert(((length - start) % _chunk_size) == 0,
diff --git a/src/hotspot/share/gc/shared/satbMarkQueue.cpp b/src/hotspot/share/gc/shared/satbMarkQueue.cpp
index e6ffe39facf..93c52b499a0 100644
--- a/src/hotspot/share/gc/shared/satbMarkQueue.cpp
+++ b/src/hotspot/share/gc/shared/satbMarkQueue.cpp
@@ -27,7 +27,6 @@
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/oop.inline.hpp"
-#include "runtime/atomicAccess.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
@@ -85,28 +84,28 @@ SATBMarkQueueSet::~SATBMarkQueueSet() {
// remains set until the count is reduced to zero.
// Increment count. If count > threshold, set flag, else maintain flag.
-static void increment_count(volatile size_t* cfptr, size_t threshold) {
+static void increment_count(Atomic* cfptr, size_t threshold) {
size_t old;
- size_t value = AtomicAccess::load(cfptr);
+ size_t value = cfptr->load_relaxed();
do {
old = value;
value += 2;
assert(value > old, "overflow");
if (value > threshold) value |= 1;
- value = AtomicAccess::cmpxchg(cfptr, old, value);
+ value = cfptr->compare_exchange(old, value);
} while (value != old);
}
// Decrement count. If count == 0, clear flag, else maintain flag.
-static void decrement_count(volatile size_t* cfptr) {
+static void decrement_count(Atomic* cfptr) {
size_t old;
- size_t value = AtomicAccess::load(cfptr);
+ size_t value = cfptr->load_relaxed();
do {
assert((value >> 1) != 0, "underflow");
old = value;
value -= 2;
if (value <= 1) value = 0;
- value = AtomicAccess::cmpxchg(cfptr, old, value);
+ value = cfptr->compare_exchange(old, value);
} while (value != old);
}
@@ -332,7 +331,7 @@ void SATBMarkQueueSet::print_all(const char* msg) {
#endif // PRODUCT
void SATBMarkQueueSet::abandon_completed_buffers() {
- AtomicAccess::store(&_count_and_process_flag, size_t(0));
+ _count_and_process_flag.store_relaxed(0u);
BufferNode* buffers_to_delete = _list.pop_all();
while (buffers_to_delete != nullptr) {
BufferNode* bn = buffers_to_delete;
diff --git a/src/hotspot/share/gc/shared/satbMarkQueue.hpp b/src/hotspot/share/gc/shared/satbMarkQueue.hpp
index e40b2a3ecf3..d2b14a3cc92 100644
--- a/src/hotspot/share/gc/shared/satbMarkQueue.hpp
+++ b/src/hotspot/share/gc/shared/satbMarkQueue.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "oops/oopsHierarchy.hpp"
+#include "runtime/atomic.hpp"
class Thread;
class Monitor;
@@ -87,7 +88,7 @@ class SATBMarkQueueSet: public PtrQueueSet {
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
PaddedEnd _list;
- volatile size_t _count_and_process_flag;
+ Atomic _count_and_process_flag;
// These are rarely (if ever) changed, so same cache line as count.
size_t _process_completed_buffers_threshold;
size_t _buffer_enqueue_threshold;
@@ -148,12 +149,12 @@ public:
// The number of buffers in the list. Racy and not updated atomically
// with the set of completed buffers.
size_t completed_buffers_num() const {
- return _count_and_process_flag >> 1;
+ return _count_and_process_flag.load_relaxed() >> 1;
}
// Return true if completed buffers should be processed.
bool process_completed_buffers() const {
- return (_count_and_process_flag & 1) != 0;
+ return (_count_and_process_flag.load_relaxed() & 1) != 0;
}
#ifndef PRODUCT
diff --git a/src/hotspot/share/gc/shared/taskqueue.hpp b/src/hotspot/share/gc/shared/taskqueue.hpp
index 1c36e18894a..3a751852ab6 100644
--- a/src/hotspot/share/gc/shared/taskqueue.hpp
+++ b/src/hotspot/share/gc/shared/taskqueue.hpp
@@ -25,13 +25,16 @@
#ifndef SHARE_GC_SHARED_TASKQUEUE_HPP
#define SHARE_GC_SHARED_TASKQUEUE_HPP
+#include "cppstdlib/type_traits.hpp"
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
+#include "metaprogramming/primitiveConversions.hpp"
#include "oops/oopsHierarchy.hpp"
-#include "runtime/atomicAccess.hpp"
+#include "runtime/atomic.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ostream.hpp"
+#include "utilities/powerOfTwo.hpp"
#include "utilities/stack.hpp"
#if TASKQUEUE_STATS
@@ -100,76 +103,92 @@ void TaskQueueStats::reset() {
}
#endif // TASKQUEUE_STATS
+// Helper for TaskQueueSuper, encoding {queue index, tag} pair in a form that
+// supports atomic access to the pair.
+class TaskQueueAge {
+ friend struct PrimitiveConversions::Translate;
+
+public:
+ // Internal type used for indexing the queue, and for the tag.
+ using idx_t = NOT_LP64(uint16_t) LP64_ONLY(uint32_t);
+
+ explicit TaskQueueAge(size_t data = 0) : _data{data} {}
+ TaskQueueAge(idx_t top, idx_t tag) : _fields{top, tag} {}
+
+ idx_t top() const { return _fields._top; }
+ idx_t tag() const { return _fields._tag; }
+
+ bool operator==(const TaskQueueAge& other) const { return _data == other._data; }
+
+private:
+ struct Fields {
+ idx_t _top;
+ idx_t _tag;
+ };
+ union {
+ size_t _data; // Provides access to _fields as a single integral value.
+ Fields _fields;
+ };
+ // _data must be able to hold combined _fields. Must be equal to ensure
+ // there isn't any padding that could be uninitialized by 2-arg ctor.
+ static_assert(sizeof(_data) == sizeof(_fields));
+};
+
+// Support for Atomic.
+template<>
+struct PrimitiveConversions::Translate : public std::true_type {
+ using Value = TaskQueueAge;
+ using Decayed = decltype(TaskQueueAge::_data);
+
+ static Decayed decay(Value x) { return x._data; }
+ static Value recover(Decayed x) { return Value(x); }
+};
+
// TaskQueueSuper collects functionality common to all GenericTaskQueue instances.
template
class TaskQueueSuper: public CHeapObj {
protected:
- // Internal type for indexing the queue; also used for the tag.
- typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
- STATIC_ASSERT(N == idx_t(N)); // Ensure N fits in an idx_t.
+ using Age = TaskQueueAge;
+ using idx_t = Age::idx_t;
+ static_assert(N == idx_t(N)); // Ensure N fits in an idx_t.
// N must be a power of 2 for computing modulo via masking.
// N must be >= 2 for the algorithm to work at all, though larger is better.
- STATIC_ASSERT(N >= 2);
- STATIC_ASSERT(is_power_of_2(N));
+ static_assert(N >= 2);
+ static_assert(is_power_of_2(N));
static const uint MOD_N_MASK = N - 1;
- class Age {
- friend class TaskQueueSuper;
-
- public:
- explicit Age(size_t data = 0) : _data(data) {}
- Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; }
-
- idx_t top() const { return _fields._top; }
- idx_t tag() const { return _fields._tag; }
-
- bool operator ==(const Age& other) const { return _data == other._data; }
-
- private:
- struct fields {
- idx_t _top;
- idx_t _tag;
- };
- union {
- size_t _data;
- fields _fields;
- };
- STATIC_ASSERT(sizeof(size_t) >= sizeof(fields));
- };
-
uint bottom_relaxed() const {
- return AtomicAccess::load(&_bottom);
+ return _bottom.load_relaxed();
}
uint bottom_acquire() const {
- return AtomicAccess::load_acquire(&_bottom);
+ return _bottom.load_acquire();
}
void set_bottom_relaxed(uint new_bottom) {
- AtomicAccess::store(&_bottom, new_bottom);
+ _bottom.store_relaxed(new_bottom);
}
void release_set_bottom(uint new_bottom) {
- AtomicAccess::release_store(&_bottom, new_bottom);
+ _bottom.release_store(new_bottom);
}
Age age_relaxed() const {
- return Age(AtomicAccess::load(&_age._data));
+ return _age.load_relaxed();
}
void set_age_relaxed(Age new_age) {
- AtomicAccess::store(&_age._data, new_age._data);
+ _age.store_relaxed(new_age);
}
Age cmpxchg_age(Age old_age, Age new_age) {
- return Age(AtomicAccess::cmpxchg(&_age._data, old_age._data, new_age._data));
+ return _age.compare_exchange(old_age, new_age);
}
idx_t age_top_relaxed() const {
- // Atomically accessing a subfield of an "atomic" member.
- return AtomicAccess::load(&_age._fields._top);
+ return _age.load_relaxed().top();
}
// These both operate mod N.
@@ -222,16 +241,16 @@ private:
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);
// Index of the first free element after the last one pushed (mod N).
- volatile uint _bottom;
- DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(uint));
+ Atomic _bottom;
+ DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(_bottom));
// top() is the index of the oldest pushed element (mod N), and tag()
// is the associated epoch, to distinguish different modifications of
// the age. There is no available element if top() == _bottom or
// (_bottom - top()) mod N == N-1; the latter indicates underflow
// during concurrent pop_local/pop_global.
- volatile Age _age;
- DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(Age));
+ Atomic _age;
+ DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(_age));
NONCOPYABLE(TaskQueueSuper);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRequest.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRequest.hpp
index 78ae78f4c24..05ecfb254a2 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAllocRequest.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRequest.hpp
@@ -31,15 +31,37 @@
class ShenandoahAllocRequest : StackObj {
public:
- enum Type {
- _alloc_shared, // Allocate common, outside of TLAB
- _alloc_shared_gc, // Allocate common, outside of GCLAB/PLAB
- _alloc_cds, // Allocate for CDS
- _alloc_tlab, // Allocate TLAB
- _alloc_gclab, // Allocate GCLAB
- _alloc_plab, // Allocate PLAB
- _ALLOC_LIMIT
- };
+ // Alloc type is an int value with encoded bits in scheme as:
+ // [xx|xx|xx]
+ // ^---- Requester:
+ // 00 -- mutator
+ // 10 -- mutator (CDS)
+ // 01 -- GC
+ // ^------- Purpose:
+ // 00 -- shared
+ // 01 -- TLAB/GCLAB
+ // 11 -- PLAB
+ // ^---------- Affiliation:
+ // 00 -- YOUNG
+ // 01 -- OLD
+ // 11 -- OLD, promotion
+ typedef int Type;
+
+ static constexpr int bit_gc_alloc = 1 << 0;
+ static constexpr int bit_cds_alloc = 1 << 1;
+ static constexpr int bit_lab_alloc = 1 << 2;
+ static constexpr int bit_plab_alloc = 1 << 3;
+ static constexpr int bit_old_alloc = 1 << 4;
+ static constexpr int bit_promotion_alloc = 1 << 5;
+
+ static constexpr Type _alloc_shared = 0;
+ static constexpr Type _alloc_tlab = bit_lab_alloc;
+ static constexpr Type _alloc_cds = bit_cds_alloc;
+ static constexpr Type _alloc_shared_gc = bit_gc_alloc;
+ static constexpr Type _alloc_shared_gc_old = bit_gc_alloc | bit_old_alloc;
+ static constexpr Type _alloc_shared_gc_promotion = bit_gc_alloc | bit_old_alloc | bit_promotion_alloc;
+ static constexpr Type _alloc_gclab = bit_gc_alloc | bit_lab_alloc;
+ static constexpr Type _alloc_plab = bit_gc_alloc | bit_lab_alloc | bit_plab_alloc | bit_old_alloc;
static const char* alloc_type_to_string(Type type) {
switch (type) {
@@ -47,6 +69,10 @@ public:
return "Shared";
case _alloc_shared_gc:
return "Shared GC";
+ case _alloc_shared_gc_old:
+ return "Shared GC Old";
+ case _alloc_shared_gc_promotion:
+ return "Shared GC Promotion";
case _alloc_cds:
return "CDS";
case _alloc_tlab:
@@ -80,20 +106,14 @@ private:
// This is the type of the request.
Type _alloc_type;
- // This is the generation which the request is targeting.
- ShenandoahAffiliation const _affiliation;
-
- // True if this request is trying to copy any object from young to old (promote).
- bool _is_promotion;
-
#ifdef ASSERT
// Check that this is set before being read.
bool _actual_size_set;
#endif
- ShenandoahAllocRequest(size_t _min_size, size_t _requested_size, Type _alloc_type, ShenandoahAffiliation affiliation, bool is_promotion = false) :
+ ShenandoahAllocRequest(size_t _min_size, size_t _requested_size, Type _alloc_type) :
_min_size(_min_size), _requested_size(_requested_size),
- _actual_size(0), _waste(0), _alloc_type(_alloc_type), _affiliation(affiliation), _is_promotion(is_promotion)
+ _actual_size(0), _waste(0), _alloc_type(_alloc_type)
#ifdef ASSERT
, _actual_size_set(false)
#endif
@@ -101,31 +121,34 @@ private:
public:
static inline ShenandoahAllocRequest for_tlab(size_t min_size, size_t requested_size) {
- return ShenandoahAllocRequest(min_size, requested_size, _alloc_tlab, ShenandoahAffiliation::YOUNG_GENERATION);
+ return ShenandoahAllocRequest(min_size, requested_size, _alloc_tlab);
}
static inline ShenandoahAllocRequest for_gclab(size_t min_size, size_t requested_size) {
- return ShenandoahAllocRequest(min_size, requested_size, _alloc_gclab, ShenandoahAffiliation::YOUNG_GENERATION);
+ return ShenandoahAllocRequest(min_size, requested_size, _alloc_gclab);
}
static inline ShenandoahAllocRequest for_plab(size_t min_size, size_t requested_size) {
- return ShenandoahAllocRequest(min_size, requested_size, _alloc_plab, ShenandoahAffiliation::OLD_GENERATION);
+ return ShenandoahAllocRequest(min_size, requested_size, _alloc_plab);
}
static inline ShenandoahAllocRequest for_shared_gc(size_t requested_size, ShenandoahAffiliation affiliation, bool is_promotion = false) {
if (is_promotion) {
- assert(affiliation == ShenandoahAffiliation::OLD_GENERATION, "Should only promote to old generation");
- return ShenandoahAllocRequest(0, requested_size, _alloc_shared_gc, affiliation, true);
+ assert(affiliation == OLD_GENERATION, "Should only promote to old generation");
+ return ShenandoahAllocRequest(0, requested_size, _alloc_shared_gc_promotion);
}
- return ShenandoahAllocRequest(0, requested_size, _alloc_shared_gc, affiliation);
+ if (affiliation == OLD_GENERATION) {
+ return ShenandoahAllocRequest(0, requested_size, _alloc_shared_gc_old);
+ }
+ return ShenandoahAllocRequest(0, requested_size, _alloc_shared_gc);
}
static inline ShenandoahAllocRequest for_shared(size_t requested_size) {
- return ShenandoahAllocRequest(0, requested_size, _alloc_shared, ShenandoahAffiliation::YOUNG_GENERATION);
+ return ShenandoahAllocRequest(0, requested_size, _alloc_shared);
}
static inline ShenandoahAllocRequest for_cds(size_t requested_size) {
- return ShenandoahAllocRequest(0, requested_size, _alloc_cds, ShenandoahAffiliation::YOUNG_GENERATION);
+ return ShenandoahAllocRequest(0, requested_size, _alloc_cds);
}
inline size_t size() const {
@@ -167,71 +190,35 @@ public:
}
inline bool is_mutator_alloc() const {
- switch (_alloc_type) {
- case _alloc_tlab:
- case _alloc_shared:
- case _alloc_cds:
- return true;
- case _alloc_gclab:
- case _alloc_plab:
- case _alloc_shared_gc:
- return false;
- default:
- ShouldNotReachHere();
- return false;
- }
+ return (_alloc_type & bit_gc_alloc) == 0;
}
inline bool is_gc_alloc() const {
- switch (_alloc_type) {
- case _alloc_tlab:
- case _alloc_shared:
- case _alloc_cds:
- return false;
- case _alloc_gclab:
- case _alloc_plab:
- case _alloc_shared_gc:
- return true;
- default:
- ShouldNotReachHere();
- return false;
- }
+ return (_alloc_type & bit_gc_alloc) != 0;
}
inline bool is_lab_alloc() const {
- switch (_alloc_type) {
- case _alloc_tlab:
- case _alloc_gclab:
- case _alloc_plab:
- return true;
- case _alloc_shared:
- case _alloc_shared_gc:
- case _alloc_cds:
- return false;
- default:
- ShouldNotReachHere();
- return false;
- }
+ return (_alloc_type & bit_lab_alloc) != 0;
}
- bool is_old() const {
- return _affiliation == OLD_GENERATION;
+ inline bool is_old() const {
+ return (_alloc_type & bit_old_alloc) != 0;
}
- bool is_young() const {
- return _affiliation == YOUNG_GENERATION;
+ inline bool is_young() const {
+ return (_alloc_type & bit_old_alloc) == 0;
}
- ShenandoahAffiliation affiliation() const {
- return _affiliation;
+ inline ShenandoahAffiliation affiliation() const {
+ return (_alloc_type & bit_old_alloc) == 0 ? YOUNG_GENERATION : OLD_GENERATION ;
}
const char* affiliation_name() const {
- return shenandoah_affiliation_name(_affiliation);
+ return shenandoah_affiliation_name(affiliation());
}
- bool is_promotion() const {
- return _is_promotion;
+ inline bool is_promotion() const {
+ return (_alloc_type & bit_promotion_alloc) != 0;
}
};
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
index a7cf8e638dd..c1fa4b964b7 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
@@ -37,6 +37,7 @@
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "utilities/defaultStream.hpp"
+#include "utilities/powerOfTwo.hpp"
void ShenandoahArguments::initialize() {
#if !(defined AARCH64 || defined AMD64 || defined PPC64 || defined RISCV64)
@@ -205,7 +206,7 @@ void ShenandoahArguments::initialize() {
}
size_t ShenandoahArguments::conservative_max_heap_alignment() {
- size_t align = ShenandoahMaxRegionSize;
+ size_t align = next_power_of_2(ShenandoahMaxRegionSize);
if (UseLargePages) {
align = MAX2(align, os::large_page_size());
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
index 0d38cc757f4..2b5bc766a46 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
@@ -129,7 +129,7 @@ public:
private:
template
- inline void arraycopy_marking(T* src, T* dst, size_t count, bool is_old_marking);
+ inline void arraycopy_marking(T* dst, size_t count);
template
inline void arraycopy_evacuation(T* src, size_t count);
template
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
index b176446452a..adeea8ebf96 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
@@ -387,13 +387,11 @@ template
void ShenandoahBarrierSet::arraycopy_work(T* src, size_t count) {
// Young cycles are allowed to run when old marking is in progress. When old marking is in progress,
// this barrier will be called with ENQUEUE=true and HAS_FWD=false, even though the young generation
- // may have forwarded objects. In this case, the `arraycopy_work` is first called with HAS_FWD=true and
- // ENQUEUE=false.
- assert(HAS_FWD == _heap->has_forwarded_objects() || _heap->is_concurrent_old_mark_in_progress(),
- "Forwarded object status is sane");
+ // may have forwarded objects.
+ assert(HAS_FWD == _heap->has_forwarded_objects() || _heap->is_concurrent_old_mark_in_progress(), "Forwarded object status is sane");
// This function cannot be called to handle marking and evacuation at the same time (they operate on
// different sides of the copy).
- assert((HAS_FWD || EVAC) != ENQUEUE, "Cannot evacuate and mark both sides of copy.");
+ static_assert((HAS_FWD || EVAC) != ENQUEUE, "Cannot evacuate and mark both sides of copy.");
Thread* thread = Thread::current();
SATBMarkQueue& queue = ShenandoahThreadLocalData::satb_mark_queue(thread);
@@ -412,7 +410,7 @@ void ShenandoahBarrierSet::arraycopy_work(T* src, size_t count) {
shenandoah_assert_forwarded_except(elem_ptr, obj, _heap->cancelled_gc());
ShenandoahHeap::atomic_update_oop(fwd, elem_ptr, o);
}
- if (ENQUEUE && !ctx->is_marked_strong_or_old(obj)) {
+ if (ENQUEUE && !ctx->is_marked_strong(obj)) {
_satb_mark_queue_set.enqueue_known_active(queue, obj);
}
}
@@ -426,68 +424,29 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
return;
}
- char gc_state = ShenandoahThreadLocalData::gc_state(Thread::current());
- if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
- arraycopy_evacuation(src, count);
- } else if ((gc_state & ShenandoahHeap::UPDATE_REFS) != 0) {
- arraycopy_update(src, count);
+ const char gc_state = ShenandoahThreadLocalData::gc_state(Thread::current());
+ if ((gc_state & ShenandoahHeap::MARKING) != 0) {
+ // If marking old or young, we must evaluate the SATB barrier. This will be the only
+ // action if we are not marking old. If we are marking old, we must still evaluate the
+ // load reference barrier for a young collection.
+ arraycopy_marking(dst, count);
}
- if (_heap->mode()->is_generational()) {
- assert(ShenandoahSATBBarrier, "Generational mode assumes SATB mode");
- if ((gc_state & ShenandoahHeap::YOUNG_MARKING) != 0) {
- arraycopy_marking(src, dst, count, false);
- }
- if ((gc_state & ShenandoahHeap::OLD_MARKING) != 0) {
- arraycopy_marking(src, dst, count, true);
- }
- } else if ((gc_state & ShenandoahHeap::MARKING) != 0) {
- arraycopy_marking(src, dst, count, false);
+ if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
+ assert((gc_state & ShenandoahHeap::YOUNG_MARKING) == 0, "Cannot be marking young during evacuation");
+ arraycopy_evacuation(src, count);
+ } else if ((gc_state & ShenandoahHeap::UPDATE_REFS) != 0) {
+ assert((gc_state & ShenandoahHeap::YOUNG_MARKING) == 0, "Cannot be marking young during update-refs");
+ arraycopy_update(src, count);
}
}
template
-void ShenandoahBarrierSet::arraycopy_marking(T* src, T* dst, size_t count, bool is_old_marking) {
+void ShenandoahBarrierSet::arraycopy_marking(T* dst, size_t count) {
assert(_heap->is_concurrent_mark_in_progress(), "only during marking");
- /*
- * Note that an old-gen object is considered live if it is live at the start of OLD marking or if it is promoted
- * following the start of OLD marking.
- *
- * 1. Every object promoted following the start of OLD marking will be above TAMS within its old-gen region
- * 2. Every object live at the start of OLD marking will be referenced from a "root" or it will be referenced from
- * another live OLD-gen object. With regards to old-gen, roots include stack locations and all of live young-gen.
- * All root references to old-gen are identified during a bootstrap young collection. All references from other
- * old-gen objects will be marked during the traversal of all old objects, or will be marked by the SATB barrier.
- *
- * During old-gen marking (which is interleaved with young-gen collections), call arraycopy_work() if:
- *
- * 1. The overwritten array resides in old-gen and it is below TAMS within its old-gen region
- * 2. Do not call arraycopy_work for any array residing in young-gen because young-gen collection is idle at this time
- *
- * During young-gen marking, call arraycopy_work() if:
- *
- * 1. The overwritten array resides in young-gen and is below TAMS within its young-gen region
- * 2. Additionally, if array resides in old-gen, regardless of its relationship to TAMS because this old-gen array
- * may hold references to young-gen
- */
if (ShenandoahSATBBarrier) {
- T* array = dst;
- HeapWord* array_addr = reinterpret_cast(array);
- ShenandoahHeapRegion* r = _heap->heap_region_containing(array_addr);
- if (is_old_marking) {
- // Generational, old marking
- assert(_heap->mode()->is_generational(), "Invariant");
- if (r->is_old() && (array_addr < _heap->marking_context()->top_at_mark_start(r))) {
- arraycopy_work(array, count);
- }
- } else if (_heap->mode()->is_generational()) {
- // Generational, young marking
- if (r->is_old() || (array_addr < _heap->marking_context()->top_at_mark_start(r))) {
- arraycopy_work(array, count);
- }
- } else if (array_addr < _heap->marking_context()->top_at_mark_start(r)) {
- // Non-generational, marking
- arraycopy_work(array, count);
+ if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast(dst))) {
+ arraycopy_work(dst, count);
}
}
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index 0deb3b5ba4c..ab7985b3d34 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -1311,19 +1311,11 @@ HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool&
// Overwrite with non-zero (non-null) values only if necessary for allocation bookkeeping.
- switch (req.type()) {
- case ShenandoahAllocRequest::_alloc_tlab:
- case ShenandoahAllocRequest::_alloc_shared:
- case ShenandoahAllocRequest::_alloc_cds:
- return allocate_for_mutator(req, in_new_region);
- case ShenandoahAllocRequest::_alloc_gclab:
- case ShenandoahAllocRequest::_alloc_plab:
- case ShenandoahAllocRequest::_alloc_shared_gc:
- return allocate_for_collector(req, in_new_region);
- default:
- ShouldNotReachHere();
+ if (req.is_mutator_alloc()) {
+ return allocate_for_mutator(req, in_new_region);
+ } else {
+ return allocate_for_collector(req, in_new_region);
}
- return nullptr;
}
HeapWord* ShenandoahFreeSet::allocate_for_mutator(ShenandoahAllocRequest &req, bool &in_new_region) {
@@ -1619,21 +1611,13 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
if (req.is_mutator_alloc()) {
request_generation = _heap->mode()->is_generational()? _heap->young_generation(): _heap->global_generation();
orig_partition = ShenandoahFreeSetPartitionId::Mutator;
- } else if (req.type() == ShenandoahAllocRequest::_alloc_gclab) {
- request_generation = _heap->mode()->is_generational()? _heap->young_generation(): _heap->global_generation();
- orig_partition = ShenandoahFreeSetPartitionId::Collector;
- } else if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
+ } else if (req.is_old()) {
request_generation = _heap->old_generation();
orig_partition = ShenandoahFreeSetPartitionId::OldCollector;
} else {
- assert(req.type() == ShenandoahAllocRequest::_alloc_shared_gc, "Unexpected allocation type");
- if (req.is_old()) {
- request_generation = _heap->old_generation();
- orig_partition = ShenandoahFreeSetPartitionId::OldCollector;
- } else {
- request_generation = _heap->mode()->is_generational()? _heap->young_generation(): _heap->global_generation();
- orig_partition = ShenandoahFreeSetPartitionId::Collector;
- }
+ // Not old collector alloc, so this is a young collector gclab or shared allocation
+ request_generation = _heap->mode()->is_generational()? _heap->young_generation(): _heap->global_generation();
+ orig_partition = ShenandoahFreeSetPartitionId::Collector;
}
if (alloc_capacity(r) < PLAB::min_size() * HeapWordSize) {
// Regardless of whether this allocation succeeded, if the remaining memory is less than PLAB:min_size(), retire this region.
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
index cad9dc0e932..636f65e2553 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
@@ -129,6 +129,8 @@ inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::
switch (type) {
case ShenandoahAllocRequest::_alloc_shared:
case ShenandoahAllocRequest::_alloc_shared_gc:
+ case ShenandoahAllocRequest::_alloc_shared_gc_old:
+ case ShenandoahAllocRequest::_alloc_shared_gc_promotion:
case ShenandoahAllocRequest::_alloc_cds:
// Counted implicitly by tlab/gclab allocs
break;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahScanRemembered.cpp b/src/hotspot/share/gc/shenandoah/shenandoahScanRemembered.cpp
index 34713898fc6..44064dbd1a9 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahScanRemembered.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahScanRemembered.cpp
@@ -250,6 +250,8 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
HeapWord* right = MIN2(region->top(), end_range_of_interest);
HeapWord* end_of_search_next = MIN2(right, tams);
+ // Since end_range_of_interest may not align on a card boundary, last_relevant_card_index is conservative. Not all of the
+ // memory within the last relevant card's span is < right.
size_t last_relevant_card_index;
if (end_range_of_interest == _end_of_heap) {
last_relevant_card_index = _rs->card_index_for_addr(end_range_of_interest - 1);
@@ -352,9 +354,8 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
return nullptr;
}
} while (!starts_object(following_card_index));
- assert(_rs->addr_for_card_index(following_card_index) + get_first_start(following_card_index),
- "Result must precede right");
- return _rs->addr_for_card_index(following_card_index) + get_first_start(following_card_index);
+ HeapWord* result_candidate = _rs->addr_for_card_index(following_card_index) + get_first_start(following_card_index);
+ return (result_candidate >= right)? nullptr: result_candidate;
}
}
}
diff --git a/src/hotspot/share/gc/z/zBarrier.inline.hpp b/src/hotspot/share/gc/z/zBarrier.inline.hpp
index b5923f01628..766a6eb8e4c 100644
--- a/src/hotspot/share/gc/z/zBarrier.inline.hpp
+++ b/src/hotspot/share/gc/z/zBarrier.inline.hpp
@@ -86,10 +86,6 @@ inline void ZBarrier::self_heal(ZBarrierFastPath fast_path, volatile zpointer* p
assert(ZPointer::is_remapped(heal_ptr), "invariant");
for (;;) {
- if (ptr == zpointer::null) {
- assert(!ZVerifyOops || !ZHeap::heap()->is_in(uintptr_t(p)) || !ZHeap::heap()->is_old(p), "No raw null in old");
- }
-
assert_transition_monotonicity(ptr, heal_ptr);
// Heal
diff --git a/src/hotspot/share/gc/z/zBarrierSet.cpp b/src/hotspot/share/gc/z/zBarrierSet.cpp
index 87f93043bdf..643eba1947e 100644
--- a/src/hotspot/share/gc/z/zBarrierSet.cpp
+++ b/src/hotspot/share/gc/z/zBarrierSet.cpp
@@ -223,27 +223,7 @@ void ZBarrierSet::on_slowpath_allocation_exit(JavaThread* thread, oop new_obj) {
// breaks that promise. Take a few steps in the interpreter instead, which has
// no such assumptions about where an object resides.
deoptimize_allocation(thread);
- return;
}
-
- if (!ZGeneration::young()->is_phase_mark_complete()) {
- return;
- }
-
- if (!page->is_relocatable()) {
- return;
- }
-
- if (ZRelocate::compute_to_age(age) != ZPageAge::old) {
- return;
- }
-
- // If the object is young, we have to still be careful that it isn't racingly
- // about to get promoted to the old generation. That causes issues when null
- // pointers are supposed to be coloured, but the JIT is a bit sloppy and
- // reinitializes memory with raw nulls. We detect this situation and detune
- // rather than relying on the JIT to never be sloppy with redundant initialization.
- deoptimize_allocation(thread);
}
void ZBarrierSet::print_on(outputStream* st) const {
diff --git a/src/hotspot/share/gc/z/zGeneration.cpp b/src/hotspot/share/gc/z/zGeneration.cpp
index d1680b6c336..2b632ef29a9 100644
--- a/src/hotspot/share/gc/z/zGeneration.cpp
+++ b/src/hotspot/share/gc/z/zGeneration.cpp
@@ -111,6 +111,16 @@ static const ZStatSampler ZSamplerJavaThreads("System", "Java Threads", ZStatUni
ZGenerationYoung* ZGeneration::_young;
ZGenerationOld* ZGeneration::_old;
+class ZRendezvousHandshakeClosure : public HandshakeClosure {
+public:
+ ZRendezvousHandshakeClosure()
+ : HandshakeClosure("ZRendezvous") {}
+
+ void do_thread(Thread* thread) {
+ // Does nothing
+ }
+};
+
ZGeneration::ZGeneration(ZGenerationId id, ZPageTable* page_table, ZPageAllocator* page_allocator)
: _id(id),
_page_allocator(page_allocator),
@@ -168,11 +178,19 @@ void ZGeneration::free_empty_pages(ZRelocationSetSelector* selector, int bulk) {
}
void ZGeneration::flip_age_pages(const ZRelocationSetSelector* selector) {
- if (is_young()) {
- _relocate.flip_age_pages(selector->not_selected_small());
- _relocate.flip_age_pages(selector->not_selected_medium());
- _relocate.flip_age_pages(selector->not_selected_large());
- }
+ _relocate.flip_age_pages(selector->not_selected_small());
+ _relocate.flip_age_pages(selector->not_selected_medium());
+ _relocate.flip_age_pages(selector->not_selected_large());
+
+ // Perform a handshake between flip promotion and running the promotion barrier. This ensures
+ // that ZBarrierSet::on_slowpath_allocation_exit() observing a young page that was then racingly
+ // flip promoted, will run any stores without barriers to completion before responding to the
+ // handshake at the subsequent safepoint poll. This ensures that the flip promotion barriers always
+ // run after compiled code missing barriers, but before relocate start.
+ ZRendezvousHandshakeClosure cl;
+ Handshake::execute(&cl);
+
+ _relocate.barrier_flip_promoted_pages(_relocation_set.flip_promoted_pages());
}
static double fragmentation_limit(ZGenerationId generation) {
@@ -235,7 +253,9 @@ void ZGeneration::select_relocation_set(bool promote_all) {
_relocation_set.install(&selector);
// Flip age young pages that were not selected
- flip_age_pages(&selector);
+ if (is_young()) {
+ flip_age_pages(&selector);
+ }
// Setup forwarding table
ZRelocationSetIterator rs_iter(&_relocation_set);
@@ -1280,16 +1300,6 @@ bool ZGenerationOld::uses_clear_all_soft_reference_policy() const {
return _reference_processor.uses_clear_all_soft_reference_policy();
}
-class ZRendezvousHandshakeClosure : public HandshakeClosure {
-public:
- ZRendezvousHandshakeClosure()
- : HandshakeClosure("ZRendezvous") {}
-
- void do_thread(Thread* thread) {
- // Does nothing
- }
-};
-
class ZRendezvousGCThreads: public VM_Operation {
public:
VMOp_Type type() const { return VMOp_ZRendezvousGCThreads; }
diff --git a/src/hotspot/share/gc/z/zRelocate.cpp b/src/hotspot/share/gc/z/zRelocate.cpp
index 69233da6f54..24c4bdeac16 100644
--- a/src/hotspot/share/gc/z/zRelocate.cpp
+++ b/src/hotspot/share/gc/z/zRelocate.cpp
@@ -1087,7 +1087,6 @@ private:
ZRelocateSmallAllocator _small_allocator;
ZRelocateMediumAllocator _medium_allocator;
const size_t _total_forwardings;
- volatile size_t _numa_local_forwardings;
public:
ZRelocateTask(ZRelocationSet* relocation_set,
@@ -1104,8 +1103,7 @@ public:
_medium_targets(medium_targets),
_small_allocator(_generation),
_medium_allocator(_generation, shared_medium_targets),
- _total_forwardings(relocation_set->nforwardings()),
- _numa_local_forwardings(0) {
+ _total_forwardings(relocation_set->nforwardings()) {
for (uint32_t i = 0; i < ZNUMA::count(); i++) {
ZRelocationSetParallelIterator* const iter = _iters->addr(i);
@@ -1124,18 +1122,17 @@ public:
// Signal that we're not using the queue anymore. Used mostly for asserts.
_queue->deactivate();
-
- if (ZNUMA::is_enabled()) {
- log_debug(gc, reloc, numa)("Forwardings relocated NUMA-locally: %zu / %zu (%.0f%%)",
- _numa_local_forwardings, _total_forwardings, percent_of(_numa_local_forwardings, _total_forwardings));
- }
}
virtual void work() {
ZRelocateWork small(&_small_allocator, _small_targets->addr(), _generation);
ZRelocateWork medium(&_medium_allocator, _medium_targets->addr(), _generation);
+
const uint32_t num_nodes = ZNUMA::count();
- uint32_t numa_local_forwardings_worker = 0;
+ const uint32_t start_node = ZNUMA::id();
+ uint32_t current_node = start_node;
+ bool has_affinity = false;
+ bool has_affinity_current_node = false;
const auto do_forwarding = [&](ZForwarding* forwarding) {
ZPage* const page = forwarding->page();
@@ -1167,26 +1164,30 @@ public:
const auto do_forwarding_one_from_iter = [&]() {
ZForwarding* forwarding;
- const uint32_t start_node = ZNUMA::id();
- uint32_t current_node = start_node;
- for (uint32_t i = 0; i < num_nodes; i++) {
+ for (;;) {
if (_iters->get(current_node).next_if(&forwarding, check_numa_local, current_node)) {
- claim_and_do_forwarding(forwarding);
-
- if (current_node == start_node) {
- // Track if this forwarding was relocated on the local NUMA node
- numa_local_forwardings_worker++;
+ // Set thread affinity for NUMA-local processing (if needed)
+ if (UseNUMA && !has_affinity_current_node) {
+ os::numa_set_thread_affinity(Thread::current(), ZNUMA::numa_id_to_node(current_node));
+ has_affinity = true;
+ has_affinity_current_node = true;
}
+ // Perform the forwarding task
+ claim_and_do_forwarding(forwarding);
return true;
}
- // Check next node.
+ // No work found on the current node, move to the next node
current_node = (current_node + 1) % num_nodes;
- }
+ has_affinity_current_node = false;
- return false;
+ // If we've looped back to the starting node there's no more work to do
+ if (current_node == start_node) {
+ return false;
+ }
+ }
};
for (;;) {
@@ -1209,11 +1210,13 @@ public:
}
}
- if (ZNUMA::is_enabled()) {
- AtomicAccess::add(&_numa_local_forwardings, numa_local_forwardings_worker, memory_order_relaxed);
- }
-
_queue->leave();
+
+ if (UseNUMA && has_affinity) {
+ // Restore the affinity of the thread so that it isn't bound to a specific
+ // node any more
+ os::numa_set_thread_affinity(Thread::current(), -1);
+ }
}
virtual void resize_workers(uint nworkers) {
@@ -1322,7 +1325,7 @@ private:
public:
ZFlipAgePagesTask(const ZArray<ZPage*>* pages)
- : ZTask("ZPromotePagesTask"),
+ : ZTask("ZFlipAgePagesTask"),
_iter(pages) {}
virtual void work() {
@@ -1337,16 +1340,6 @@ public:
// Figure out if this is proper promotion
const bool promotion = to_age == ZPageAge::old;
- if (promotion) {
- // Before promoting an object (and before relocate start), we must ensure that all
- // contained zpointers are store good. The marking code ensures that for non-null
- // pointers, but null pointers are ignored. This code ensures that even null pointers
- // are made store good, for the promoted objects.
- prev_page->object_iterate([&](oop obj) {
- ZIterator::basic_oop_iterate_safe(obj, ZBarrier::promote_barrier_on_young_oop_field);
- });
- }
-
// Logging
prev_page->log_msg(promotion ? " (flip promoted)" : " (flip survived)");
@@ -1360,7 +1353,7 @@ public:
if (promotion) {
ZGeneration::young()->flip_promote(prev_page, new_page);
- // Defer promoted page registration times the lock is taken
+ // Defer promoted page registration
promoted_pages.push(prev_page);
}
@@ -1371,11 +1364,42 @@ public:
}
};
+class ZPromoteBarrierTask : public ZTask {
+private:
+ ZArrayParallelIterator<ZPage*> _iter;
+
+public:
+ ZPromoteBarrierTask(const ZArray<ZPage*>* pages)
+ : ZTask("ZPromoteBarrierTask"),
+ _iter(pages) {}
+
+ virtual void work() {
+ SuspendibleThreadSetJoiner sts_joiner;
+
+ for (ZPage* page; _iter.next(&page);) {
+ // When promoting an object (and before relocate start), we must ensure that all
+ // contained zpointers are store good. The marking code ensures that for non-null
+ // pointers, but null pointers are ignored. This code ensures that even null pointers
+ // are made store good, for the promoted objects.
+ page->object_iterate([&](oop obj) {
+ ZIterator::basic_oop_iterate_safe(obj, ZBarrier::promote_barrier_on_young_oop_field);
+ });
+
+ SuspendibleThreadSet::yield();
+ }
+ }
+};
+
void ZRelocate::flip_age_pages(const ZArray<ZPage*>* pages) {
ZFlipAgePagesTask flip_age_task(pages);
workers()->run(&flip_age_task);
}
+void ZRelocate::barrier_flip_promoted_pages(const ZArray<ZPage*>* pages) {
+ ZPromoteBarrierTask promote_barrier_task(pages);
+ workers()->run(&promote_barrier_task);
+}
+
void ZRelocate::synchronize() {
_queue.synchronize();
}
diff --git a/src/hotspot/share/gc/z/zRelocate.hpp b/src/hotspot/share/gc/z/zRelocate.hpp
index d0ddf7deecf..50111f24ee5 100644
--- a/src/hotspot/share/gc/z/zRelocate.hpp
+++ b/src/hotspot/share/gc/z/zRelocate.hpp
@@ -119,6 +119,7 @@ public:
void relocate(ZRelocationSet* relocation_set);
void flip_age_pages(const ZArray<ZPage*>* pages);
+ void barrier_flip_promoted_pages(const ZArray<ZPage*>* pages);
void synchronize();
void desynchronize();
diff --git a/src/hotspot/share/gc/z/zVerify.cpp b/src/hotspot/share/gc/z/zVerify.cpp
index 55f13be9b44..db3db14afa2 100644
--- a/src/hotspot/share/gc/z/zVerify.cpp
+++ b/src/hotspot/share/gc/z/zVerify.cpp
@@ -130,7 +130,10 @@ static void z_verify_root_oop_object(zaddress addr, void* p) {
static void z_verify_old_oop(zpointer* p) {
const zpointer o = *p;
- assert(o != zpointer::null, "Old should not contain raw null");
+ if (o == zpointer::null) {
+ guarantee(ZGeneration::young()->is_phase_mark_complete(), "Only possible when flip promoting");
+ guarantee(ZHeap::heap()->page(p)->is_allocating(), "Raw nulls only possible in allocating pages");
+ }
if (!z_is_null_relaxed(o)) {
if (ZPointer::is_mark_good(o)) {
// Even though the pointer is mark good, we can't verify that it should
diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp b/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp
index f7a725fce6d..534c9996cfe 100644
--- a/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp
+++ b/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp
@@ -217,7 +217,8 @@ static bool compute_top_frame(const JfrSampleRequest& request, frame& top_frame,
const PcDesc* const pc_desc = get_pc_desc(sampled_nm, sampled_pc);
if (is_valid(pc_desc)) {
intptr_t* const synthetic_sp = sender_sp - sampled_nm->frame_size();
- top_frame = frame(synthetic_sp, synthetic_sp, sender_sp, pc_desc->real_pc(sampled_nm), sampled_nm);
+ intptr_t* const synthetic_fp = sender_sp AARCH64_ONLY( - frame::sender_sp_offset);
+ top_frame = frame(synthetic_sp, synthetic_sp, synthetic_fp, pc_desc->real_pc(sampled_nm), sampled_nm);
in_continuation = is_in_continuation(top_frame, jt);
return true;
}
diff --git a/src/hotspot/share/memory/memoryReserver.cpp b/src/hotspot/share/memory/memoryReserver.cpp
index 11a0422f7b0..e8d1887f59f 100644
--- a/src/hotspot/share/memory/memoryReserver.cpp
+++ b/src/hotspot/share/memory/memoryReserver.cpp
@@ -437,7 +437,7 @@ ReservedSpace HeapReserver::Instance::try_reserve_range(char *highest_start,
if (reserved.is_reserved()) {
if (reserved.base() >= aligned_heap_base_min_address &&
- size <= (uintptr_t)(upper_bound - reserved.base())) {
+ size <= (size_t)(upper_bound - reserved.base())) {
// Got a successful reservation.
return reserved;
}
@@ -546,16 +546,16 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
const size_t attach_point_alignment = lcm(alignment, os_attach_point_alignment);
- char* aligned_heap_base_min_address = align_up((char*)HeapBaseMinAddress, alignment);
- size_t noaccess_prefix = ((aligned_heap_base_min_address + size) > (char*)OopEncodingHeapMax) ?
+ uintptr_t aligned_heap_base_min_address = align_up(MAX2(HeapBaseMinAddress, alignment), alignment);
+ size_t noaccess_prefix = ((aligned_heap_base_min_address + size) > OopEncodingHeapMax) ?
noaccess_prefix_size : 0;
ReservedSpace reserved{};
// Attempt to alloc at user-given address.
if (!FLAG_IS_DEFAULT(HeapBaseMinAddress)) {
- reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, aligned_heap_base_min_address);
- if (reserved.base() != aligned_heap_base_min_address) { // Enforce this exact address.
+ reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, (char*)aligned_heap_base_min_address);
+ if (reserved.base() != (char*)aligned_heap_base_min_address) { // Enforce this exact address.
release(reserved);
reserved = {};
}
@@ -575,38 +575,41 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
// Attempt to allocate so that we can run without base and scale (32-Bit unscaled compressed oops).
// Give it several tries from top of range to bottom.
- if (aligned_heap_base_min_address + size <= (char *)UnscaledOopHeapMax) {
+ if (aligned_heap_base_min_address + size <= UnscaledOopHeapMax) {
// Calc address range within we try to attach (range of possible start addresses).
- char* const highest_start = align_down((char *)UnscaledOopHeapMax - size, attach_point_alignment);
- char* const lowest_start = align_up(aligned_heap_base_min_address, attach_point_alignment);
- reserved = try_reserve_range(highest_start, lowest_start, attach_point_alignment,
- aligned_heap_base_min_address, (char *)UnscaledOopHeapMax, size, alignment, page_size);
+ uintptr_t const highest_start = align_down(UnscaledOopHeapMax - size, attach_point_alignment);
+ uintptr_t const lowest_start = align_up(aligned_heap_base_min_address, attach_point_alignment);
+ assert(lowest_start <= highest_start, "lowest: " INTPTR_FORMAT " highest: " INTPTR_FORMAT ,
+ lowest_start, highest_start);
+ reserved = try_reserve_range((char*)highest_start, (char*)lowest_start, attach_point_alignment,
+ (char*)aligned_heap_base_min_address, (char*)UnscaledOopHeapMax, size, alignment, page_size);
}
// zerobased: Attempt to allocate in the lower 32G.
- char *zerobased_max = (char *)OopEncodingHeapMax;
+ const uintptr_t zerobased_max = OopEncodingHeapMax;
// Give it several tries from top of range to bottom.
if (aligned_heap_base_min_address + size <= zerobased_max && // Zerobased theoretical possible.
((!reserved.is_reserved()) || // No previous try succeeded.
- (reserved.end() > zerobased_max))) { // Unscaled delivered an arbitrary address.
+ (reserved.end() > (char*)zerobased_max))) { // Unscaled delivered an arbitrary address.
// Release previous reservation
release(reserved);
// Calc address range within we try to attach (range of possible start addresses).
- char *const highest_start = align_down(zerobased_max - size, attach_point_alignment);
+ uintptr_t const highest_start = align_down(zerobased_max - size, attach_point_alignment);
// Need to be careful about size being guaranteed to be less
// than UnscaledOopHeapMax due to type constraints.
- char *lowest_start = aligned_heap_base_min_address;
- uint64_t unscaled_end = UnscaledOopHeapMax - size;
- if (unscaled_end < UnscaledOopHeapMax) { // unscaled_end wrapped if size is large
- lowest_start = MAX2(lowest_start, (char*)unscaled_end);
+ uintptr_t lowest_start = aligned_heap_base_min_address;
+ if (size < UnscaledOopHeapMax) {
+ lowest_start = MAX2(lowest_start, UnscaledOopHeapMax - size);
}
lowest_start = align_up(lowest_start, attach_point_alignment);
- reserved = try_reserve_range(highest_start, lowest_start, attach_point_alignment,
- aligned_heap_base_min_address, zerobased_max, size, alignment, page_size);
+ assert(lowest_start <= highest_start, "lowest: " INTPTR_FORMAT " highest: " INTPTR_FORMAT,
+ lowest_start, highest_start);
+ reserved = try_reserve_range((char*)highest_start, (char*)lowest_start, attach_point_alignment,
+ (char*)aligned_heap_base_min_address, (char*)zerobased_max, size, alignment, page_size);
}
// Now we go for heaps with base != 0. We need a noaccess prefix to efficiently
@@ -616,17 +619,17 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
// Try to attach at addresses that are aligned to OopEncodingHeapMax. Disjointbase mode.
char** addresses = get_attach_addresses_for_disjoint_mode();
int i = 0;
- while ((addresses[i] != nullptr) && // End of array not yet reached.
- ((!reserved.is_reserved()) || // No previous try succeeded.
- (reserved.end() > zerobased_max && // Not zerobased or unscaled address.
- // Not disjoint address.
+ while ((addresses[i] != nullptr) && // End of array not yet reached.
+ ((!reserved.is_reserved()) || // No previous try succeeded.
+ (reserved.end() > (char*)zerobased_max && // Not zerobased or unscaled address.
+ // Not disjoint address.
!CompressedOops::is_disjoint_heap_base_address((address)reserved.base())))) {
// Release previous reservation
release(reserved);
char* const attach_point = addresses[i];
- assert(attach_point >= aligned_heap_base_min_address, "Flag support broken");
+ assert((uintptr_t)attach_point >= aligned_heap_base_min_address, "Flag support broken");
reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, attach_point);
i++;
}
diff --git a/src/hotspot/share/memory/universe.cpp b/src/hotspot/share/memory/universe.cpp
index d389fe81806..4d2897be5eb 100644
--- a/src/hotspot/share/memory/universe.cpp
+++ b/src/hotspot/share/memory/universe.cpp
@@ -182,7 +182,6 @@ int Universe::_base_vtable_size = 0;
bool Universe::_bootstrapping = false;
bool Universe::_module_initialized = false;
bool Universe::_fully_initialized = false;
-volatile bool Universe::_is_shutting_down = false;
OopStorage* Universe::_vm_weak = nullptr;
OopStorage* Universe::_vm_global = nullptr;
@@ -1374,15 +1373,14 @@ static void log_cpu_time() {
}
void Universe::before_exit() {
- {
- // Acquire the Heap_lock to synchronize with VM_Heap_Sync_Operations,
- // which may depend on the value of _is_shutting_down flag.
- MutexLocker hl(Heap_lock);
- log_cpu_time();
- AtomicAccess::release_store(&_is_shutting_down, true);
- }
+ // Tell the GC that it is time to shutdown and to block requests for new GC pauses.
+ heap()->initiate_shutdown();
- heap()->before_exit();
+ // Log CPU time statistics before stopping the GC threads.
+ log_cpu_time();
+
+ // Stop the GC threads.
+ heap()->stop();
// Print GC/heap related information.
Log(gc, exit) log;
diff --git a/src/hotspot/share/memory/universe.hpp b/src/hotspot/share/memory/universe.hpp
index df2c1d66d3c..b2325c67ca0 100644
--- a/src/hotspot/share/memory/universe.hpp
+++ b/src/hotspot/share/memory/universe.hpp
@@ -128,9 +128,6 @@ class Universe: AllStatic {
static bool _module_initialized; // true after call_initPhase2 called
static bool _fully_initialized; // true after universe_init and initialize_vtables called
- // Shutdown
- static volatile bool _is_shutting_down;
-
// the array of preallocated errors with backtraces
static objArrayOop preallocated_out_of_memory_errors();
@@ -328,8 +325,6 @@ class Universe: AllStatic {
static bool is_module_initialized() { return _module_initialized; }
static bool is_fully_initialized() { return _fully_initialized; }
- static bool is_shutting_down() { return AtomicAccess::load_acquire(&_is_shutting_down); }
-
static bool on_page_boundary(void* addr);
static bool should_fill_in_stack_trace(Handle throwable);
static void check_alignment(uintx size, uintx alignment, const char* name);
diff --git a/src/hotspot/share/oops/bsmAttribute.hpp b/src/hotspot/share/oops/bsmAttribute.hpp
new file mode 100644
index 00000000000..a28d2757fb0
--- /dev/null
+++ b/src/hotspot/share/oops/bsmAttribute.hpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_OOPS_BSMATTRIBUTE_HPP
+#define SHARE_OOPS_BSMATTRIBUTE_HPP
+
+#include "oops/array.hpp"
+#include "utilities/checkedCast.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class ClassLoaderData;
+
+class BSMAttributeEntry {
+ friend class ConstantPool;
+ friend class BSMAttributeEntries;
+
+ u2 _bootstrap_method_index;
+ u2 _argument_count;
+
+ // The argument indexes are stored right after the object, in a contiguous array.
+ // [ bsmi_0 argc_0 arg_00 arg_01 ... arg_0N bsmi_1 argc_1 arg_10 ... arg_1N ... ]
+ // So in order to find the argument array, jump over ourselves.
+ const u2* argument_indexes() const {
+ return reinterpret_cast<const u2*>(this + 1);
+ }
+ u2* argument_indexes() {
+ return reinterpret_cast<u2*>(this + 1);
+ }
+ // These are overlays on top of the BSMAttributeEntries data array, do not construct.
+ BSMAttributeEntry() = delete;
+ NONCOPYABLE(BSMAttributeEntry);
+
+ void copy_args_into(BSMAttributeEntry* entry) const;
+
+public:
+ // Offsets for SA
+ enum {
+ _bsmi_offset = 0,
+ _argc_offset = 1,
+ _argv_offset = 2
+ };
+
+ int bootstrap_method_index() const {
+ return _bootstrap_method_index;
+ }
+ int argument_count() const {
+ return _argument_count;
+ }
+ int argument(int n) const {
+ assert(checked_cast<u2>(n) < _argument_count, "oob");
+ return argument_indexes()[n];
+ }
+
+ void set_argument(int index, u2 value) {
+ assert(index >= 0 && index < argument_count(), "invariant");
+ argument_indexes()[index] = value;
+ }
+
+ // How many u2s are required to store a BSM entry with argc arguments?
+ static int u2s_required (u2 argc) {
+ return 1 /* index */ + 1 /* argc */ + argc /* argv */;
+ }
+};
+
+// The BSMAttributeEntries stores the state of the BootstrapMethods attribute.
+class BSMAttributeEntries {
+ friend class VMStructs;
+ friend class JVMCIVMStructs;
+
+public:
+ class InsertionIterator {
+ friend BSMAttributeEntries;
+ BSMAttributeEntries* _insert_into;
+ // Current unused offset into BSMAEs offset array.
+ int _cur_offset;
+ // Current unused offset into BSMAEs bsm-data array.
+ int _cur_array;
+ public:
+ InsertionIterator() : _insert_into(nullptr), _cur_offset(-1), _cur_array(-1) {}
+ InsertionIterator(BSMAttributeEntries* insert_into, int cur_offset, int cur_array)
+ : _insert_into(insert_into),
+ _cur_offset(cur_offset),
+ _cur_array(cur_array) {}
+ InsertionIterator(const InsertionIterator&) = default;
+ InsertionIterator& operator=(const InsertionIterator&) = default;
+
+ int current_offset() const { return _cur_offset; }
+ // Add a new BSMAE, reserving the necessary memory for filling the argument vector.
+ // Returns null if there isn't enough space.
+ inline BSMAttributeEntry* reserve_new_entry(u2 bsmi, u2 argc);
+ };
+
+private:
+ // Each bootstrap method has a variable-sized array associated with it.
+ // We want constant-time lookup of the Nth BSM. Therefore, we use an offset table,
+ // such that the Nth BSM is located at _bootstrap_methods[_offsets[N]].
+ Array<u4>* _offsets;
+ Array<u2>* _bootstrap_methods;
+
+ // Copy the first num_entries into iter.
+ void copy_into(InsertionIterator& iter, int num_entries) const;
+
+public:
+ BSMAttributeEntries() : _offsets(nullptr), _bootstrap_methods(nullptr) {}
+ BSMAttributeEntries(Array<u4>* offsets, Array<u2>* bootstrap_methods)
+ : _offsets(offsets),
+ _bootstrap_methods(bootstrap_methods) {}
+
+ bool is_empty() const {
+ return _offsets == nullptr && _bootstrap_methods == nullptr;
+ }
+
+ Array<u4>*& offsets() { return _offsets; }
+ const Array<u4>* const& offsets() const { return _offsets; }
+ Array<u2>*& bootstrap_methods() { return _bootstrap_methods; }
+ const Array<u2>* const& bootstrap_methods() const { return _bootstrap_methods; }
+
+ BSMAttributeEntry* entry(int bsms_attribute_index) {
+ return reinterpret_cast<BSMAttributeEntry*>(_bootstrap_methods->adr_at(_offsets->at(bsms_attribute_index)));
+ }
+ const BSMAttributeEntry* entry(int bsms_attribute_index) const {
+ return reinterpret_cast<const BSMAttributeEntry*>(_bootstrap_methods->adr_at(_offsets->at(bsms_attribute_index)));
+ }
+
+ int number_of_entries() const {
+ return _offsets == nullptr ? 0 : _offsets->length();
+ }
+
+ // The number of U2s the BSM data consists of.
+ int array_length() const {
+ return _bootstrap_methods == nullptr ? 0 : _bootstrap_methods->length();
+ }
+
+ void deallocate_contents(ClassLoaderData* loader_data);
+
+ // Extend to have the space for both this BSMAEntries and other's.
+ // Does not copy in the other's BSMAEntrys, that must be done via the InsertionIterator.
+ // This starts an insertion iterator. Any call to start_extension must have a matching end_extension call.
+ InsertionIterator start_extension(const BSMAttributeEntries& other, ClassLoaderData* loader_data, TRAPS);
+ // Extend the BSMAEntries with an additional number_of_entries with a total data_size.
+ InsertionIterator start_extension(int number_of_entries, int data_size, ClassLoaderData* loader_data, TRAPS);
+ // Reallocates the underlying memory to fit the limits of the InsertionIterator precisely.
+ // This ends an insertion iteration. The memory is truncated to fit exactly the data used.
+ void end_extension(InsertionIterator& iter, ClassLoaderData* loader_data, TRAPS);
+ // Append all of the BSMAEs in other into this.
+ void append(const BSMAttributeEntries& other, ClassLoaderData* loader_data, TRAPS);
+};
+
+#endif // SHARE_OOPS_BSMATTRIBUTE_HPP
diff --git a/src/hotspot/share/oops/bsmAttribute.inline.hpp b/src/hotspot/share/oops/bsmAttribute.inline.hpp
new file mode 100644
index 00000000000..e678c280c26
--- /dev/null
+++ b/src/hotspot/share/oops/bsmAttribute.inline.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_OOPS_BSMATTRIBUTE_INLINE_HPP
+#define SHARE_OOPS_BSMATTRIBUTE_INLINE_HPP
+
+#include "oops/bsmAttribute.hpp"
+
+inline BSMAttributeEntry* BSMAttributeEntries::InsertionIterator::reserve_new_entry(u2 bsmi, u2 argc) {
+ assert(_insert_into->offsets() != nullptr, "must");
+ assert(_insert_into->bootstrap_methods() != nullptr, "must");
+
+ if (_cur_offset + 1 > _insert_into->offsets()->length() ||
+ _cur_array + BSMAttributeEntry::u2s_required(argc) > _insert_into->bootstrap_methods()->length()) {
+ return nullptr;
+ }
+ _insert_into->offsets()->at_put(_cur_offset, _cur_array);
+ BSMAttributeEntry* e = _insert_into->entry(_cur_offset);
+ e->_bootstrap_method_index = bsmi;
+ e->_argument_count = argc;
+
+  _cur_array += BSMAttributeEntry::u2s_required(argc); // same 2 + argc u2s as the bounds check above
+ _cur_offset += 1;
+ return e;
+}
+
+inline void BSMAttributeEntry::copy_args_into(BSMAttributeEntry* entry) const {
+ assert(entry->argument_count() == this->argument_count(), "must be same");
+ for (int i = 0; i < argument_count(); i++) {
+ entry->set_argument(i, this->argument(i));
+ }
+}
+
+#endif // SHARE_OOPS_BSMATTRIBUTE_INLINE_HPP
diff --git a/src/hotspot/share/oops/constantPool.cpp b/src/hotspot/share/oops/constantPool.cpp
index 95a43b07bd7..640b2f2460f 100644
--- a/src/hotspot/share/oops/constantPool.cpp
+++ b/src/hotspot/share/oops/constantPool.cpp
@@ -131,8 +131,7 @@ void ConstantPool::deallocate_contents(ClassLoaderData* loader_data) {
MetadataFactory::free_array(loader_data, resolved_klasses());
set_resolved_klasses(nullptr);
- MetadataFactory::free_array(loader_data, operands());
- set_operands(nullptr);
+ bsm_entries().deallocate_contents(loader_data);
release_C_heap_structures();
@@ -152,7 +151,8 @@ void ConstantPool::metaspace_pointers_do(MetaspaceClosure* it) {
it->push(&_tags, MetaspaceClosure::_writable);
it->push(&_cache);
it->push(&_pool_holder);
- it->push(&_operands);
+ it->push(&bsm_entries().offsets());
+ it->push(&bsm_entries().bootstrap_methods());
it->push(&_resolved_klasses, MetaspaceClosure::_writable);
for (int i = 0; i < length(); i++) {
@@ -761,7 +761,7 @@ Method* ConstantPool::method_at_if_loaded(const constantPoolHandle& cpool,
if (cpool->cache() == nullptr) return nullptr; // nothing to load yet
if (!(which >= 0 && which < cpool->resolved_method_entries_length())) {
// FIXME: should be an assert
- log_debug(class, resolve)("bad operand %d in:", which); cpool->print();
+    log_debug(class, resolve)("bad resolved method entry %d in:", which); cpool->print();
return nullptr;
}
return cpool->cache()->method_if_resolved(which);
@@ -1562,8 +1562,8 @@ bool ConstantPool::compare_entry_to(int index1, const constantPoolHandle& cp2,
int i1 = bootstrap_methods_attribute_index(index1);
int i2 = cp2->bootstrap_methods_attribute_index(index2);
bool match_entry = compare_entry_to(k1, cp2, k2);
- bool match_operand = compare_operand_to(i1, cp2, i2);
- return (match_entry && match_operand);
+ bool match_bsm = compare_bootstrap_entry_to(i1, cp2, i2);
+ return (match_entry && match_bsm);
} break;
case JVM_CONSTANT_InvokeDynamic:
@@ -1573,8 +1573,8 @@ bool ConstantPool::compare_entry_to(int index1, const constantPoolHandle& cp2,
int i1 = bootstrap_methods_attribute_index(index1);
int i2 = cp2->bootstrap_methods_attribute_index(index2);
bool match_entry = compare_entry_to(k1, cp2, k2);
- bool match_operand = compare_operand_to(i1, cp2, i2);
- return (match_entry && match_operand);
+ bool match_bsm = compare_bootstrap_entry_to(i1, cp2, i2);
+ return (match_entry && match_bsm);
} break;
case JVM_CONSTANT_String:
@@ -1608,140 +1608,29 @@ bool ConstantPool::compare_entry_to(int index1, const constantPoolHandle& cp2,
return false;
} // end compare_entry_to()
-
-// Resize the operands array with delta_len and delta_size.
+// Extend the BSMAttributeEntries with the length and size of the ext_cp BSMAttributeEntries.
// Used in RedefineClasses for CP merge.
-void ConstantPool::resize_operands(int delta_len, int delta_size, TRAPS) {
- int old_len = operand_array_length(operands());
- int new_len = old_len + delta_len;
- int min_len = (delta_len > 0) ? old_len : new_len;
-
- int old_size = operands()->length();
- int new_size = old_size + delta_size;
- int min_size = (delta_size > 0) ? old_size : new_size;
-
- ClassLoaderData* loader_data = pool_holder()->class_loader_data();
- Array* new_ops = MetadataFactory::new_array(loader_data, new_size, CHECK);
-
- // Set index in the resized array for existing elements only
- for (int idx = 0; idx < min_len; idx++) {
- int offset = operand_offset_at(idx); // offset in original array
- operand_offset_at_put(new_ops, idx, offset + 2*delta_len); // offset in resized array
- }
- // Copy the bootstrap specifiers only
- Copy::conjoint_memory_atomic(operands()->adr_at(2*old_len),
- new_ops->adr_at(2*new_len),
- (min_size - 2*min_len) * sizeof(u2));
- // Explicitly deallocate old operands array.
- // Note, it is not needed for 7u backport.
- if ( operands() != nullptr) { // the safety check
- MetadataFactory::free_array(loader_data, operands());
- }
- set_operands(new_ops);
-} // end resize_operands()
+BSMAttributeEntries::InsertionIterator
+ConstantPool::start_extension(const constantPoolHandle& ext_cp, TRAPS) {
+ BSMAttributeEntries::InsertionIterator iter =
+ bsm_entries().start_extension(ext_cp->bsm_entries(), pool_holder()->class_loader_data(),
+ CHECK_(BSMAttributeEntries::InsertionIterator()));
+ return iter;
+}
-// Extend the operands array with the length and size of the ext_cp operands.
-// Used in RedefineClasses for CP merge.
-void ConstantPool::extend_operands(const constantPoolHandle& ext_cp, TRAPS) {
- int delta_len = operand_array_length(ext_cp->operands());
- if (delta_len == 0) {
- return; // nothing to do
- }
- int delta_size = ext_cp->operands()->length();
-
- assert(delta_len > 0 && delta_size > 0, "extended operands array must be bigger");
-
- if (operand_array_length(operands()) == 0) {
- ClassLoaderData* loader_data = pool_holder()->class_loader_data();
- Array* new_ops = MetadataFactory::new_array(loader_data, delta_size, CHECK);
- // The first element index defines the offset of second part
- operand_offset_at_put(new_ops, 0, 2*delta_len); // offset in new array
- set_operands(new_ops);
- } else {
- resize_operands(delta_len, delta_size, CHECK);
- }
-
-} // end extend_operands()
+void ConstantPool::end_extension(BSMAttributeEntries::InsertionIterator iter, TRAPS) {
+ bsm_entries().end_extension(iter, pool_holder()->class_loader_data(), THREAD);
+}
-// Shrink the operands array to a smaller array with new_len length.
-// Used in RedefineClasses for CP merge.
-void ConstantPool::shrink_operands(int new_len, TRAPS) {
- int old_len = operand_array_length(operands());
- if (new_len == old_len) {
- return; // nothing to do
- }
- assert(new_len < old_len, "shrunken operands array must be smaller");
-
- int free_base = operand_next_offset_at(new_len - 1);
- int delta_len = new_len - old_len;
- int delta_size = 2*delta_len + free_base - operands()->length();
-
- resize_operands(delta_len, delta_size, CHECK);
-
-} // end shrink_operands()
-
-
-void ConstantPool::copy_operands(const constantPoolHandle& from_cp,
- const constantPoolHandle& to_cp,
- TRAPS) {
-
- int from_oplen = operand_array_length(from_cp->operands());
- int old_oplen = operand_array_length(to_cp->operands());
- if (from_oplen != 0) {
- ClassLoaderData* loader_data = to_cp->pool_holder()->class_loader_data();
- // append my operands to the target's operands array
- if (old_oplen == 0) {
- // Can't just reuse from_cp's operand list because of deallocation issues
- int len = from_cp->operands()->length();
- Array* new_ops = MetadataFactory::new_array(loader_data, len, CHECK);
- Copy::conjoint_memory_atomic(
- from_cp->operands()->adr_at(0), new_ops->adr_at(0), len * sizeof(u2));
- to_cp->set_operands(new_ops);
- } else {
- int old_len = to_cp->operands()->length();
- int from_len = from_cp->operands()->length();
- int old_off = old_oplen * sizeof(u2);
- int from_off = from_oplen * sizeof(u2);
- // Use the metaspace for the destination constant pool
- Array* new_operands = MetadataFactory::new_array(loader_data, old_len + from_len, CHECK);
- int fillp = 0, len = 0;
- // first part of dest
- Copy::conjoint_memory_atomic(to_cp->operands()->adr_at(0),
- new_operands->adr_at(fillp),
- (len = old_off) * sizeof(u2));
- fillp += len;
- // first part of src
- Copy::conjoint_memory_atomic(from_cp->operands()->adr_at(0),
- new_operands->adr_at(fillp),
- (len = from_off) * sizeof(u2));
- fillp += len;
- // second part of dest
- Copy::conjoint_memory_atomic(to_cp->operands()->adr_at(old_off),
- new_operands->adr_at(fillp),
- (len = old_len - old_off) * sizeof(u2));
- fillp += len;
- // second part of src
- Copy::conjoint_memory_atomic(from_cp->operands()->adr_at(from_off),
- new_operands->adr_at(fillp),
- (len = from_len - from_off) * sizeof(u2));
- fillp += len;
- assert(fillp == new_operands->length(), "");
-
- // Adjust indexes in the first part of the copied operands array.
- for (int j = 0; j < from_oplen; j++) {
- int offset = operand_offset_at(new_operands, old_oplen + j);
- assert(offset == operand_offset_at(from_cp->operands(), j), "correct copy");
- offset += old_len; // every new tuple is preceded by old_len extra u2's
- operand_offset_at_put(new_operands, old_oplen + j, offset);
- }
-
- // replace target operands array with combined array
- to_cp->set_operands(new_operands);
- }
- }
-} // end copy_operands()
+void ConstantPool::copy_bsm_entries(const constantPoolHandle& from_cp,
+ const constantPoolHandle& to_cp,
+ TRAPS) {
+ to_cp->bsm_entries().append(from_cp->bsm_entries(),
+ to_cp->pool_holder()->class_loader_data(),
+ THREAD);
+}
// Copy this constant pool's entries at start_i to end_i (inclusive)
@@ -1771,7 +1660,7 @@ void ConstantPool::copy_cp_to_impl(const constantPoolHandle& from_cp, int start_
break;
}
}
- copy_operands(from_cp, to_cp, CHECK);
+ copy_bsm_entries(from_cp, to_cp, THREAD);
} // end copy_cp_to_impl()
@@ -1895,7 +1784,7 @@ void ConstantPool::copy_entry_to(const constantPoolHandle& from_cp, int from_i,
{
int k1 = from_cp->bootstrap_methods_attribute_index(from_i);
int k2 = from_cp->bootstrap_name_and_type_ref_index_at(from_i);
- k1 += operand_array_length(to_cp->operands()); // to_cp might already have operands
+      k1 += to_cp->bsm_entries().number_of_entries(); // shift by to_cp's existing BSM entry count
to_cp->dynamic_constant_at_put(to_i, k1, k2);
} break;
@@ -1903,7 +1792,7 @@ void ConstantPool::copy_entry_to(const constantPoolHandle& from_cp, int from_i,
{
int k1 = from_cp->bootstrap_methods_attribute_index(from_i);
int k2 = from_cp->bootstrap_name_and_type_ref_index_at(from_i);
- k1 += operand_array_length(to_cp->operands()); // to_cp might already have operands
+      k1 += to_cp->bsm_entries().number_of_entries(); // shift by to_cp's existing BSM entry count
to_cp->invoke_dynamic_at_put(to_i, k1, k2);
} break;
@@ -1939,9 +1828,9 @@ int ConstantPool::find_matching_entry(int pattern_i,
// Compare this constant pool's bootstrap specifier at idx1 to the constant pool
// cp2's bootstrap specifier at idx2.
-bool ConstantPool::compare_operand_to(int idx1, const constantPoolHandle& cp2, int idx2) {
- BSMAttributeEntry* e1 = bsm_attribute_entry(idx1);
- BSMAttributeEntry* e2 = cp2->bsm_attribute_entry(idx2);
+bool ConstantPool::compare_bootstrap_entry_to(int idx1, const constantPoolHandle& cp2, int idx2) {
+ const BSMAttributeEntry* const e1 = bsm_attribute_entry(idx1);
+ const BSMAttributeEntry* const e2 = cp2->bsm_attribute_entry(idx2);
int k1 = e1->bootstrap_method_index();
int k2 = e2->bootstrap_method_index();
bool match = compare_entry_to(k1, cp2, k2);
@@ -1949,34 +1838,37 @@ bool ConstantPool::compare_operand_to(int idx1, const constantPoolHandle& cp2, i
if (!match) {
return false;
}
- int argc = e1->argument_count();
- if (argc == e2->argument_count()) {
- for (int j = 0; j < argc; j++) {
- k1 = e1->argument_index(j);
- k2 = e2->argument_index(j);
- match = compare_entry_to(k1, cp2, k2);
- if (!match) {
- return false;
- }
- }
- return true; // got through loop; all elements equal
+
+ const int argc = e1->argument_count();
+ if (argc != e2->argument_count()) {
+ return false;
}
- return false;
-} // end compare_operand_to()
+
+ for (int j = 0; j < argc; j++) {
+ k1 = e1->argument(j);
+ k2 = e2->argument(j);
+ match = compare_entry_to(k1, cp2, k2);
+ if (!match) {
+ return false;
+ }
+ }
+
+ return true; // got through loop; all elements equal
+} // end compare_bootstrap_entry_to()
// Search constant pool search_cp for a bootstrap specifier that matches
// this constant pool's bootstrap specifier data at pattern_i index.
// Return the index of a matching bootstrap attribute record or (-1) if there is no match.
-int ConstantPool::find_matching_operand(int pattern_i,
- const constantPoolHandle& search_cp, int search_len) {
- for (int i = 0; i < search_len; i++) {
- bool found = compare_operand_to(pattern_i, search_cp, i);
+int ConstantPool::find_matching_bsm_entry(int pattern_i,
+ const constantPoolHandle& search_cp, int offset_limit) {
+ for (int i = 0; i < offset_limit; i++) {
+ bool found = compare_bootstrap_entry_to(pattern_i, search_cp, i);
if (found) {
return i;
}
}
return -1; // bootstrap specifier data not found; return unused index (-1)
-} // end find_matching_operand()
+} // end find_matching_bsm_entry()
#ifndef PRODUCT
@@ -2411,7 +2303,7 @@ void ConstantPool::print_value_on(outputStream* st) const {
assert(is_constantPool(), "must be constantPool");
st->print("constant pool [%d]", length());
if (has_preresolution()) st->print("/preresolution");
- if (operands() != nullptr) st->print("/operands[%d]", operands()->length());
+ if (!bsm_entries().is_empty()) st->print("/BSMs[%d]", bsm_entries().bootstrap_methods()->length());
print_address_on(st);
if (pool_holder() != nullptr) {
st->print(" for ");
@@ -2446,3 +2338,87 @@ void ConstantPool::verify_on(outputStream* st) {
guarantee(pool_holder()->is_klass(), "should be klass");
}
}
+
+void BSMAttributeEntries::deallocate_contents(ClassLoaderData* loader_data) {
+ MetadataFactory::free_array(loader_data, this->_offsets);
+ MetadataFactory::free_array(loader_data, this->_bootstrap_methods);
+ this->_offsets = nullptr;
+ this->_bootstrap_methods = nullptr;
+}
+
+void BSMAttributeEntries::copy_into(InsertionIterator& iter, int num_entries) const {
+ assert(num_entries + iter._cur_offset <= iter._insert_into->_offsets->length(), "must");
+ for (int i = 0; i < num_entries; i++) {
+ const BSMAttributeEntry* e = entry(i);
+ BSMAttributeEntry* e_new = iter.reserve_new_entry(e->bootstrap_method_index(), e->argument_count());
+ assert(e_new != nullptr, "must be");
+ e->copy_args_into(e_new);
+ }
+}
+
+BSMAttributeEntries::InsertionIterator
+BSMAttributeEntries::start_extension(const BSMAttributeEntries& other, ClassLoaderData* loader_data, TRAPS) {
+ InsertionIterator iter = start_extension(other.number_of_entries(), other.array_length(),
+ loader_data, CHECK_(BSMAttributeEntries::InsertionIterator()));
+ return iter;
+}
+
+BSMAttributeEntries::InsertionIterator
+BSMAttributeEntries::start_extension(int number_of_entries, int array_length,
+ ClassLoaderData* loader_data, TRAPS) {
+ InsertionIterator extension_iterator(this, this->number_of_entries(), this->array_length());
+ int new_number_of_entries = this->number_of_entries() + number_of_entries;
+ int new_array_length = this->array_length() + array_length;
+ int invalid_index = new_array_length;
+
+ Array* new_offsets =
+ MetadataFactory::new_array(loader_data, new_number_of_entries, invalid_index, CHECK_(InsertionIterator()));
+ Array* new_array = MetadataFactory::new_array(loader_data, new_array_length, CHECK_(InsertionIterator()));
+ { // Copy over all the old BSMAEntry's and their respective offsets
+ BSMAttributeEntries carrier(new_offsets, new_array);
+ InsertionIterator copy_iter(&carrier, 0, 0);
+ copy_into(copy_iter, this->number_of_entries());
+ }
+ // Replace content
+ deallocate_contents(loader_data);
+ _offsets = new_offsets;
+ _bootstrap_methods = new_array;
+ return extension_iterator;
+}
+
+
+void BSMAttributeEntries::append(const BSMAttributeEntries& other, ClassLoaderData* loader_data, TRAPS) {
+ if (other.number_of_entries() == 0) {
+ return; // Done!
+ }
+ InsertionIterator iter = start_extension(other, loader_data, CHECK);
+ other.copy_into(iter, other.number_of_entries());
+ end_extension(iter, loader_data, THREAD);
+}
+
+void BSMAttributeEntries::end_extension(InsertionIterator& iter, ClassLoaderData* loader_data, TRAPS) {
+ assert(iter._insert_into == this, "must be");
+ assert(iter._cur_offset <= this->_offsets->length(), "must be");
+ assert(iter._cur_array <= this->_bootstrap_methods->length(), "must be");
+
+ // Did we fill up all of the available space? If so, do nothing.
+ if (iter._cur_offset == this->_offsets->length() &&
+ iter._cur_array == this->_bootstrap_methods->length()) {
+ return;
+ }
+
+ // We used less, truncate by allocating new arrays
+ Array* new_offsets =
+ MetadataFactory::new_array(loader_data, iter._cur_offset, 0, CHECK);
+ Array* new_array =
+ MetadataFactory::new_array(loader_data, iter._cur_array, CHECK);
+ { // Copy over the constructed BSMAEntry's
+ BSMAttributeEntries carrier(new_offsets, new_array);
+ InsertionIterator copy_iter(&carrier, 0, 0);
+ copy_into(copy_iter, iter._cur_offset);
+ }
+
+ deallocate_contents(loader_data);
+ _offsets = new_offsets;
+ _bootstrap_methods = new_array;
+}
diff --git a/src/hotspot/share/oops/constantPool.hpp b/src/hotspot/share/oops/constantPool.hpp
index 9cbeb1245be..6c519945f4d 100644
--- a/src/hotspot/share/oops/constantPool.hpp
+++ b/src/hotspot/share/oops/constantPool.hpp
@@ -27,6 +27,7 @@
#include "memory/allocation.hpp"
#include "oops/arrayOop.hpp"
+#include "oops/bsmAttribute.inline.hpp"
#include "oops/cpCache.hpp"
#include "oops/objArrayOop.hpp"
#include "oops/oopHandle.hpp"
@@ -77,43 +78,6 @@ public:
}
};
-class BSMAttributeEntry {
- friend class ConstantPool;
- u2 _bootstrap_method_index;
- u2 _argument_count;
-
- // The argument indexes are stored right after the object, in a contiguous array.
- // [ bsmi_0 argc_0 arg_00 arg_01 ... arg_0N bsmi_1 argc_1 arg_10 ... arg_1N ... ]
- // So in order to find the argument array, jump over ourselves.
- const u2* argument_indexes() const {
- return reinterpret_cast(this + 1);
- }
- u2* argument_indexes() {
- return reinterpret_cast(this + 1);
- }
- // These are overlays on top of the operands array. Do not construct.
- BSMAttributeEntry() = delete;
-
-public:
- // Offsets for SA
- enum {
- _bsmi_offset = 0,
- _argc_offset = 1,
- _argv_offset = 2
- };
-
- int bootstrap_method_index() const {
- return _bootstrap_method_index;
- }
- int argument_count() const {
- return _argument_count;
- }
- int argument_index(int n) const {
- assert(checked_cast(n) < _argument_count, "oob");
- return argument_indexes()[n];
- }
-};
-
class ConstantPool : public Metadata {
friend class VMStructs;
friend class JVMCIVMStructs;
@@ -126,7 +90,8 @@ class ConstantPool : public Metadata {
Array* _tags; // the tag array describing the constant pool's contents
ConstantPoolCache* _cache; // the cache holding interpreter runtime information
InstanceKlass* _pool_holder; // the corresponding class
- Array* _operands; // for variable-sized (InvokeDynamic) nodes, usually empty
+
+ BSMAttributeEntries _bsm_entries;
// Consider using an array of compressed klass pointers to
// save space on 64-bit platforms.
@@ -167,8 +132,6 @@ class ConstantPool : public Metadata {
u1* tag_addr_at(int cp_index) const { return tags()->adr_at(cp_index); }
- void set_operands(Array* operands) { _operands = operands; }
-
u2 flags() const { return _flags; }
void set_flags(u2 f) { _flags = f; }
@@ -208,7 +171,13 @@ class ConstantPool : public Metadata {
virtual bool is_constantPool() const { return true; }
Array* tags() const { return _tags; }
- Array* operands() const { return _operands; }
+
+ BSMAttributeEntries& bsm_entries() {
+ return _bsm_entries;
+ }
+ const BSMAttributeEntries& bsm_entries() const {
+ return _bsm_entries;
+ }
bool has_preresolution() const { return (_flags & _has_preresolution) != 0; }
void set_has_preresolution() {
@@ -556,76 +525,21 @@ class ConstantPool : public Metadata {
assert(tag_at(cp_index).has_bootstrap(), "Corrupted constant pool");
return extract_low_short_from_int(*int_at_addr(cp_index));
}
- // The first part of the operands array consists of an index into the second part.
- // Extract a 32-bit index value from the first part.
- static int operand_offset_at(Array* operands, int bsms_attribute_index) {
- int n = (bsms_attribute_index * 2);
- assert(n >= 0 && n+2 <= operands->length(), "oob");
- // The first 32-bit index points to the beginning of the second part
- // of the operands array. Make sure this index is in the first part.
- DEBUG_ONLY(int second_part = build_int_from_shorts(operands->at(0),
- operands->at(1)));
- assert(second_part == 0 || n+2 <= second_part, "oob (2)");
- int offset = build_int_from_shorts(operands->at(n+0),
- operands->at(n+1));
- // The offset itself must point into the second part of the array.
- assert(offset == 0 || (offset >= second_part && offset <= operands->length()), "oob (3)");
- return offset;
- }
- static void operand_offset_at_put(Array* operands, int bsms_attribute_index, int offset) {
- int n = bsms_attribute_index * 2;
- assert(n >= 0 && n+2 <= operands->length(), "oob");
- operands->at_put(n+0, extract_low_short_from_int(offset));
- operands->at_put(n+1, extract_high_short_from_int(offset));
- }
- static int operand_array_length(Array* operands) {
- if (operands == nullptr || operands->length() == 0) return 0;
- int second_part = operand_offset_at(operands, 0);
- return (second_part / 2);
- }
-
-#ifdef ASSERT
- // operand tuples fit together exactly, end to end
- static int operand_limit_at(Array* operands, int bsms_attribute_index) {
- int nextidx = bsms_attribute_index + 1;
- if (nextidx == operand_array_length(operands))
- return operands->length();
- else
- return operand_offset_at(operands, nextidx);
- }
-#endif //ASSERT
-
- // These functions are used in RedefineClasses for CP merge
- int operand_offset_at(int bsms_attribute_index) {
- assert(0 <= bsms_attribute_index &&
- bsms_attribute_index < operand_array_length(operands()),
- "Corrupted CP operands");
- return operand_offset_at(operands(), bsms_attribute_index);
- }
BSMAttributeEntry* bsm_attribute_entry(int bsms_attribute_index) {
- int offset = operand_offset_at(bsms_attribute_index);
- return reinterpret_cast(operands()->adr_at(offset));
+ return _bsm_entries.entry(bsms_attribute_index);
}
- int operand_next_offset_at(int bsms_attribute_index) {
- BSMAttributeEntry* bsme = bsm_attribute_entry(bsms_attribute_index);
- u2* argv_start = bsme->argument_indexes();
- int offset = argv_start - operands()->data();
- return offset + bsme->argument_count();
- }
- // Compare a bootstrap specifier data in the operands arrays
- bool compare_operand_to(int bsms_attribute_index1, const constantPoolHandle& cp2,
- int bsms_attribute_index2);
- // Find a bootstrap specifier data in the operands array
- int find_matching_operand(int bsms_attribute_index, const constantPoolHandle& search_cp,
- int operands_cur_len);
- // Resize the operands array with delta_len and delta_size
- void resize_operands(int delta_len, int delta_size, TRAPS);
- // Extend the operands array with the length and size of the ext_cp operands
- void extend_operands(const constantPoolHandle& ext_cp, TRAPS);
- // Shrink the operands array to a smaller array with new_len length
- void shrink_operands(int new_len, TRAPS);
+ bool compare_bootstrap_entry_to(int bsms_attribute_index1, const constantPoolHandle& cp2,
+ int bsms_attribute_index2);
+ // Find a BSM entry in search_cp that matches the BSM at bsm_attribute_index.
+ // Return -1 if not found.
+ int find_matching_bsm_entry(int bsms_attribute_index, const constantPoolHandle& search_cp,
+ int offset_limit);
+ // Extend the BSM attribute storage to fit both the current data and the BSM data in ext_cp.
+ // Use the returned InsertionIterator to fill out the newly allocated space.
+ BSMAttributeEntries::InsertionIterator start_extension(const constantPoolHandle& ext_cp, TRAPS);
+ void end_extension(BSMAttributeEntries::InsertionIterator iter, TRAPS);
u2 bootstrap_method_ref_index_at(int cp_index) {
assert(tag_at(cp_index).has_bootstrap(), "Corrupted constant pool");
@@ -641,7 +555,7 @@ class ConstantPool : public Metadata {
int bsmai = bootstrap_methods_attribute_index(cp_index);
BSMAttributeEntry* bsme = bsm_attribute_entry(bsmai);
assert((uint)j < (uint)bsme->argument_count(), "oob");
- return bsm_attribute_entry(bsmai)->argument_index(j);
+ return bsm_attribute_entry(bsmai)->argument(j);
}
// The following methods (name/signature/klass_ref_at, klass_ref_at_noresolve,
@@ -848,7 +762,7 @@ private:
}
static void copy_cp_to_impl(const constantPoolHandle& from_cp, int start_cpi, int end_cpi, const constantPoolHandle& to_cp, int to_cpi, TRAPS);
static void copy_entry_to(const constantPoolHandle& from_cp, int from_cpi, const constantPoolHandle& to_cp, int to_cpi);
- static void copy_operands(const constantPoolHandle& from_cp, const constantPoolHandle& to_cp, TRAPS);
+ static void copy_bsm_entries(const constantPoolHandle& from_cp, const constantPoolHandle& to_cp, TRAPS);
int find_matching_entry(int pattern_i, const constantPoolHandle& search_cp);
int version() const { return _saved._version; }
void set_version(int version) { _saved._version = version; }
diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp
index 524dee6e06a..667270d96b4 100644
--- a/src/hotspot/share/opto/chaitin.cpp
+++ b/src/hotspot/share/opto/chaitin.cpp
@@ -1471,6 +1471,65 @@ static OptoReg::Name find_first_set(LRG& lrg, RegMask& mask) {
return assigned;
}
+OptoReg::Name PhaseChaitin::select_bias_lrg_color(LRG& lrg) {
+ uint bias_lrg1_idx = _lrg_map.find(lrg._copy_bias);
+ uint bias_lrg2_idx = _lrg_map.find(lrg._copy_bias2);
+
+ // If bias_lrg1 has a color
+ if (bias_lrg1_idx != 0 && !_ifg->_yanked->test(bias_lrg1_idx)) {
+ OptoReg::Name reg = lrgs(bias_lrg1_idx).reg();
+ // and it is legal for lrg
+ if (is_legal_reg(lrg, reg)) {
+ return reg;
+ }
+ }
+
+ // If bias_lrg2 has a color
+ if (bias_lrg2_idx != 0 && !_ifg->_yanked->test(bias_lrg2_idx)) {
+ OptoReg::Name reg = lrgs(bias_lrg2_idx).reg();
+ // and it is legal for lrg
+ if (is_legal_reg(lrg, reg)) {
+ return reg;
+ }
+ }
+
+ uint bias_lrg_idx = 0;
+ if (bias_lrg1_idx != 0 && bias_lrg2_idx != 0) {
+ // Since none of the bias live ranges are part of the IFG yet, constrain the
+ // definition mask with the bias live range with the least degrees of
+ // freedom. This will increase the chances of register sharing once the bias
+ // live range becomes part of the IFG.
+ lrgs(bias_lrg1_idx).compute_set_mask_size();
+ lrgs(bias_lrg2_idx).compute_set_mask_size();
+ bias_lrg_idx = lrgs(bias_lrg1_idx).degrees_of_freedom() >
+ lrgs(bias_lrg2_idx).degrees_of_freedom()
+ ? bias_lrg2_idx
+ : bias_lrg1_idx;
+ } else if (bias_lrg1_idx != 0) {
+ bias_lrg_idx = bias_lrg1_idx;
+ } else if (bias_lrg2_idx != 0) {
+ bias_lrg_idx = bias_lrg2_idx;
+ }
+
+ // Register masks with offset excludes all mask bits before the offset.
+ // Such masks are mainly used for allocation from stack slots. Constrain the
+ // register mask of definition live range using bias mask only if
+ // both masks have zero offset.
+ if (bias_lrg_idx != 0 && !lrg.mask().is_offset() &&
+ !lrgs(bias_lrg_idx).mask().is_offset()) {
+ // Choose a color which is legal for bias_lrg
+ ResourceMark rm(C->regmask_arena());
+ RegMask tempmask(lrg.mask(), C->regmask_arena());
+ tempmask.and_with(lrgs(bias_lrg_idx).mask());
+ tempmask.clear_to_sets(lrg.num_regs());
+ OptoReg::Name reg = find_first_set(lrg, tempmask);
+ if (OptoReg::is_valid(reg)) {
+ return reg;
+ }
+ }
+ return OptoReg::Bad;
+}
+
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
@@ -1492,25 +1551,10 @@ OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
}
}
- uint copy_lrg = _lrg_map.find(lrg._copy_bias);
- if (copy_lrg != 0) {
- // If he has a color,
- if(!_ifg->_yanked->test(copy_lrg)) {
- OptoReg::Name reg = lrgs(copy_lrg).reg();
- // And it is legal for you,
- if (is_legal_reg(lrg, reg)) {
- return reg;
- }
- } else if (!lrg.mask().is_offset()) {
- // Choose a color which is legal for him
- ResourceMark rm(C->regmask_arena());
- RegMask tempmask(lrg.mask(), C->regmask_arena());
- tempmask.and_with(lrgs(copy_lrg).mask());
- tempmask.clear_to_sets(lrg.num_regs());
- OptoReg::Name reg = find_first_set(lrg, tempmask);
- if (OptoReg::is_valid(reg))
- return reg;
- }
+ // Try biasing the color with non-interfering bias live range[s].
+ OptoReg::Name reg = select_bias_lrg_color(lrg);
+ if (OptoReg::is_valid(reg)) {
+ return reg;
}
// If no bias info exists, just go with the register selection ordering
@@ -1524,7 +1568,7 @@ OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
// copy removal to remove many more copies, by preventing a just-assigned
// register from being repeatedly assigned.
- OptoReg::Name reg = lrg.mask().find_first_elem();
+ reg = lrg.mask().find_first_elem();
if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
// This 'Remove; find; Insert' idiom is an expensive way to find the
// SECOND element in the mask.
@@ -1640,6 +1684,27 @@ uint PhaseChaitin::Select( ) {
}
}
}
+
+ Node* def = lrg->_def;
+ if (lrg->is_singledef() && !lrg->_is_bound && def->is_Mach()) {
+ MachNode* mdef = def->as_Mach();
+ if (Matcher::is_register_biasing_candidate(mdef, 1)) {
+ Node* in1 = mdef->in(mdef->operand_index(1));
+ if (in1 != nullptr && lrg->_copy_bias == 0) {
+ lrg->_copy_bias = _lrg_map.find(in1);
+ }
+ }
+
+ // For commutative operations, def allocation can also be
+ // biased towards LRG of second input's def.
+ if (Matcher::is_register_biasing_candidate(mdef, 2)) {
+ Node* in2 = mdef->in(mdef->operand_index(2));
+ if (in2 != nullptr && lrg->_copy_bias2 == 0) {
+ lrg->_copy_bias2 = _lrg_map.find(in2);
+ }
+ }
+ }
+
//assert(is_infinite_stack == lrg->mask().is_infinite_stack(), "nbrs must not change InfiniteStackedness");
// Aligned pairs need aligned masks
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp
index b477c54fcae..ac072e94e2b 100644
--- a/src/hotspot/share/opto/chaitin.hpp
+++ b/src/hotspot/share/opto/chaitin.hpp
@@ -63,6 +63,7 @@ public:
uint _risk_bias; // Index of LRG which we want to avoid color
uint _copy_bias; // Index of LRG which we want to share color
+ uint _copy_bias2; // Index of second LRG which we want to share color
uint _next; // Index of next LRG in linked list
uint _prev; // Index of prev LRG in linked list
@@ -703,6 +704,8 @@ private:
OptoReg::Name choose_color(LRG& lrg);
// Helper function which implements biasing heuristic
OptoReg::Name bias_color(LRG& lrg);
+ // Helper function which implements color biasing
+ OptoReg::Name select_bias_lrg_color(LRG& lrg);
// Split uncolorable live ranges
// Return new number of live ranges
diff --git a/src/hotspot/share/opto/idealGraphPrinter.cpp b/src/hotspot/share/opto/idealGraphPrinter.cpp
index 6a738878a1b..5070a9f00e1 100644
--- a/src/hotspot/share/opto/idealGraphPrinter.cpp
+++ b/src/hotspot/share/opto/idealGraphPrinter.cpp
@@ -35,6 +35,97 @@
#ifndef PRODUCT
+// Support for printing properties
+class PrintProperties
+{
+private:
+ IdealGraphPrinter* _printer;
+
+public:
+ PrintProperties(IdealGraphPrinter* printer) : _printer(printer) {}
+ void print_node_properties(Node* node);
+ void print_lrg_properties(const LRG& lrg, const char* buffer);
+ void print_property(int flag, const char* name);
+ void print_property(int flag, const char* name, const char* val);
+ void print_property(int flag, const char* name, int val);
+};
+
+void PrintProperties::print_node_properties(Node* node) {
+ const jushort flags = node->flags();
+ print_property((flags & Node::Flag_is_Copy), "is_copy");
+ print_property((flags & Node::Flag_rematerialize), "rematerialize");
+ print_property((flags & Node::Flag_needs_anti_dependence_check), "needs_anti_dependence_check");
+ print_property((flags & Node::Flag_is_macro), "is_macro");
+ print_property((flags & Node::Flag_is_Con), "is_con");
+ print_property((flags & Node::Flag_is_cisc_alternate), "is_cisc_alternate");
+ print_property((flags & Node::Flag_is_dead_loop_safe), "is_dead_loop_safe");
+ print_property((flags & Node::Flag_may_be_short_branch), "may_be_short_branch");
+ print_property((flags & Node::Flag_has_call), "has_call");
+ print_property((flags & Node::Flag_has_swapped_edges), "has_swapped_edges");
+ Matcher* matcher = _printer->C->matcher();
+ if (matcher != nullptr) {
+ print_property(matcher->is_shared(node),"is_shared");
+ print_property(!(matcher->is_shared(node)), "is_shared", IdealGraphPrinter::FALSE_VALUE);
+ print_property(matcher->is_dontcare(node), "is_dontcare");
+ print_property(!(matcher->is_dontcare(node)),"is_dontcare", IdealGraphPrinter::FALSE_VALUE);
+ Node* old = matcher->find_old_node(node);
+ if (old != nullptr) {
+ print_property(true, "old_node_idx", old->_idx);
+ }
+ }
+}
+
+void PrintProperties::print_lrg_properties(const LRG &lrg, const char *buffer) {
+ print_property(true, "mask", buffer);
+ print_property(true, "mask_size", lrg.mask_size());
+ if (lrg._degree_valid) {
+ print_property(true, "degree", lrg.degree());
+ }
+ print_property(true, "num_regs", lrg.num_regs());
+ print_property(true, "reg_pressure", lrg.reg_pressure());
+ print_property(true, "cost", lrg._cost);
+ print_property(true, "area", lrg._area);
+ print_property(true, "score", lrg.score());
+ print_property((lrg._risk_bias != 0), "risk_bias", lrg._risk_bias);
+ print_property((lrg._copy_bias != 0), "copy_bias", lrg._copy_bias);
+ print_property((lrg._copy_bias2 != 0), "copy_bias2", lrg._copy_bias2);
+ print_property(lrg.is_singledef(), "is_singledef");
+ print_property(lrg.is_multidef(), "is_multidef");
+ print_property(lrg._is_oop, "is_oop");
+ print_property(lrg._is_float, "is_float");
+ print_property(lrg._is_vector, "is_vector");
+ print_property(lrg._is_predicate, "is_predicate");
+ print_property(lrg._is_scalable, "is_scalable");
+ print_property(lrg._was_spilled1, "was_spilled1");
+ print_property(lrg._was_spilled2, "was_spilled2");
+ print_property(lrg._direct_conflict, "direct_conflict");
+ print_property(lrg._fat_proj, "fat_proj");
+ print_property(lrg._was_lo, "_was_lo");
+ print_property(lrg._has_copy, "has_copy");
+ print_property(lrg._at_risk, "at_risk");
+ print_property(lrg._must_spill, "must_spill");
+ print_property(lrg._is_bound, "is_bound");
+ print_property((lrg._msize_valid && lrg._degree_valid && lrg.lo_degree()), "trivial");
+}
+
+void PrintProperties::print_property(int flag, const char* name) {
+ if (flag != 0) {
+ _printer->print_prop(name, IdealGraphPrinter::TRUE_VALUE);
+ }
+}
+
+void PrintProperties::print_property(int flag, const char* name, const char* val) {
+ if (flag != 0) {
+ _printer->print_prop(name, val);
+ }
+}
+
+void PrintProperties::print_property(int flag, const char* name, int val) {
+ if (flag != 0) {
+ _printer->print_prop(name, val);
+ }
+}
+
// Constants
// Keep consistent with Java constants
const char *IdealGraphPrinter::INDENT = " ";
@@ -522,54 +613,8 @@ void IdealGraphPrinter::visit_node(Node* n, bool edges) {
print_prop("jvms", buffer);
}
- const jushort flags = node->flags();
- if (flags & Node::Flag_is_Copy) {
- print_prop("is_copy", "true");
- }
- if (flags & Node::Flag_rematerialize) {
- print_prop("rematerialize", "true");
- }
- if (flags & Node::Flag_needs_anti_dependence_check) {
- print_prop("needs_anti_dependence_check", "true");
- }
- if (flags & Node::Flag_is_macro) {
- print_prop("is_macro", "true");
- }
- if (flags & Node::Flag_is_Con) {
- print_prop("is_con", "true");
- }
- if (flags & Node::Flag_is_cisc_alternate) {
- print_prop("is_cisc_alternate", "true");
- }
- if (flags & Node::Flag_is_dead_loop_safe) {
- print_prop("is_dead_loop_safe", "true");
- }
- if (flags & Node::Flag_may_be_short_branch) {
- print_prop("may_be_short_branch", "true");
- }
- if (flags & Node::Flag_has_call) {
- print_prop("has_call", "true");
- }
- if (flags & Node::Flag_has_swapped_edges) {
- print_prop("has_swapped_edges", "true");
- }
-
- if (C->matcher() != nullptr) {
- if (C->matcher()->is_shared(node)) {
- print_prop("is_shared", "true");
- } else {
- print_prop("is_shared", "false");
- }
- if (C->matcher()->is_dontcare(node)) {
- print_prop("is_dontcare", "true");
- } else {
- print_prop("is_dontcare", "false");
- }
- Node* old = C->matcher()->find_old_node(node);
- if (old != nullptr) {
- print_prop("old_node_idx", old->_idx);
- }
- }
+ PrintProperties print_node(this);
+ print_node.print_node_properties(node);
if (node->is_Proj()) {
print_prop("con", (int)node->as_Proj()->_con);
@@ -1145,73 +1190,10 @@ void IdealGraphPrinter::print(const char* name, Node* node, GrowableArray {
- private:
+ friend class PrintProperties;
+private:
static const char *INDENT;
static const char *TOP_ELEMENT;
static const char *GROUP_ELEMENT;
diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp
index 31d1cbe0443..5c65103677b 100644
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@@ -1411,7 +1411,6 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
C->print_method(PHASE_BEFORE_PRE_MAIN_POST, 4, main_head);
- Node *pre_header= main_head->in(LoopNode::EntryControl);
Node *init = main_head->init_trip();
Node *incr = main_end ->incr();
Node *limit = main_end ->limit();
diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp
index dfff7ef96a5..03cc5cbcff6 100644
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@@ -1162,13 +1162,16 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
class CloneShortLoopPredicateVisitor : public PredicateVisitor {
ClonePredicateToTargetLoop _clone_predicate_to_loop;
PhaseIdealLoop* const _phase;
+ Node* const _new_init;
public:
CloneShortLoopPredicateVisitor(LoopNode* target_loop_head,
+ Node* new_init,
const NodeInSingleLoopBody &node_in_loop_body,
PhaseIdealLoop* phase)
: _clone_predicate_to_loop(target_loop_head, node_in_loop_body, phase),
- _phase(phase) {
+ _phase(phase),
+ _new_init(new_init) {
}
NONCOPYABLE(CloneShortLoopPredicateVisitor);
@@ -1180,11 +1183,32 @@ public:
}
void visit(const TemplateAssertionPredicate& template_assertion_predicate) override {
- _clone_predicate_to_loop.clone_template_assertion_predicate(template_assertion_predicate);
+ _clone_predicate_to_loop.clone_template_assertion_predicate_and_replace_init(template_assertion_predicate, _new_init);
template_assertion_predicate.kill(_phase->igvn());
}
};
+// For an int counted loop, try_make_short_running_loop() transforms the loop from:
+// for (int i = start; i < stop; i += stride) { ... }
+// to
+// for (int i = 0; i < stop - start; i += stride) { ... }
+// Template Assertion Predicates added so far were with an init value of start. They need to be updated with the new
+// init value of 0 (otherwise when a template assertion predicate is turned into an initialized assertion predicate, it
+// performs an incorrect check):
+// zero
+// init |
+// | ===> OpaqueLoopInit init
+// OpaqueLoopInit \ /
+// AddI
+//
+Node* PhaseIdealLoop::new_assertion_predicate_opaque_init(Node* entry_control, Node* init, Node* int_zero) {
+ OpaqueLoopInitNode* new_opaque_init = new OpaqueLoopInitNode(C, int_zero);
+ register_new_node(new_opaque_init, entry_control);
+ Node* new_init = new AddINode(new_opaque_init, init);
+ register_new_node(new_init, entry_control);
+ return new_init;
+}
+
// If the loop is either statically known to run for a small enough number of iterations or if profile data indicates
// that, we don't want an outer loop because the overhead of having an outer loop whose backedge is never taken, has a
// measurable cost. Furthermore, creating the loop nest usually causes one iteration of the loop to be peeled so
@@ -1236,6 +1260,7 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
}
register_new_node(new_limit, entry_control);
+ Node* int_zero = intcon(0);
PhiNode* phi = head->phi()->as_Phi();
if (profile_short_running_loop) {
// Add a Short Running Long Loop Predicate. It's the first predicate in the predicate chain before entering a loop
@@ -1261,9 +1286,11 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
if (!short_running_long_loop_predicate_block->has_parse_predicate()) { // already trapped
return false;
}
+ Node* new_init = new_assertion_predicate_opaque_init(entry_control, init, int_zero);
+
PredicateIterator predicate_iterator(entry_control);
NodeInSingleLoopBody node_in_short_loop_body(this, loop);
- CloneShortLoopPredicateVisitor clone_short_loop_predicates_visitor(head, node_in_short_loop_body, this);
+ CloneShortLoopPredicateVisitor clone_short_loop_predicates_visitor(head, new_init, node_in_short_loop_body, this);
predicate_iterator.for_each(clone_short_loop_predicates_visitor);
entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
@@ -1311,6 +1338,10 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
register_new_node(new_limit, predicates.entry());
} else {
assert(bt == T_INT && known_short_running_loop, "only CountedLoop statically known to be short running");
+ PredicateIterator predicate_iterator(entry_control);
+ Node* new_init = new_assertion_predicate_opaque_init(entry_control, init, int_zero);
+ UpdateInitForTemplateAssertionPredicates update_init_for_template_assertion_predicates(new_init, this);
+ predicate_iterator.for_each(update_init_for_template_assertion_predicates);
}
IfNode* exit_test = head->loopexit();
@@ -1320,7 +1351,6 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
register_new_node(new_limit, entry_control);
}
- Node* int_zero = intcon(0);
if (stride_con < 0) {
new_limit = new SubINode(int_zero, new_limit);
register_new_node(new_limit, entry_control);
diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp
index 1e34331f213..3b97d76773f 100644
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@@ -1969,6 +1969,8 @@ public:
Node* ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node);
+ Node* new_assertion_predicate_opaque_init(Node* entry_control, Node* init, Node* int_zero);
+
bool try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List& range_checks, const uint iters_limit);
ConINode* intcon(jint i);
diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp
index 3ef6a085b1c..ee3f138b8af 100644
--- a/src/hotspot/share/opto/loopopts.cpp
+++ b/src/hotspot/share/opto/loopopts.cpp
@@ -4180,6 +4180,33 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
return true;
}
+#ifdef ASSERT
+
+// Moves Template Assertion Predicates to a target loop by cloning and killing the old ones. The target loop is the
+// original, not-cloned loop. This is currently only used with StressLoopBackedge which is a develop flag only and
+// false with product builds. We can therefore guard it with an ifdef. More details can be found at the use-site.
+class MoveAssertionPredicatesVisitor : public PredicateVisitor {
+ ClonePredicateToTargetLoop _clone_predicate_to_loop;
+ PhaseIdealLoop* const _phase;
+
+public:
+ MoveAssertionPredicatesVisitor(LoopNode* target_loop_head,
+ const NodeInSingleLoopBody &node_in_loop_body,
+ PhaseIdealLoop* phase)
+ : _clone_predicate_to_loop(target_loop_head, node_in_loop_body, phase),
+ _phase(phase) {
+ }
+ NONCOPYABLE(MoveAssertionPredicatesVisitor);
+
+ using PredicateVisitor::visit;
+
+ void visit(const TemplateAssertionPredicate& template_assertion_predicate) override {
+ _clone_predicate_to_loop.clone_template_assertion_predicate(template_assertion_predicate);
+ template_assertion_predicate.kill(_phase->igvn());
+ }
+};
+#endif // ASSERT
+
// Transform:
//
// loop<-----------------+
@@ -4248,6 +4275,7 @@ bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old
IfNode* exit_test = nullptr;
uint inner;
float f;
+#ifdef ASSERT
if (StressDuplicateBackedge) {
if (head->is_strip_mined()) {
return false;
@@ -4266,7 +4294,9 @@ bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old
}
inner = 1;
- } else {
+ } else
+#endif //ASSERT
+ {
// Is the shape of the loop that of a counted loop...
Node* back_control = loop_exit_control(head, loop);
if (back_control == nullptr) {
@@ -4457,6 +4487,19 @@ bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old
}
}
+#ifdef ASSERT
+ if (StressDuplicateBackedge && head->is_CountedLoop()) {
+ // The Template Assertion Predicates from the old counted loop are now at the new outer loop - clone them to
+ // the inner counted loop and kill the old ones. We only need to do this with debug builds because
+    // StressDuplicateBackedge is a develop flag and false by default. Without StressDuplicateBackedge 'head' will be a
+ // non-counted loop, and thus we have no Template Assertion Predicates above the old loop to move down.
+ PredicateIterator predicate_iterator(outer_head->in(LoopNode::EntryControl));
+ NodeInSingleLoopBody node_in_body(this, loop);
+ MoveAssertionPredicatesVisitor move_assertion_predicates_visitor(head, node_in_body, this);
+ predicate_iterator.for_each(move_assertion_predicates_visitor);
+ }
+#endif // ASSERT
+
C->set_major_progress();
C->print_method(PHASE_AFTER_DUPLICATE_LOOP_BACKEDGE, 4, outer_head);
diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp
index e58befd8032..ec861865ff5 100644
--- a/src/hotspot/share/opto/machnode.cpp
+++ b/src/hotspot/share/opto/machnode.cpp
@@ -460,6 +460,13 @@ int MachNode::operand_index(Node* def) const {
return -1;
}
+int MachNode::operand_num_edges(uint oper_index) const {
+ if (num_opnds() > oper_index) {
+ return _opnds[oper_index]->num_edges();
+ }
+ return 0;
+}
+
//------------------------------peephole---------------------------------------
// Apply peephole rule(s) to this instruction
int MachNode::peephole(Block *block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc *ra_) {
diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp
index 093f466678c..b60313b7f75 100644
--- a/src/hotspot/share/opto/machnode.hpp
+++ b/src/hotspot/share/opto/machnode.hpp
@@ -266,6 +266,7 @@ public:
int operand_index(uint operand) const;
int operand_index(const MachOper *oper) const;
int operand_index(Node* m) const;
+ int operand_num_edges(uint operand) const;
// Register class input is expected in
virtual const RegMask &in_RegMask(uint) const;
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
index 01f11b1fdc9..ca13d0166a1 100644
--- a/src/hotspot/share/opto/matcher.hpp
+++ b/src/hotspot/share/opto/matcher.hpp
@@ -512,6 +512,8 @@ public:
DEBUG_ONLY( bool verify_after_postselect_cleanup(); )
public:
+ static bool is_register_biasing_candidate(const MachNode* mdef, int oper_index);
+
// This routine is run whenever a graph fails to match.
// If it returns, the compiler should bailout to interpreter without error.
// In non-product mode, SoftMatchFailure is false to detect non-canonical
diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp
index 6067bcbac8d..2e19d1d247b 100644
--- a/src/hotspot/share/opto/node.hpp
+++ b/src/hotspot/share/opto/node.hpp
@@ -828,26 +828,26 @@ public:
#undef DEFINE_CLASS_ID
// Flags are sorted by usage frequency.
- enum NodeFlags {
- Flag_is_Copy = 1 << 0, // should be first bit to avoid shift
- Flag_rematerialize = 1 << 1,
- Flag_needs_anti_dependence_check = 1 << 2,
- Flag_is_macro = 1 << 3,
- Flag_is_Con = 1 << 4,
- Flag_is_cisc_alternate = 1 << 5,
- Flag_is_dead_loop_safe = 1 << 6,
- Flag_may_be_short_branch = 1 << 7,
- Flag_avoid_back_to_back_before = 1 << 8,
- Flag_avoid_back_to_back_after = 1 << 9,
- Flag_has_call = 1 << 10,
- Flag_has_swapped_edges = 1 << 11,
- Flag_is_scheduled = 1 << 12,
- Flag_is_expensive = 1 << 13,
- Flag_is_predicated_vector = 1 << 14,
- Flag_for_post_loop_opts_igvn = 1 << 15,
- Flag_for_merge_stores_igvn = 1 << 16,
- Flag_is_removed_by_peephole = 1 << 17,
- Flag_is_predicated_using_blend = 1 << 18,
+ enum NodeFlags : uint64_t {
+ Flag_is_Copy = 1ULL << 0, // should be first bit to avoid shift
+ Flag_rematerialize = 1ULL << 1,
+ Flag_needs_anti_dependence_check = 1ULL << 2,
+ Flag_is_macro = 1ULL << 3,
+ Flag_is_Con = 1ULL << 4,
+ Flag_is_cisc_alternate = 1ULL << 5,
+ Flag_is_dead_loop_safe = 1ULL << 6,
+ Flag_may_be_short_branch = 1ULL << 7,
+ Flag_avoid_back_to_back_before = 1ULL << 8,
+ Flag_avoid_back_to_back_after = 1ULL << 9,
+ Flag_has_call = 1ULL << 10,
+ Flag_has_swapped_edges = 1ULL << 11,
+ Flag_is_scheduled = 1ULL << 12,
+ Flag_is_expensive = 1ULL << 13,
+ Flag_is_predicated_vector = 1ULL << 14,
+ Flag_for_post_loop_opts_igvn = 1ULL << 15,
+ Flag_for_merge_stores_igvn = 1ULL << 16,
+ Flag_is_removed_by_peephole = 1ULL << 17,
+ Flag_is_predicated_using_blend = 1ULL << 18,
_last_flag = Flag_is_predicated_using_blend
};
@@ -2176,7 +2176,10 @@ class BFSActions : public StackObj {
virtual bool is_target_node(Node* node) const = 0;
// Defines an action that should be taken when we visit a target node in the BFS traversal.
- virtual void target_node_action(Node* target_node) = 0;
+ // To give more freedom, we pass the direct child node to the target node such that
+ // child->in(i) == target node. This allows to also directly replace the target node instead
+ // of only updating its inputs.
+ virtual void target_node_action(Node* child, uint i) = 0;
};
// Class to perform a BFS traversal on the data nodes from a given start node. The provided BFSActions guide which
@@ -2198,7 +2201,7 @@ class DataNodeBFS : public StackObj {
Node* input = next->in(j);
if (_bfs_actions.is_target_node(input)) {
assert(_bfs_actions.should_visit(input), "must also pass node filter");
- _bfs_actions.target_node_action(input);
+ _bfs_actions.target_node_action(next, j);
} else if (_bfs_actions.should_visit(input)) {
_nodes_to_visit.push(input);
}
diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp
index 84c01c68e38..136fc8ac864 100644
--- a/src/hotspot/share/opto/output.cpp
+++ b/src/hotspot/share/opto/output.cpp
@@ -1347,20 +1347,18 @@ CodeBuffer* PhaseOutput::init_buffer() {
// nmethod and CodeBuffer count stubs & constants as part of method's code.
// class HandlerImpl is platform-specific and defined in the *.ad files.
- int exception_handler_req = HandlerImpl::size_exception_handler() + MAX_stubs_size; // add marginal slop for handler
int deopt_handler_req = HandlerImpl::size_deopt_handler() + MAX_stubs_size; // add marginal slop for handler
stub_req += MAX_stubs_size; // ensure per-stub margin
code_req += MAX_inst_size; // ensure per-instruction margin
if (StressCodeBuffers)
- code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
+ code_req = const_req = stub_req = deopt_handler_req = 0x10; // force expansion
int total_req =
const_req +
code_req +
pad_req +
stub_req +
- exception_handler_req +
deopt_handler_req; // deopt handler
CodeBuffer* cb = code_buffer();
@@ -1789,8 +1787,6 @@ void PhaseOutput::fill_buffer(C2_MacroAssembler* masm, uint* blk_starts) {
// Only java methods have exception handlers and deopt handlers
// class HandlerImpl is platform-specific and defined in the *.ad files.
if (C->method()) {
- // Emit the exception handler code.
- _code_offsets.set_value(CodeOffsets::Exceptions, HandlerImpl::emit_exception_handler(masm));
if (C->failing()) {
return; // CodeBuffer::expand failed
}
diff --git a/src/hotspot/share/opto/phaseX.cpp b/src/hotspot/share/opto/phaseX.cpp
index 1fe911aa7ac..4a0933b89f2 100644
--- a/src/hotspot/share/opto/phaseX.cpp
+++ b/src/hotspot/share/opto/phaseX.cpp
@@ -1132,7 +1132,7 @@ void PhaseIterGVN::verify_empty_worklist(Node* node) {
// (1) Integer "widen" changes, but the range is the same.
// (2) LoadNode performs deep traversals. Load is not notified for changes far away.
// (3) CmpPNode performs deep traversals if it compares oopptr. CmpP is not notified for changes far away.
-bool PhaseIterGVN::verify_Value_for(Node* n) {
+bool PhaseIterGVN::verify_Value_for(Node* n, bool strict) {
// If we assert inside type(n), because the type is still a null, then maybe
// the node never went through gvn.transform, which would be a bug.
const Type* told = type(n);
@@ -1152,7 +1152,7 @@ bool PhaseIterGVN::verify_Value_for(Node* n) {
}
// Exception (2)
// LoadNode performs deep traversals. Load is not notified for changes far away.
- if (n->is_Load() && !told->singleton()) {
+ if (!strict && n->is_Load() && !told->singleton()) {
// MemNode::can_see_stored_value looks up through many memory nodes,
// which means we would need to notify modifications from far up in
// the inputs all the way down to the LoadNode. We don't do that.
@@ -1160,7 +1160,7 @@ bool PhaseIterGVN::verify_Value_for(Node* n) {
}
// Exception (3)
// CmpPNode performs deep traversals if it compares oopptr. CmpP is not notified for changes far away.
- if (n->Opcode() == Op_CmpP && type(n->in(1))->isa_oopptr() && type(n->in(2))->isa_oopptr()) {
+ if (!strict && n->Opcode() == Op_CmpP && type(n->in(1))->isa_oopptr() && type(n->in(2))->isa_oopptr()) {
// SubNode::Value
// CmpPNode::sub
// MemNode::detect_ptr_independence
@@ -2799,6 +2799,7 @@ void PhaseCCP::analyze() {
// Compile is over. The local arena gets de-allocated at the end of its scope.
ResourceArea local_arena(mtCompiler);
Unique_Node_List worklist(&local_arena);
+ Unique_Node_List worklist_revisit(&local_arena);
DEBUG_ONLY(Unique_Node_List worklist_verify(&local_arena);)
// Push root onto worklist
@@ -2807,45 +2808,86 @@ void PhaseCCP::analyze() {
assert(_root_and_safepoints.size() == 0, "must be empty (unused)");
_root_and_safepoints.push(C->root());
- // Pull from worklist; compute new value; push changes out.
- // This loop is the meat of CCP.
+ // This is the meat of CCP: pull from worklist; compute new value; push changes out.
+
+ // Do the first round. Since all initial types are TOP, this will visit all alive nodes.
while (worklist.size() != 0) {
Node* n = fetch_next_node(worklist);
DEBUG_ONLY(worklist_verify.push(n);)
+ if (needs_revisit(n)) {
+ worklist_revisit.push(n);
+ }
if (n->is_SafePoint()) {
// Make sure safepoints are processed by PhaseCCP::transform even if they are
// not reachable from the bottom. Otherwise, infinite loops would be removed.
_root_and_safepoints.push(n);
}
- const Type* new_type = n->Value(this);
- if (new_type != type(n)) {
- DEBUG_ONLY(verify_type(n, new_type, type(n));)
- dump_type_and_node(n, new_type);
- set_type(n, new_type);
- push_child_nodes_to_worklist(worklist, n);
- }
- if (KillPathsReachableByDeadTypeNode && n->is_Type() && new_type == Type::TOP) {
- // Keep track of Type nodes to kill CFG paths that use Type
- // nodes that become dead.
- _maybe_top_type_nodes.push(n);
- }
+ analyze_step(worklist, n);
}
+
+ // More rounds to catch updates far in the graph.
+ // Revisit nodes that might be able to refine their types at the end of the round.
+ // If so, process these nodes. If there is remaining work, start another round.
+ do {
+ while (worklist.size() != 0) {
+ Node* n = fetch_next_node(worklist);
+ analyze_step(worklist, n);
+ }
+ for (uint t = 0; t < worklist_revisit.size(); t++) {
+ Node* n = worklist_revisit.at(t);
+ analyze_step(worklist, n);
+ }
+ } while (worklist.size() != 0);
+
DEBUG_ONLY(verify_analyze(worklist_verify);)
}
+void PhaseCCP::analyze_step(Unique_Node_List& worklist, Node* n) {
+ const Type* new_type = n->Value(this);
+ if (new_type != type(n)) {
+ DEBUG_ONLY(verify_type(n, new_type, type(n));)
+ dump_type_and_node(n, new_type);
+ set_type(n, new_type);
+ push_child_nodes_to_worklist(worklist, n);
+ }
+ if (KillPathsReachableByDeadTypeNode && n->is_Type() && new_type == Type::TOP) {
+ // Keep track of Type nodes to kill CFG paths that use Type
+ // nodes that become dead.
+ _maybe_top_type_nodes.push(n);
+ }
+}
+
+// Some nodes can refine their types due to type change somewhere deep
+// in the graph. We will need to revisit them before claiming convergence.
+// Add nodes here if particular *Node::Value is doing deep graph traversals
+// not handled by PhaseCCP::push_more_uses().
+bool PhaseCCP::needs_revisit(Node* n) const {
+ // LoadNode performs deep traversals. Load is not notified for changes far away.
+ if (n->is_Load()) {
+ return true;
+ }
+ // CmpPNode performs deep traversals if it compares oopptr. CmpP is not notified for changes far away.
+ if (n->Opcode() == Op_CmpP && type(n->in(1))->isa_oopptr() && type(n->in(2))->isa_oopptr()) {
+ return true;
+ }
+ return false;
+}
+
#ifdef ASSERT
// For every node n on verify list, check if type(n) == n->Value()
-// We have a list of exceptions, see comments in verify_Value_for.
+// Note for CCP the non-convergence can lead to unsound analysis and mis-compilation.
+// Therefore, we are verifying Value convergence strictly.
void PhaseCCP::verify_analyze(Unique_Node_List& worklist_verify) {
bool failure = false;
while (worklist_verify.size()) {
Node* n = worklist_verify.pop();
- failure |= verify_Value_for(n);
+ failure |= verify_Value_for(n, /* strict = */ true);
}
// If we get this assert, check why the reported nodes were not processed again in CCP.
// We should either make sure that these nodes are properly added back to the CCP worklist
- // in PhaseCCP::push_child_nodes_to_worklist() to update their type or add an exception
- // in the verification code above if that is not possible for some reason (like Load nodes).
+ // in PhaseCCP::push_child_nodes_to_worklist() to update their type in the same round,
+ // or that they are added in PhaseCCP::needs_revisit() so that analysis revisits
+ // them at the end of the round.
assert(!failure, "PhaseCCP not at fixpoint: analysis result may be unsound.");
}
#endif
diff --git a/src/hotspot/share/opto/phaseX.hpp b/src/hotspot/share/opto/phaseX.hpp
index 083e77bf6d9..473231e6af5 100644
--- a/src/hotspot/share/opto/phaseX.hpp
+++ b/src/hotspot/share/opto/phaseX.hpp
@@ -490,7 +490,7 @@ public:
void optimize();
#ifdef ASSERT
void verify_optimize();
- bool verify_Value_for(Node* n);
+ bool verify_Value_for(Node* n, bool strict = false);
bool verify_Ideal_for(Node* n, bool can_reshape);
bool verify_Identity_for(Node* n);
void verify_empty_worklist(Node* n);
@@ -659,6 +659,8 @@ class PhaseCCP : public PhaseIterGVN {
// Worklist algorithm identifies constants
void analyze();
+ void analyze_step(Unique_Node_List& worklist, Node* n);
+ bool needs_revisit(Node* n) const;
#ifdef ASSERT
void verify_type(Node* n, const Type* tnew, const Type* told);
// For every node n on verify list, check if type(n) == n->Value()
diff --git a/src/hotspot/share/opto/predicates.cpp b/src/hotspot/share/opto/predicates.cpp
index 208bd6583c5..2489ff563a9 100644
--- a/src/hotspot/share/opto/predicates.cpp
+++ b/src/hotspot/share/opto/predicates.cpp
@@ -198,12 +198,21 @@ TemplateAssertionPredicate TemplateAssertionPredicate::clone_and_replace_opaque_
Node* new_opaque_input,
CountedLoopNode* new_loop_node,
PhaseIdealLoop* phase) const {
- DEBUG_ONLY(verify();)
OpaqueLoopInitNode* new_opaque_init = new OpaqueLoopInitNode(phase->C, new_opaque_input);
phase->register_new_node(new_opaque_init, new_control);
+ return clone_and_replace_init(new_control, new_opaque_init, new_loop_node, phase);
+}
+
+// Clone this Template Assertion Predicate and replace the old OpaqueLoopInit node with 'new_init'.
+// Note: 'new_init' could also have the 'OpaqueLoopInit' as parent node further up.
+TemplateAssertionPredicate TemplateAssertionPredicate::clone_and_replace_init(Node* new_control,
+ Node* new_init,
+ CountedLoopNode* new_loop_node,
+ PhaseIdealLoop* phase) const {
+ DEBUG_ONLY(verify();)
TemplateAssertionExpression template_assertion_expression(opaque_node(), phase);
OpaqueTemplateAssertionPredicateNode* new_opaque_node =
- template_assertion_expression.clone_and_replace_init(new_control, new_opaque_init, new_loop_node);
+ template_assertion_expression.clone_and_replace_init(new_control, new_init, new_loop_node);
AssertionPredicateIfCreator assertion_predicate_if_creator(phase);
IfTrueNode* success_proj = assertion_predicate_if_creator.create_for_template(new_control, _if_node->Opcode(),
new_opaque_node,
@@ -238,8 +247,40 @@ class ReplaceOpaqueStrideInput : public BFSActions {
return node->is_OpaqueLoopStride();
}
- void target_node_action(Node* target_node) override {
- _igvn.replace_input_of(target_node, 1, _new_opaque_stride_input);
+ void target_node_action(Node* child, uint i) override {
+ assert(child->in(i)->is_OpaqueLoopStride(), "must be OpaqueLoopStride");
+ _igvn.replace_input_of(child->in(i), 1, _new_opaque_stride_input);
+ }
+};
+
+// This class is used to replace the OpaqueLoopInitNode with a new node while leaving the other nodes
+// unchanged.
+class ReplaceOpaqueInitNode : public BFSActions {
+ Node* _new_opaque_init_node;
+ PhaseIterGVN& _igvn;
+
+ public:
+ ReplaceOpaqueInitNode(Node* new_opaque_init_node, PhaseIterGVN& igvn)
+ : _new_opaque_init_node(new_opaque_init_node),
+ _igvn(igvn) {}
+ NONCOPYABLE(ReplaceOpaqueInitNode);
+
+ void replace_for(OpaqueTemplateAssertionPredicateNode* opaque_node) {
+ DataNodeBFS bfs(*this);
+ bfs.run(opaque_node);
+ }
+
+ bool should_visit(Node* node) const override {
+ return TemplateAssertionExpressionNode::is_maybe_in_expression(node);
+ }
+
+ bool is_target_node(Node* node) const override {
+ return node->is_OpaqueLoopInit();
+ }
+
+ void target_node_action(Node* child, uint i) override {
+ assert(child->in(i)->is_OpaqueLoopInit(), "must be old OpaqueLoopInit");
+ _igvn.replace_input_of(child, i, _new_opaque_init_node);
}
};
@@ -250,6 +291,13 @@ void TemplateAssertionPredicate::replace_opaque_stride_input(Node* new_stride, P
replace_opaque_stride_input.replace_for(opaque_node());
}
+// Replace the OpaqueLoopInitNode with 'new_init' and leave the other nodes unchanged.
+void TemplateAssertionPredicate::replace_opaque_init_node(Node* new_init, PhaseIterGVN& igvn) const {
+ DEBUG_ONLY(verify();)
+ ReplaceOpaqueInitNode replace_opaque_init_node(new_init, igvn);
+ replace_opaque_init_node.replace_for(opaque_node());
+}
+
// Create a new Initialized Assertion Predicate from this template at the template success projection.
InitializedAssertionPredicate TemplateAssertionPredicate::initialize(PhaseIdealLoop* phase) const {
DEBUG_ONLY(verify();)
@@ -308,7 +356,8 @@ class OpaqueLoopNodesVerifier : public BFSActions {
return node->is_Opaque1();
}
- void target_node_action(Node* target_node) override {
+ void target_node_action(Node* child, uint i) override {
+ Node* target_node = child->in(i);
if (target_node->is_OpaqueLoopInit()) {
assert(!_found_init, "should only find one OpaqueLoopInitNode");
_found_init = true;
@@ -1094,6 +1143,18 @@ void ClonePredicateToTargetLoop::clone_template_assertion_predicate(
_target_loop_predicate_chain.insert_predicate(cloned_template_assertion_predicate);
}
+// Clones the provided Template Assertion Predicate to the head of the current predicate chain at the target loop and
+// replaces the current OpaqueLoopInit with 'new_init'.
+// Note: 'new_init' could also have the 'OpaqueLoopInit' as parent node further up.
+void ClonePredicateToTargetLoop::clone_template_assertion_predicate_and_replace_init(
+ const TemplateAssertionPredicate& template_assertion_predicate, Node* new_init) {
+ TemplateAssertionPredicate cloned_template_assertion_predicate =
+ template_assertion_predicate.clone_and_replace_init(_old_target_loop_entry, new_init, _target_loop_head->as_CountedLoop(), _phase);
+ template_assertion_predicate.rewire_loop_data_dependencies(cloned_template_assertion_predicate.tail(),
+ _node_in_loop_body, _phase);
+ _target_loop_predicate_chain.insert_predicate(cloned_template_assertion_predicate);
+}
+
CloneUnswitchedLoopPredicatesVisitor::CloneUnswitchedLoopPredicatesVisitor(
LoopNode* true_path_loop_head, LoopNode* false_path_loop_head,
const NodeInOriginalLoopBody& node_in_true_path_loop_body, const NodeInClonedLoopBody& node_in_false_path_loop_body,
@@ -1182,6 +1243,10 @@ void UpdateStrideForAssertionPredicates::connect_initialized_assertion_predicate
}
}
+void UpdateInitForTemplateAssertionPredicates::visit(const TemplateAssertionPredicate& template_assertion_predicate) {
+ template_assertion_predicate.replace_opaque_init_node(_new_init, _phase->igvn());
+}
+
// Do the following to find and eliminate useless Parse and Template Assertion Predicates:
// 1. Mark all Parse and Template Assertion Predicates "maybe useful".
// 2. Walk through the loop tree and iterate over all Predicates above each loop head. All found Parse and Template
diff --git a/src/hotspot/share/opto/predicates.hpp b/src/hotspot/share/opto/predicates.hpp
index 32b1c1cd3c4..cd0832cc062 100644
--- a/src/hotspot/share/opto/predicates.hpp
+++ b/src/hotspot/share/opto/predicates.hpp
@@ -438,7 +438,10 @@ class TemplateAssertionPredicate : public Predicate {
TemplateAssertionPredicate clone(Node* new_control, CountedLoopNode* new_loop_node, PhaseIdealLoop* phase) const;
TemplateAssertionPredicate clone_and_replace_opaque_input(Node* new_control, Node* new_opaque_input,
CountedLoopNode* new_loop_node, PhaseIdealLoop* phase) const;
+ TemplateAssertionPredicate clone_and_replace_init(Node* new_control, Node* new_input,
+ CountedLoopNode* new_loop_node, PhaseIdealLoop* phase) const;
void replace_opaque_stride_input(Node* new_stride, PhaseIterGVN& igvn) const;
+ void replace_opaque_init_node(Node* new_init, PhaseIterGVN& igvn) const;
InitializedAssertionPredicate initialize(PhaseIdealLoop* phase) const;
void rewire_loop_data_dependencies(IfTrueNode* target_predicate, const NodeInLoopBody& data_in_loop_body,
const PhaseIdealLoop* phase) const;
@@ -1228,6 +1231,7 @@ public:
}
void clone_template_assertion_predicate(const TemplateAssertionPredicate& template_assertion_predicate);
+ void clone_template_assertion_predicate_and_replace_init(const TemplateAssertionPredicate& template_assertion_predicate, Node* new_init);
};
// Visitor to clone Parse and Template Assertion Predicates from a loop to its unswitched true and false path loop.
@@ -1300,6 +1304,22 @@ class UpdateStrideForAssertionPredicates : public PredicateVisitor {
void visit(const InitializedAssertionPredicate& initialized_assertion_predicate) override;
};
+// This visitor replaces the OpaqueLoopInitNode for an Assertion Predicate with the expression passed as input.
+class UpdateInitForTemplateAssertionPredicates : public PredicateVisitor {
+ Node* const _new_init;
+ PhaseIdealLoop* const _phase;
+
+public:
+ UpdateInitForTemplateAssertionPredicates(Node* const new_init, PhaseIdealLoop* phase)
+ : _new_init(new_init),
+ _phase(phase) {}
+ NONCOPYABLE(UpdateInitForTemplateAssertionPredicates);
+
+ using PredicateVisitor::visit;
+
+ void visit(const TemplateAssertionPredicate& template_assertion_predicate) override;
+};
+
// Eliminate all useless Parse and Template Assertion Predicates. They become useless when they can no longer be found
// from a loop head. We mark these useless to clean them up later during IGVN. A Predicate that is marked useless will
// no longer be visited by a PredicateVisitor.
diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp
index 96fee925e5d..ecb8c2c1cd8 100644
--- a/src/hotspot/share/opto/type.cpp
+++ b/src/hotspot/share/opto/type.cpp
@@ -45,6 +45,8 @@
#include "opto/type.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/checkedCast.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/ostream.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/stringUtils.hpp"
@@ -2979,15 +2981,22 @@ const char *const TypePtr::ptr_msg[TypePtr::lastPTR] = {
#ifndef PRODUCT
void TypePtr::dump2( Dict &d, uint depth, outputStream *st ) const {
- if( _ptr == Null ) st->print("null");
- else st->print("%s *", ptr_msg[_ptr]);
- if( _offset == OffsetTop ) st->print("+top");
- else if( _offset == OffsetBot ) st->print("+bot");
- else if( _offset ) st->print("+%d", _offset);
+ st->print("ptr:%s", ptr_msg[_ptr]);
+ dump_offset(st);
dump_inline_depth(st);
dump_speculative(st);
}
+void TypePtr::dump_offset(outputStream* st) const {
+ if (_offset == OffsetBot) {
+ st->print("+bot");
+ } else if (_offset == OffsetTop) {
+ st->print("+top");
+ } else {
+ st->print("+%d", _offset);
+ }
+}
+
/**
*dump the speculative part of the type
*/
@@ -3159,11 +3168,12 @@ uint TypeRawPtr::hash(void) const {
//------------------------------dump2------------------------------------------
#ifndef PRODUCT
-void TypeRawPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
- if( _ptr == Constant )
- st->print(INTPTR_FORMAT, p2i(_bits));
- else
+void TypeRawPtr::dump2(Dict& d, uint depth, outputStream* st) const {
+ if (_ptr == Constant) {
+ st->print("rawptr:Constant:" INTPTR_FORMAT, p2i(_bits));
+ } else {
st->print("rawptr:%s", ptr_msg[_ptr]);
+ }
}
#endif
@@ -3798,24 +3808,29 @@ uint TypeOopPtr::hash(void) const {
//------------------------------dump2------------------------------------------
#ifndef PRODUCT
-void TypeOopPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+void TypeOopPtr::dump2(Dict& d, uint depth, outputStream* st) const {
st->print("oopptr:%s", ptr_msg[_ptr]);
- if( _klass_is_exact ) st->print(":exact");
- if( const_oop() ) st->print(INTPTR_FORMAT, p2i(const_oop()));
- switch( _offset ) {
- case OffsetTop: st->print("+top"); break;
- case OffsetBot: st->print("+any"); break;
- case 0: break;
- default: st->print("+%d",_offset); break;
+ if (_klass_is_exact) {
+ st->print(":exact");
}
- if (_instance_id == InstanceTop)
- st->print(",iid=top");
- else if (_instance_id != InstanceBot)
- st->print(",iid=%d",_instance_id);
-
+ if (const_oop() != nullptr) {
+ st->print(":" INTPTR_FORMAT, p2i(const_oop()));
+ }
+ dump_offset(st);
+ dump_instance_id(st);
dump_inline_depth(st);
dump_speculative(st);
}
+
+void TypeOopPtr::dump_instance_id(outputStream* st) const {
+ if (_instance_id == InstanceTop) {
+ st->print(",iid=top");
+ } else if (_instance_id == InstanceBot) {
+ st->print(",iid=bot");
+ } else {
+ st->print(",iid=%d", _instance_id);
+ }
+}
#endif
//------------------------------singleton--------------------------------------
@@ -4453,50 +4468,30 @@ bool TypeInstPtr::maybe_java_subtype_of_helper(const TypeOopPtr* other, bool thi
#ifndef PRODUCT
void TypeInstPtr::dump2(Dict &d, uint depth, outputStream* st) const {
// Print the name of the klass.
+ st->print("instptr:");
klass()->print_name_on(st);
_interfaces->dump(st);
- switch( _ptr ) {
- case Constant:
- if (WizardMode || Verbose) {
- ResourceMark rm;
- stringStream ss;
+ if (_ptr == Constant && (WizardMode || Verbose)) {
+ ResourceMark rm;
+ stringStream ss;
- st->print(" ");
- const_oop()->print_oop(&ss);
- // 'const_oop->print_oop()' may emit newlines('\n') into ss.
- // suppress newlines from it so -XX:+Verbose -XX:+PrintIdeal dumps one-liner for each node.
- char* buf = ss.as_string(/* c_heap= */false);
- StringUtils::replace_no_expand(buf, "\n", "");
- st->print_raw(buf);
- }
- case BotPTR:
- if (!WizardMode && !Verbose) {
- if( _klass_is_exact ) st->print(":exact");
- break;
- }
- case TopPTR:
- case AnyNull:
- case NotNull:
- st->print(":%s", ptr_msg[_ptr]);
- if( _klass_is_exact ) st->print(":exact");
- break;
- default:
- break;
+ st->print(" ");
+ const_oop()->print_oop(&ss);
+ // 'const_oop->print_oop()' may emit newlines('\n') into ss.
+ // suppress newlines from it so -XX:+Verbose -XX:+PrintIdeal dumps one-liner for each node.
+ char* buf = ss.as_string(/* c_heap= */false);
+ StringUtils::replace_no_expand(buf, "\n", "");
+ st->print_raw(buf);
}
- if( _offset ) { // Dump offset, if any
- if( _offset == OffsetBot ) st->print("+any");
- else if( _offset == OffsetTop ) st->print("+unknown");
- else st->print("+%d", _offset);
+ st->print(":%s", ptr_msg[_ptr]);
+ if (_klass_is_exact) {
+ st->print(":exact");
}
- st->print(" *");
- if (_instance_id == InstanceTop)
- st->print(",iid=top");
- else if (_instance_id != InstanceBot)
- st->print(",iid=%d",_instance_id);
-
+ dump_offset(st);
+ dump_instance_id(st);
dump_inline_depth(st);
dump_speculative(st);
}
@@ -5089,26 +5084,17 @@ const Type *TypeAryPtr::xdual() const {
//------------------------------dump2------------------------------------------
#ifndef PRODUCT
void TypeAryPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
- _ary->dump2(d,depth,st);
+ st->print("aryptr:");
+ _ary->dump2(d, depth, st);
_interfaces->dump(st);
- switch( _ptr ) {
- case Constant:
+ if (_ptr == Constant) {
const_oop()->print(st);
- break;
- case BotPTR:
- if (!WizardMode && !Verbose) {
- if( _klass_is_exact ) st->print(":exact");
- break;
- }
- case TopPTR:
- case AnyNull:
- case NotNull:
- st->print(":%s", ptr_msg[_ptr]);
- if( _klass_is_exact ) st->print(":exact");
- break;
- default:
- break;
+ }
+
+ st->print(":%s", ptr_msg[_ptr]);
+ if (_klass_is_exact) {
+ st->print(":exact");
}
if( _offset != 0 ) {
@@ -5126,12 +5112,8 @@ void TypeAryPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
}
}
}
- st->print(" *");
- if (_instance_id == InstanceTop)
- st->print(",iid=top");
- else if (_instance_id != InstanceBot)
- st->print(",iid=%d",_instance_id);
+ dump_instance_id(st);
dump_inline_depth(st);
dump_speculative(st);
}
@@ -5490,13 +5472,10 @@ const Type *TypeMetadataPtr::xdual() const {
#ifndef PRODUCT
void TypeMetadataPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
st->print("metadataptr:%s", ptr_msg[_ptr]);
- if( metadata() ) st->print(INTPTR_FORMAT, p2i(metadata()));
- switch( _offset ) {
- case OffsetTop: st->print("+top"); break;
- case OffsetBot: st->print("+any"); break;
- case 0: break;
- default: st->print("+%d",_offset); break;
+ if (metadata() != nullptr) {
+ st->print(":" INTPTR_FORMAT, p2i(metadata()));
}
+ dump_offset(st);
}
#endif
@@ -5644,44 +5623,6 @@ intptr_t TypeKlassPtr::get_con() const {
return (intptr_t)k->constant_encoding();
}
-//------------------------------dump2------------------------------------------
-// Dump Klass Type
-#ifndef PRODUCT
-void TypeKlassPtr::dump2(Dict & d, uint depth, outputStream *st) const {
- switch(_ptr) {
- case Constant:
- st->print("precise ");
- case NotNull:
- {
- const char *name = klass()->name()->as_utf8();
- if (name) {
- st->print("%s: " INTPTR_FORMAT, name, p2i(klass()));
- } else {
- ShouldNotReachHere();
- }
- _interfaces->dump(st);
- }
- case BotPTR:
- if (!WizardMode && !Verbose && _ptr != Constant) break;
- case TopPTR:
- case AnyNull:
- st->print(":%s", ptr_msg[_ptr]);
- if (_ptr == Constant) st->print(":exact");
- break;
- default:
- break;
- }
-
- if (_offset) { // Dump offset, if any
- if (_offset == OffsetBot) { st->print("+any"); }
- else if (_offset == OffsetTop) { st->print("+unknown"); }
- else { st->print("+%d", _offset); }
- }
-
- st->print(" *");
-}
-#endif
-
//=============================================================================
// Convenience common pre-built types.
@@ -6036,6 +5977,15 @@ const TypeKlassPtr* TypeInstKlassPtr::try_improve() const {
return this;
}
+#ifndef PRODUCT
+void TypeInstKlassPtr::dump2(Dict& d, uint depth, outputStream* st) const {
+ st->print("instklassptr:");
+ klass()->print_name_on(st);
+ _interfaces->dump(st);
+ st->print(":%s", ptr_msg[_ptr]);
+ dump_offset(st);
+}
+#endif // PRODUCT
const TypeAryKlassPtr *TypeAryKlassPtr::make(PTR ptr, const Type* elem, ciKlass* k, int offset) {
return (TypeAryKlassPtr*)(new TypeAryKlassPtr(ptr, elem, k, offset))->hashcons();
@@ -6507,34 +6457,11 @@ ciKlass* TypeAryKlassPtr::klass() const {
// Dump Klass Type
#ifndef PRODUCT
void TypeAryKlassPtr::dump2( Dict & d, uint depth, outputStream *st ) const {
- switch( _ptr ) {
- case Constant:
- st->print("precise ");
- case NotNull:
- {
- st->print("[");
- _elem->dump2(d, depth, st);
- _interfaces->dump(st);
- st->print(": ");
- }
- case BotPTR:
- if( !WizardMode && !Verbose && _ptr != Constant ) break;
- case TopPTR:
- case AnyNull:
- st->print(":%s", ptr_msg[_ptr]);
- if( _ptr == Constant ) st->print(":exact");
- break;
- default:
- break;
- }
-
- if( _offset ) { // Dump offset, if any
- if( _offset == OffsetBot ) { st->print("+any"); }
- else if( _offset == OffsetTop ) { st->print("+unknown"); }
- else { st->print("+%d", _offset); }
- }
-
- st->print(" *");
+ st->print("aryklassptr:[");
+ _elem->dump2(d, depth, st);
+ _interfaces->dump(st);
+ st->print(":%s", ptr_msg[_ptr]);
+ dump_offset(st);
}
#endif
diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp
index c61c2a64278..4666cfbcf2d 100644
--- a/src/hotspot/share/opto/type.hpp
+++ b/src/hotspot/share/opto/type.hpp
@@ -1176,15 +1176,15 @@ protected:
int hash_speculative() const;
const TypePtr* add_offset_speculative(intptr_t offset) const;
const TypePtr* with_offset_speculative(intptr_t offset) const;
-#ifndef PRODUCT
- void dump_speculative(outputStream *st) const;
-#endif
// utility methods to work on the inline depth of the type
int dual_inline_depth() const;
int meet_inline_depth(int depth) const;
+
#ifndef PRODUCT
- void dump_inline_depth(outputStream *st) const;
+ void dump_speculative(outputStream* st) const;
+ void dump_inline_depth(outputStream* st) const;
+ void dump_offset(outputStream* st) const;
#endif
// TypeInstPtr (TypeAryPtr resp.) and TypeInstKlassPtr (TypeAryKlassPtr resp.) implement very similar meet logic.
@@ -1364,6 +1364,10 @@ protected:
virtual ciKlass* exact_klass_helper() const { return nullptr; }
virtual ciKlass* klass() const { return _klass; }
+#ifndef PRODUCT
+ void dump_instance_id(outputStream* st) const;
+#endif // PRODUCT
+
public:
bool is_java_subtype_of(const TypeOopPtr* other) const {
@@ -1832,9 +1836,6 @@ public:
virtual const TypeKlassPtr* try_improve() const { return this; }
-#ifndef PRODUCT
- virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
-#endif
private:
virtual bool is_meet_subtype_of(const TypePtr* other) const {
return is_meet_subtype_of_helper(other->is_klassptr(), klass_is_exact(), other->is_klassptr()->klass_is_exact());
@@ -1914,6 +1915,11 @@ public:
// Convenience common pre-built types.
static const TypeInstKlassPtr* OBJECT; // Not-null object klass or below
static const TypeInstKlassPtr* OBJECT_OR_NULL; // Maybe-null version of same
+
+#ifndef PRODUCT
+ virtual void dump2(Dict& d, uint depth, outputStream* st) const;
+#endif // PRODUCT
+
private:
virtual bool is_meet_subtype_of_helper(const TypeKlassPtr* other, bool this_xk, bool other_xk) const;
};
diff --git a/src/hotspot/share/opto/vectorization.cpp b/src/hotspot/share/opto/vectorization.cpp
index 98f3d79c9f5..15b2df663b6 100644
--- a/src/hotspot/share/opto/vectorization.cpp
+++ b/src/hotspot/share/opto/vectorization.cpp
@@ -1022,27 +1022,39 @@ bool VPointer::can_make_speculative_aliasing_check_with(const VPointer& other) c
// or at the multiversion_if. That is before the pre-loop. From the construction of
// VPointer, we already know that all its variables (except iv) are pre-loop invariant.
//
- // For the computation of main_init, we also need the pre_limit, and so we need
- // to check that this value is pre-loop invariant. In the case of non-equal iv_scales,
- // we also need the main_limit in the aliasing check, and so this value must then
- // also be pre-loop invariant.
+ // In VPointer::make_speculative_aliasing_check_with we compute main_init in all
+ // cases. For this, we require pre_init and pre_limit. These values must be available
+ // for the speculative check, i.e. their control must dominate the speculative check.
+ // Further, "if vp1.iv_scale() != vp2.iv_scale()" we additionally need to have
+ // main_limit available for the speculative check.
+ // Note: no matter if the speculative check is inserted as a predicate or at the
+ // multiversion if, the speculative check happens before (dominates) the
+ // pre-loop.
+ Node* pre_init = _vloop.pre_loop_end()->init_trip();
Opaque1Node* pre_limit_opaq = _vloop.pre_loop_end()->limit()->as_Opaque1();
Node* pre_limit = pre_limit_opaq->in(1);
Node* main_limit = _vloop.cl()->limit();
-
- if (!_vloop.is_pre_loop_invariant(pre_limit)) {
+ if (!_vloop.is_available_for_speculative_check(pre_init)) {
#ifdef ASSERT
if (_vloop.is_trace_speculative_aliasing_analysis()) {
- tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not pre-loop independent!");
+      tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_init is not available at speculative check!");
+ }
+#endif
+ return false;
+ }
+ if (!_vloop.is_available_for_speculative_check(pre_limit)) {
+#ifdef ASSERT
+ if (_vloop.is_trace_speculative_aliasing_analysis()) {
+ tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not available at speculative check!");
}
#endif
return false;
}
- if (vp1.iv_scale() != vp2.iv_scale() && !_vloop.is_pre_loop_invariant(main_limit)) {
+ if (vp1.iv_scale() != vp2.iv_scale() && !_vloop.is_available_for_speculative_check(main_limit)) {
#ifdef ASSERT
if (_vloop.is_trace_speculative_aliasing_analysis()) {
- tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: main_limit is not pre-loop independent!");
+ tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: main_limit is not available at speculative check!");
}
#endif
return false;
@@ -1119,6 +1131,8 @@ BoolNode* VPointer::make_speculative_aliasing_check_with(const VPointer& other,
Node* pre_limit = pre_limit_opaq->in(1);
assert(_vloop.is_pre_loop_invariant(pre_init), "needed for aliasing check before pre-loop");
assert(_vloop.is_pre_loop_invariant(pre_limit), "needed for aliasing check before pre-loop");
+ assert(_vloop.is_available_for_speculative_check(pre_init), "ctrl must be early enough to avoid cycles");
+ assert(_vloop.is_available_for_speculative_check(pre_limit), "ctrl must be early enough to avoid cycles");
Node* pre_initL = new ConvI2LNode(pre_init);
Node* pre_limitL = new ConvI2LNode(pre_limit);
@@ -1180,6 +1194,7 @@ BoolNode* VPointer::make_speculative_aliasing_check_with(const VPointer& other,
jint main_iv_stride = _vloop.iv_stride();
Node* main_limit = _vloop.cl()->limit();
assert(_vloop.is_pre_loop_invariant(main_limit), "needed for aliasing check before pre-loop");
+ assert(_vloop.is_available_for_speculative_check(main_limit), "ctrl must be early enough to avoid cycles");
Node* main_limitL = new ConvI2LNode(main_limit);
phase->register_new_node_with_ctrl_of(main_limitL, pre_init);
diff --git a/src/hotspot/share/opto/vectorization.hpp b/src/hotspot/share/opto/vectorization.hpp
index f7099b5b7c0..aacd406f798 100644
--- a/src/hotspot/share/opto/vectorization.hpp
+++ b/src/hotspot/share/opto/vectorization.hpp
@@ -236,6 +236,8 @@ public:
// Some nodes must be pre-loop invariant, so that they can be used for conditions
// before or inside the pre-loop. For example, alignment of main-loop vector
// memops must be achieved in the pre-loop, via the exit check in the pre-loop.
+ // Note: this condition is NOT strong enough for speculative checks, those happen
+  // before the pre-loop. See is_available_for_speculative_check().
bool is_pre_loop_invariant(Node* n) const {
// Must be in the main-loop, otherwise we can't access the pre-loop.
// This fails during SuperWord::unrolling_analysis, but that is ok.
@@ -257,6 +259,28 @@ public:
return is_before_pre_loop(early);
}
+ // Nodes that are to be used in speculative checks must be available early enough.
+ // Note: the speculative check happens before the pre-loop, either at the auto
+ // vectorization predicate or the multiversion if. This is before the
+ // pre-loop, and thus the condition here is stronger then the one from
+ // is_pre_loop_invariant.
+ bool is_available_for_speculative_check(Node* n) const {
+ assert(are_speculative_checks_possible(), "meaningless without speculative check");
+ ParsePredicateSuccessProj* parse_predicate_proj = auto_vectorization_parse_predicate_proj();
+ // Find the control of the predicate:
+ ProjNode* proj = (parse_predicate_proj != nullptr) ? parse_predicate_proj : multiversioning_fast_proj();
+ Node* check_ctrl = proj->in(0)->as_If()->in(0);
+
+ // Often, the control of n already dominates that of the predicate.
+ Node* n_ctrl = phase()->get_ctrl(n);
+ if (phase()->is_dominator(n_ctrl, check_ctrl)) { return true; }
+
+ // But in some cases, the ctrl of n is after that of the predicate,
+ // but the early ctrl is before the predicate.
+ Node* n_early = phase()->compute_early_ctrl(n, n_ctrl);
+ return phase()->is_dominator(n_early, check_ctrl);
+ }
+
// Check if the loop passes some basic preconditions for vectorization.
// Return indicates if analysis succeeded.
bool check_preconditions();
diff --git a/src/hotspot/share/prims/jvmtiClassFileReconstituter.cpp b/src/hotspot/share/prims/jvmtiClassFileReconstituter.cpp
index a441d405f8d..5077a1743b9 100644
--- a/src/hotspot/share/prims/jvmtiClassFileReconstituter.cpp
+++ b/src/hotspot/share/prims/jvmtiClassFileReconstituter.cpp
@@ -25,6 +25,7 @@
#include "classfile/symbolTable.hpp"
#include "interpreter/bytecodeStream.hpp"
#include "memory/universe.hpp"
+#include "oops/bsmAttribute.inline.hpp"
#include "oops/constantPool.inline.hpp"
#include "oops/fieldStreams.inline.hpp"
#include "oops/instanceKlass.inline.hpp"
@@ -389,20 +390,13 @@ void JvmtiClassFileReconstituter::write_annotations_attribute(const char* attr_n
// } bootstrap_methods[num_bootstrap_methods];
// }
void JvmtiClassFileReconstituter::write_bootstrapmethod_attribute() {
- Array* operands = cpool()->operands();
write_attribute_name_index("BootstrapMethods");
- int num_bootstrap_methods = ConstantPool::operand_array_length(operands);
-
- // calculate length of attribute
- u4 length = sizeof(u2); // num_bootstrap_methods
- for (int n = 0; n < num_bootstrap_methods; n++) {
- u2 num_bootstrap_arguments = cpool()->bsm_attribute_entry(n)->argument_count();
- length += sizeof(u2); // bootstrap_method_ref
- length += sizeof(u2); // num_bootstrap_arguments
- length += (u4)sizeof(u2) * num_bootstrap_arguments; // bootstrap_arguments[num_bootstrap_arguments]
- }
+ u4 length = sizeof(u2) + // Size of num_bootstrap_methods
+ // The rest of the data for the attribute is exactly the u2s in the data array.
+ sizeof(u2) * cpool()->bsm_entries().array_length();
write_u4(length);
+ int num_bootstrap_methods = cpool()->bsm_entries().number_of_entries();
// write attribute
write_u2(checked_cast(num_bootstrap_methods));
for (int n = 0; n < num_bootstrap_methods; n++) {
@@ -411,7 +405,7 @@ void JvmtiClassFileReconstituter::write_bootstrapmethod_attribute() {
write_u2(bsme->bootstrap_method_index());
write_u2(num_bootstrap_arguments);
for (int arg = 0; arg < num_bootstrap_arguments; arg++) {
- u2 bootstrap_argument = bsme->argument_index(arg);
+ u2 bootstrap_argument = bsme->argument(arg);
write_u2(bootstrap_argument);
}
}
@@ -798,7 +792,7 @@ void JvmtiClassFileReconstituter::write_class_attributes() {
if (type_anno != nullptr) {
++attr_count; // has RuntimeVisibleTypeAnnotations attribute
}
- if (cpool()->operands() != nullptr) {
+ if (!cpool()->bsm_entries().is_empty()) {
++attr_count;
}
if (ik()->nest_host_index() != 0) {
@@ -843,7 +837,7 @@ void JvmtiClassFileReconstituter::write_class_attributes() {
if (ik()->record_components() != nullptr) {
write_record_attribute();
}
- if (cpool()->operands() != nullptr) {
+ if (!cpool()->bsm_entries().is_empty()) {
write_bootstrapmethod_attribute();
}
if (inner_classes_length > 0) {
diff --git a/src/hotspot/share/prims/jvmtiRedefineClasses.cpp b/src/hotspot/share/prims/jvmtiRedefineClasses.cpp
index ef8875d582e..13b239b4df0 100644
--- a/src/hotspot/share/prims/jvmtiRedefineClasses.cpp
+++ b/src/hotspot/share/prims/jvmtiRedefineClasses.cpp
@@ -45,7 +45,8 @@
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/annotations.hpp"
-#include "oops/constantPool.hpp"
+#include "oops/bsmAttribute.inline.hpp"
+#include "oops/constantPool.inline.hpp"
#include "oops/fieldStreams.inline.hpp"
#include "oops/klass.inline.hpp"
#include "oops/klassVtable.hpp"
@@ -573,9 +574,9 @@ void VM_RedefineClasses::append_entry(const constantPoolHandle& scratch_cp,
case JVM_CONSTANT_Dynamic: // fall through
case JVM_CONSTANT_InvokeDynamic:
{
- // Index of the bootstrap specifier in the operands array
+ // Index of the bootstrap specifier in the BSM array
int old_bs_i = scratch_cp->bootstrap_methods_attribute_index(scratch_i);
- int new_bs_i = find_or_append_operand(scratch_cp, old_bs_i, merge_cp_p,
+ int new_bs_i = find_or_append_bsm_entry(scratch_cp, old_bs_i, merge_cp_p,
merge_cp_length_p);
// The bootstrap method NameAndType_info index
int old_ref_i = scratch_cp->bootstrap_name_and_type_ref_index_at(scratch_i);
@@ -591,10 +592,11 @@ void VM_RedefineClasses::append_entry(const constantPoolHandle& scratch_cp,
("Dynamic entry@%d name_and_type_index change: %d to %d", *merge_cp_length_p, old_ref_i, new_ref_i);
}
- if (scratch_cp->tag_at(scratch_i).is_dynamic_constant())
+ if (scratch_cp->tag_at(scratch_i).is_dynamic_constant()) {
(*merge_cp_p)->dynamic_constant_at_put(*merge_cp_length_p, new_bs_i, new_ref_i);
- else
+ } else {
(*merge_cp_p)->invoke_dynamic_at_put(*merge_cp_length_p, new_bs_i, new_ref_i);
+ }
if (scratch_i != *merge_cp_length_p) {
// The new entry in *merge_cp_p is at a different index than
// the new entry in scratch_cp so we need to map the index values.
@@ -660,10 +662,10 @@ u2 VM_RedefineClasses::find_or_append_indirect_entry(const constantPoolHandle& s
} // end find_or_append_indirect_entry()
-// Append a bootstrap specifier into the merge_cp operands that is semantically equal
-// to the scratch_cp operands bootstrap specifier passed by the old_bs_i index.
+// Append a bootstrap specifier into the merge_cp BSM entries that is semantically equal
+// to the scratch_cp BSM entries' bootstrap specifier passed by the old_bs_i index.
// Recursively append new merge_cp entries referenced by the new bootstrap specifier.
-void VM_RedefineClasses::append_operand(const constantPoolHandle& scratch_cp, const int old_bs_i,
+int VM_RedefineClasses::append_bsm_entry(const constantPoolHandle& scratch_cp, const int old_bs_i,
constantPoolHandle *merge_cp_p, int *merge_cp_length_p) {
BSMAttributeEntry* old_bsme = scratch_cp->bsm_attribute_entry(old_bs_i);
@@ -672,90 +674,82 @@ void VM_RedefineClasses::append_operand(const constantPoolHandle& scratch_cp, co
merge_cp_length_p);
if (new_ref_i != old_ref_i) {
log_trace(redefine, class, constantpool)
- ("operands entry@%d bootstrap method ref_index change: %d to %d", _operands_cur_length, old_ref_i, new_ref_i);
+ ("BSM attribute entry@%d bootstrap method ref_index change: %d to %d", _bsmae_iter.current_offset() - 1, old_ref_i, new_ref_i);
}
- Array* merge_ops = (*merge_cp_p)->operands();
- int new_bs_i = _operands_cur_length;
- // We have _operands_cur_length == 0 when the merge_cp operands is empty yet.
- // However, the operand_offset_at(0) was set in the extend_operands() call.
- int new_base = (new_bs_i == 0) ? (*merge_cp_p)->operand_offset_at(0)
- : (*merge_cp_p)->operand_next_offset_at(new_bs_i - 1);
- u2 argc = old_bsme->argument_count();
-
- ConstantPool::operand_offset_at_put(merge_ops, _operands_cur_length, new_base);
- merge_ops->at_put(new_base++, new_ref_i);
- merge_ops->at_put(new_base++, argc);
-
- for (int i = 0; i < argc; i++) {
- u2 old_arg_ref_i = old_bsme->argument_index(i);
+ const int new_bs_i = _bsmae_iter.current_offset();
+ BSMAttributeEntry* new_bsme =
+ _bsmae_iter.reserve_new_entry(new_ref_i, old_bsme->argument_count());
+ assert(new_bsme != nullptr, "must be");
+ for (int i = 0; i < new_bsme->argument_count(); i++) {
+ u2 old_arg_ref_i = old_bsme->argument(i);
u2 new_arg_ref_i = find_or_append_indirect_entry(scratch_cp, old_arg_ref_i, merge_cp_p,
merge_cp_length_p);
- merge_ops->at_put(new_base++, new_arg_ref_i);
+ new_bsme->set_argument(i, new_arg_ref_i);
+
if (new_arg_ref_i != old_arg_ref_i) {
log_trace(redefine, class, constantpool)
- ("operands entry@%d bootstrap method argument ref_index change: %d to %d",
- _operands_cur_length, old_arg_ref_i, new_arg_ref_i);
+ ("BSM attribute entry@%d bootstrap method argument ref_index change: %d to %d",
+ _bsmae_iter.current_offset() - 1, old_arg_ref_i, new_arg_ref_i);
}
}
- if (old_bs_i != _operands_cur_length) {
- // The bootstrap specifier in *merge_cp_p is at a different index than
- // that in scratch_cp so we need to map the index values.
- map_operand_index(old_bs_i, new_bs_i);
- }
- _operands_cur_length++;
-} // end append_operand()
+ // This is only for the logging
+ map_bsm_index(old_bs_i, new_bs_i);
+ return new_bs_i;
+} // end append_bsm_entry()
-int VM_RedefineClasses::find_or_append_operand(const constantPoolHandle& scratch_cp,
+int VM_RedefineClasses::find_or_append_bsm_entry(const constantPoolHandle& scratch_cp,
int old_bs_i, constantPoolHandle *merge_cp_p, int *merge_cp_length_p) {
+ const int max_offset_in_merge = _bsmae_iter.current_offset();
int new_bs_i = old_bs_i; // bootstrap specifier index
- bool match = (old_bs_i < _operands_cur_length) &&
- scratch_cp->compare_operand_to(old_bs_i, *merge_cp_p, old_bs_i);
+ // Has the old_bs_i index been used already? Check if it's the same so we know
+ // whether or not a remapping is required.
+ bool match = (old_bs_i < max_offset_in_merge) &&
+ scratch_cp->compare_bootstrap_entry_to(old_bs_i, *merge_cp_p, old_bs_i);
if (!match) {
// forward reference in *merge_cp_p or not a direct match
- int found_i = scratch_cp->find_matching_operand(old_bs_i, *merge_cp_p,
- _operands_cur_length);
+ int found_i = scratch_cp->find_matching_bsm_entry(old_bs_i, *merge_cp_p,
+ max_offset_in_merge);
if (found_i != -1) {
- guarantee(found_i != old_bs_i, "compare_operand_to() and find_matching_operand() disagree");
- // found a matching operand somewhere else in *merge_cp_p so just need a mapping
+ guarantee(found_i != old_bs_i, "compare_bootstrap_entry_to() and find_matching_bsm_entry() disagree");
+ // found a matching BSM entry somewhere else in *merge_cp_p so just need a mapping
new_bs_i = found_i;
- map_operand_index(old_bs_i, found_i);
+ map_bsm_index(old_bs_i, found_i);
} else {
// no match found so we have to append this bootstrap specifier to *merge_cp_p
- append_operand(scratch_cp, old_bs_i, merge_cp_p, merge_cp_length_p);
- new_bs_i = _operands_cur_length - 1;
+ new_bs_i = append_bsm_entry(scratch_cp, old_bs_i, merge_cp_p, merge_cp_length_p);
}
}
return new_bs_i;
-} // end find_or_append_operand()
+} // end find_or_append_bsm_entry()
-void VM_RedefineClasses::finalize_operands_merge(const constantPoolHandle& merge_cp, TRAPS) {
- if (merge_cp->operands() == nullptr) {
+void VM_RedefineClasses::finalize_bsm_entries_merge(const constantPoolHandle& merge_cp, TRAPS) {
+ if (merge_cp->bsm_entries().number_of_entries() == 0) {
return;
}
- // Shrink the merge_cp operands
- merge_cp->shrink_operands(_operands_cur_length, CHECK);
+ // Finished extending the BSMAEs
+ merge_cp->end_extension(_bsmae_iter, CHECK);
if (log_is_enabled(Trace, redefine, class, constantpool)) {
// don't want to loop unless we are tracing
int count = 0;
- for (int i = 1; i < _operands_index_map_p->length(); i++) {
- int value = _operands_index_map_p->at(i);
+ for (int i = 1; i < _bsm_index_map_p->length(); i++) {
+ int value = _bsm_index_map_p->at(i);
if (value != -1) {
- log_trace(redefine, class, constantpool)("operands_index_map[%d]: old=%d new=%d", count, i, value);
+ log_trace(redefine, class, constantpool)("bsm_index_map[%d]: old=%d new=%d", count, i, value);
count++;
}
}
}
// Clean-up
- _operands_index_map_p = nullptr;
- _operands_cur_length = 0;
- _operands_index_map_count = 0;
-} // end finalize_operands_merge()
+ _bsm_index_map_p = nullptr;
+ _bsm_index_map_count = 0;
+ _bsmae_iter = BSMAttributeEntries::InsertionIterator();
+} // end finalize_bsm_entries_merge()
// Symbol* comparator for qsort
// The caller must have an active ResourceMark.
@@ -1272,26 +1266,26 @@ u2 VM_RedefineClasses::find_new_index(int old_index) {
// Find new bootstrap specifier index value for old bootstrap specifier index
// value by searching the index map. Returns unused index (-1) if there is
// no mapped value for the old bootstrap specifier index.
-int VM_RedefineClasses::find_new_operand_index(int old_index) {
- if (_operands_index_map_count == 0) {
+int VM_RedefineClasses::find_new_bsm_index(int old_index) {
+ if (_bsm_index_map_count == 0) {
// map is empty so nothing can be found
return -1;
}
- if (old_index == -1 || old_index >= _operands_index_map_p->length()) {
+ if (old_index == -1 || old_index >= _bsm_index_map_p->length()) {
// The old_index is out of range so it is not mapped.
// This should not happen in regular constant pool merging use.
return -1;
}
- int value = _operands_index_map_p->at(old_index);
+ int value = _bsm_index_map_p->at(old_index);
if (value == -1) {
// the old_index is not mapped
return -1;
}
return value;
-} // end find_new_operand_index()
+} // end find_new_bsm_index()
// The bug 6214132 caused the verification to fail.
@@ -1560,22 +1554,15 @@ void VM_RedefineClasses::map_index(const constantPoolHandle& scratch_cp,
// Map old_index to new_index as needed.
-void VM_RedefineClasses::map_operand_index(int old_index, int new_index) {
- if (find_new_operand_index(old_index) != -1) {
- // old_index is already mapped
- return;
- }
-
+void VM_RedefineClasses::map_bsm_index(int old_index, int new_index) {
if (old_index == new_index) {
// no mapping is needed
return;
}
-
- _operands_index_map_p->at_put(old_index, new_index);
- _operands_index_map_count++;
-
+ _bsm_index_map_p->at_put(old_index, new_index);
+ _bsm_index_map_count++;
log_trace(redefine, class, constantpool)("mapped bootstrap specifier at index %d to %d", old_index, new_index);
-} // end map_index()
+} // end map_bsm_index()
// Merge old_cp and scratch_cp and return the results of the merge via
@@ -1639,8 +1626,8 @@ bool VM_RedefineClasses::merge_constant_pools(const constantPoolHandle& old_cp,
}
} // end for each old_cp entry
- ConstantPool::copy_operands(old_cp, merge_cp_p, CHECK_false);
- merge_cp_p->extend_operands(scratch_cp, CHECK_false);
+ ConstantPool::copy_bsm_entries(old_cp, merge_cp_p, CHECK_false);
+ _bsmae_iter = merge_cp_p->start_extension(scratch_cp, CHECK_false);
// We don't need to sanity check that *merge_cp_length_p is within
// *merge_cp_p bounds since we have the minimum on-entry check above.
@@ -1737,7 +1724,7 @@ bool VM_RedefineClasses::merge_constant_pools(const constantPoolHandle& old_cp,
("after pass 1b: merge_cp_len=%d, scratch_i=%d, index_map_len=%d",
merge_cp_length_p, scratch_i, _index_map_count);
}
- finalize_operands_merge(merge_cp_p, CHECK_false);
+ finalize_bsm_entries_merge(merge_cp_p, CHECK_false);
return true;
} // end merge_constant_pools()
@@ -1807,12 +1794,11 @@ jvmtiError VM_RedefineClasses::merge_cp_and_rewrite(
_index_map_count = 0;
_index_map_p = new intArray(scratch_cp->length(), scratch_cp->length(), -1);
- _operands_cur_length = ConstantPool::operand_array_length(old_cp->operands());
- _operands_index_map_count = 0;
- int operands_index_map_len = ConstantPool::operand_array_length(scratch_cp->operands());
- _operands_index_map_p = new intArray(operands_index_map_len, operands_index_map_len, -1);
+ _bsm_index_map_count = 0;
+ int bsm_data_len = scratch_cp->bsm_entries().array_length();
+ _bsm_index_map_p = new intArray(bsm_data_len, bsm_data_len, -1);
- // reference to the cp holder is needed for copy_operands()
+ // reference to the cp holder is needed for reallocating the BSM attribute
merge_cp->set_pool_holder(scratch_class);
bool result = merge_constant_pools(old_cp, scratch_cp, merge_cp,
merge_cp_length, THREAD);
@@ -3500,7 +3486,7 @@ void VM_RedefineClasses::set_new_constant_pool(
smaller_cp->set_version(version);
// attach klass to new constant pool
- // reference to the cp holder is needed for copy_operands()
+ // reference to the cp holder is needed for reallocating the BSM attribute
smaller_cp->set_pool_holder(scratch_class);
smaller_cp->copy_fields(scratch_cp());
diff --git a/src/hotspot/share/prims/jvmtiRedefineClasses.hpp b/src/hotspot/share/prims/jvmtiRedefineClasses.hpp
index d2eda1f3eed..3f1b555b175 100644
--- a/src/hotspot/share/prims/jvmtiRedefineClasses.hpp
+++ b/src/hotspot/share/prims/jvmtiRedefineClasses.hpp
@@ -363,11 +363,16 @@ class VM_RedefineClasses: public VM_Operation {
int _index_map_count;
intArray * _index_map_p;
- // _operands_index_map_count is just an optimization for knowing if
- // _operands_index_map_p contains any entries.
- int _operands_cur_length;
- int _operands_index_map_count;
- intArray * _operands_index_map_p;
+ // _bsm_index_map_count is just an optimization for knowing if
+ // _bsm_index_map_p contains any entries.
+ int _bsm_index_map_count;
+ intArray * _bsm_index_map_p;
+
+ // After merge_constant_pools "Pass 0", the BSMAttribute entries of merge_cp_p will have been expanded to fit
+ // scratch_cp's BSMAttribute entries as well.
+ // However, the newly acquired space will not have been filled in yet.
+ // To append to this new space, the iterator is used.
+ BSMAttributeEntries::InsertionIterator _bsmae_iter;
// ptr to _class_count scratch_classes
InstanceKlass** _scratch_classes;
@@ -429,17 +434,18 @@ class VM_RedefineClasses: public VM_Operation {
// Support for constant pool merging (these routines are in alpha order):
void append_entry(const constantPoolHandle& scratch_cp, int scratch_i,
constantPoolHandle *merge_cp_p, int *merge_cp_length_p);
- void append_operand(const constantPoolHandle& scratch_cp, int scratch_bootstrap_spec_index,
+ // Returns the index of the appended BSM
+ int append_bsm_entry(const constantPoolHandle& scratch_cp, int scratch_bootstrap_spec_index,
constantPoolHandle *merge_cp_p, int *merge_cp_length_p);
- void finalize_operands_merge(const constantPoolHandle& merge_cp, TRAPS);
+ void finalize_bsm_entries_merge(const constantPoolHandle& merge_cp, TRAPS);
u2 find_or_append_indirect_entry(const constantPoolHandle& scratch_cp, int scratch_i,
constantPoolHandle *merge_cp_p, int *merge_cp_length_p);
- int find_or_append_operand(const constantPoolHandle& scratch_cp, int scratch_bootstrap_spec_index,
+ int find_or_append_bsm_entry(const constantPoolHandle& scratch_cp, int scratch_bootstrap_spec_index,
constantPoolHandle *merge_cp_p, int *merge_cp_length_p);
u2 find_new_index(int old_index);
- int find_new_operand_index(int old_bootstrap_spec_index);
+ int find_new_bsm_index(int old_bootstrap_spec_index);
void map_index(const constantPoolHandle& scratch_cp, int old_index, int new_index);
- void map_operand_index(int old_bootstrap_spec_index, int new_bootstrap_spec_index);
+ void map_bsm_index(int old_bootstrap_spec_index, int new_bootstrap_spec_index);
bool merge_constant_pools(const constantPoolHandle& old_cp,
const constantPoolHandle& scratch_cp, constantPoolHandle& merge_cp_p,
int& merge_cp_length_p, TRAPS);
diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp
index 55ee7641a5f..4a983095593 100644
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@@ -1478,10 +1478,10 @@ void Arguments::set_conservative_max_heap_alignment() {
// the alignments imposed by several sources: any requirements from the heap
// itself and the maximum page size we may run the VM with.
size_t heap_alignment = GCConfig::arguments()->conservative_max_heap_alignment();
- _conservative_max_heap_alignment = MAX4(heap_alignment,
+ _conservative_max_heap_alignment = MAX3(heap_alignment,
os::vm_allocation_granularity(),
- os::max_page_size(),
- GCArguments::compute_heap_alignment());
+ os::max_page_size());
+ assert(is_power_of_2(_conservative_max_heap_alignment), "Expected to be a power-of-2");
}
jint Arguments::set_ergonomics_flags() {
@@ -1589,8 +1589,8 @@ void Arguments::set_heap_size() {
}
if (UseCompressedOops) {
- size_t heap_end = HeapBaseMinAddress + MaxHeapSize;
- size_t max_coop_heap = max_heap_for_compressed_oops();
+ uintptr_t heap_end = HeapBaseMinAddress + MaxHeapSize;
+ uintptr_t max_coop_heap = max_heap_for_compressed_oops();
// Limit the heap size to the maximum possible when using compressed oops
if (heap_end < max_coop_heap) {
@@ -1607,7 +1607,7 @@ void Arguments::set_heap_size() {
aot_log_info(aot)("UseCompressedOops disabled due to "
"max heap %zu > compressed oop heap %zu. "
"Please check the setting of MaxRAMPercentage %5.2f.",
- reasonable_max, max_coop_heap, MaxRAMPercentage);
+ reasonable_max, (size_t)max_coop_heap, MaxRAMPercentage);
FLAG_SET_ERGO(UseCompressedOops, false);
} else {
reasonable_max = max_coop_heap;
diff --git a/src/hotspot/share/runtime/atomic.hpp b/src/hotspot/share/runtime/atomic.hpp
index 5b4d7d8659f..b8960fd796b 100644
--- a/src/hotspot/share/runtime/atomic.hpp
+++ b/src/hotspot/share/runtime/atomic.hpp
@@ -75,6 +75,7 @@
// v.release_store(x) -> void
// v.release_store_fence(x) -> void
// v.compare_exchange(x, y [, o]) -> T
+// v.exchange(x [, o]) -> T
//
// (2) All atomic types are default constructible.
//
@@ -92,7 +93,6 @@
// (3) Atomic pointers and atomic integers additionally provide
//
// member functions:
-// v.exchange(x [, o]) -> T
// v.add_then_fetch(i [, o]) -> T
// v.sub_then_fetch(i [, o]) -> T
// v.fetch_then_add(i [, o]) -> T
@@ -102,10 +102,7 @@
// type of i must be signed, or both must be unsigned. Atomic pointers perform
// element arithmetic.
//
-// (4) An atomic translated type additionally provides the exchange
-// function if its associated atomic decayed type provides that function.
-//
-// (5) Atomic integers additionally provide
+// (4) Atomic integers additionally provide
//
// member functions:
// v.and_then_fetch(x [, o]) -> T
@@ -115,7 +112,7 @@
// v.fetch_then_or(x [, o]) -> T
// v.fetch_then_xor(x [, o]) -> T
//
-// (6) Atomic pointers additionally provide
+// (5) Atomic pointers additionally provide
//
// nested types:
// ElementType -> std::remove_pointer_t
@@ -127,9 +124,6 @@
// stand out a little more when used in surrounding non-atomic code. Without
// the "AtomicAccess::" qualifier, some of those names are easily overlooked.
//
-// Atomic bytes don't provide exchange(). This is because that operation
-// hasn't been implemented for 1 byte values. That could be changed if needed.
-//
// Atomic for 2 byte integers is not supported. This is because atomic
// operations of that size have not been implemented. There haven't been
// required use-cases. Many platforms don't provide hardware support.
@@ -184,15 +178,8 @@ private:
// Helper base classes, providing various parts of the APIs.
template class CommonCore;
- template class SupportsExchange;
template class SupportsArithmetic;
- // Support conditional exchange() for atomic translated types.
- template class HasExchange;
- template class DecayedHasExchange;
- template::value>
- class TranslatedExchange;
-
public:
template()>
class Atomic;
@@ -275,15 +262,7 @@ public:
atomic_memory_order order = memory_order_conservative) {
return AtomicAccess::cmpxchg(value_ptr(), compare_value, new_value, order);
}
-};
-template
-class AtomicImpl::SupportsExchange : public CommonCore {
-protected:
- explicit SupportsExchange(T value) : CommonCore(value) {}
- ~SupportsExchange() = default;
-
-public:
T exchange(T new_value,
atomic_memory_order order = memory_order_conservative) {
return AtomicAccess::xchg(this->value_ptr(), new_value, order);
@@ -291,7 +270,7 @@ public:
};
template
-class AtomicImpl::SupportsArithmetic : public SupportsExchange {
+class AtomicImpl::SupportsArithmetic : public CommonCore {
// Guarding the AtomicAccess calls with constexpr checking of Offset produces
// better compile-time error messages.
template
@@ -311,7 +290,7 @@ class AtomicImpl::SupportsArithmetic : public SupportsExchange {
}
protected:
- explicit SupportsArithmetic(T value) : SupportsExchange(value) {}
+ explicit SupportsArithmetic(T value) : CommonCore(value) {}
~SupportsArithmetic() = default;
public:
@@ -424,54 +403,8 @@ public:
// Atomic translated type
-// Test whether Atomic has exchange().
template
-class AtomicImpl::HasExchange {
- template static void* test(decltype(&Check::exchange));
- template static int test(...);
- using test_type = decltype(test>(nullptr));
-public:
- static constexpr bool value = std::is_pointer_v;
-};
-
-// Test whether the atomic decayed type associated with T has exchange().
-template
-class AtomicImpl::DecayedHasExchange {
- using Translator = PrimitiveConversions::Translate;
- using Decayed = typename Translator::Decayed;
-
- // "Unit test" HasExchange<>.
- static_assert(HasExchange::value);
- static_assert(HasExchange::value);
- static_assert(!HasExchange::value);
-
-public:
- static constexpr bool value = HasExchange::value;
-};
-
-// Base class for atomic translated type if atomic decayed type doesn't have
-// exchange().
-template
-class AtomicImpl::TranslatedExchange {};
-
-// Base class for atomic translated type if atomic decayed type does have
-// exchange().
-template
-class AtomicImpl::TranslatedExchange {
-public:
- T exchange(T new_value,
- atomic_memory_order order = memory_order_conservative) {
- return static_cast(this)->exchange_impl(new_value, order);
- }
-};
-
-template
-class AtomicImpl::Atomic
- : public TranslatedExchange, T>
-{
- // Give TranslatedExchange<> access to exchange_impl() if needed.
- friend class TranslatedExchange, T>;
-
+class AtomicImpl::Atomic {
using Translator = PrimitiveConversions::Translate;
using Decayed = typename Translator::Decayed;
@@ -533,12 +466,7 @@ public:
order));
}
-private:
- // Implementation of exchange() if needed.
- // Exclude when not needed, to prevent reference to non-existent function
- // of atomic decayed type if someone explicitly instantiates Atomic.
- template::value)>
- T exchange_impl(T new_value, atomic_memory_order order) {
+ T exchange(T new_value, atomic_memory_order order = memory_order_conservative) {
return recover(_value.exchange(decay(new_value), order));
}
};
diff --git a/src/hotspot/share/runtime/atomicAccess.hpp b/src/hotspot/share/runtime/atomicAccess.hpp
index 72b7f92cf04..fb06f084366 100644
--- a/src/hotspot/share/runtime/atomicAccess.hpp
+++ b/src/hotspot/share/runtime/atomicAccess.hpp
@@ -419,8 +419,8 @@ private:
struct XchgImpl;
// Platform-specific implementation of xchg. Support for sizes
- // of 4, and sizeof(intptr_t) are required. The class is a function
- // object that must be default constructable, with these requirements:
+ // of 1, 4, and 8 are required. The class is a function object
+ // that must be default constructable, with these requirements:
//
// - dest is of type T*.
// - exchange_value is of type T.
diff --git a/src/hotspot/share/runtime/cpuTimeCounters.cpp b/src/hotspot/share/runtime/cpuTimeCounters.cpp
index e5364550b6c..e174407089c 100644
--- a/src/hotspot/share/runtime/cpuTimeCounters.cpp
+++ b/src/hotspot/share/runtime/cpuTimeCounters.cpp
@@ -118,8 +118,5 @@ ThreadTotalCPUTimeClosure::~ThreadTotalCPUTimeClosure() {
}
void ThreadTotalCPUTimeClosure::do_thread(Thread* thread) {
- // The default code path (fast_thread_cpu_time()) asserts that
- // pthread_getcpuclockid() and clock_gettime() must return 0. Thus caller
- // must ensure the thread exists and has not terminated.
_total += os::thread_cpu_time(thread);
}
diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp
index 0aa7b392b17..e2029a26d37 100644
--- a/src/hotspot/share/runtime/deoptimization.cpp
+++ b/src/hotspot/share/runtime/deoptimization.cpp
@@ -498,6 +498,9 @@ Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread
RegisterMap::WalkContinuation::skip);
// Now get the deoptee with a valid map
frame deoptee = stub_frame.sender(&map);
+ if (exec_mode == Unpack_deopt) {
+ assert(deoptee.is_deoptimized_frame(), "frame is not marked for deoptimization");
+ }
// Set the deoptee nmethod
assert(current->deopt_compiled_method() == nullptr, "Pending deopt!");
nmethod* nm = deoptee.cb()->as_nmethod_or_null();
diff --git a/src/hotspot/share/runtime/frame.cpp b/src/hotspot/share/runtime/frame.cpp
index b5cd4acc75d..8f969600ba8 100644
--- a/src/hotspot/share/runtime/frame.cpp
+++ b/src/hotspot/share/runtime/frame.cpp
@@ -206,7 +206,7 @@ address frame::raw_pc() const {
if (is_deoptimized_frame()) {
nmethod* nm = cb()->as_nmethod_or_null();
assert(nm != nullptr, "only nmethod is expected here");
- return nm->deopt_handler_begin() - pc_return_offset;
+ return nm->deopt_handler_entry() - pc_return_offset;
} else {
return (pc() - pc_return_offset);
}
@@ -355,7 +355,7 @@ void frame::deoptimize(JavaThread* thread) {
// If the call site is a MethodHandle call site use the MH deopt handler.
nmethod* nm = _cb->as_nmethod();
- address deopt = nm->deopt_handler_begin();
+ address deopt = nm->deopt_handler_entry();
NativePostCallNop* inst = nativePostCallNop_at(pc());
diff --git a/src/hotspot/share/runtime/os.hpp b/src/hotspot/share/runtime/os.hpp
index e008f29eecc..b65bf643cbf 100644
--- a/src/hotspot/share/runtime/os.hpp
+++ b/src/hotspot/share/runtime/os.hpp
@@ -534,6 +534,7 @@ class os: AllStatic {
static void realign_memory(char *addr, size_t bytes, size_t alignment_hint);
// NUMA-specific interface
+ static void numa_set_thread_affinity(Thread* thread, int node);
static void numa_make_local(char *addr, size_t bytes, int lgrp_hint);
static void numa_make_global(char *addr, size_t bytes);
static size_t numa_get_groups_num();
diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp
index 79c7c0b32b4..e277e1fb569 100644
--- a/src/hotspot/share/runtime/sharedRuntime.cpp
+++ b/src/hotspot/share/runtime/sharedRuntime.cpp
@@ -87,6 +87,9 @@
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
#if INCLUDE_JFR
#include "jfr/jfr.inline.hpp"
#endif
@@ -601,6 +604,11 @@ address SharedRuntime::raw_exception_handler_for_return_address(JavaThread* curr
// The deferred StackWatermarkSet::after_unwind check will be performed in
// * OptoRuntime::handle_exception_C_helper for C2 code
// * exception_handler_for_pc_helper via Runtime1::handle_exception_from_callee_id for C1 code
+#ifdef COMPILER2
+ if (nm->compiler_type() == compiler_c2) {
+ return OptoRuntime::exception_blob()->entry_point();
+ }
+#endif // COMPILER2
return nm->exception_begin();
}
}
diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp
index a75e67e9b56..25a99c2d758 100644
--- a/src/hotspot/share/runtime/vmStructs.cpp
+++ b/src/hotspot/share/runtime/vmStructs.cpp
@@ -54,6 +54,7 @@
#include "oops/array.hpp"
#include "oops/arrayKlass.hpp"
#include "oops/arrayOop.hpp"
+#include "oops/bsmAttribute.hpp"
#include "oops/constantPool.hpp"
#include "oops/constMethod.hpp"
#include "oops/cpCache.hpp"
@@ -166,10 +167,12 @@
nonstatic_field(ArrayKlass, _dimension, int) \
volatile_nonstatic_field(ArrayKlass, _higher_dimension, ObjArrayKlass*) \
volatile_nonstatic_field(ArrayKlass, _lower_dimension, ArrayKlass*) \
+ nonstatic_field(BSMAttributeEntries, _offsets, Array*) \
+ nonstatic_field(BSMAttributeEntries, _bootstrap_methods, Array*) \
+ nonstatic_field(ConstantPool, _bsm_entries, BSMAttributeEntries) \
nonstatic_field(ConstantPool, _tags, Array*) \
nonstatic_field(ConstantPool, _cache, ConstantPoolCache*) \
nonstatic_field(ConstantPool, _pool_holder, InstanceKlass*) \
- nonstatic_field(ConstantPool, _operands, Array*) \
nonstatic_field(ConstantPool, _resolved_klasses, Array*) \
nonstatic_field(ConstantPool, _length, int) \
nonstatic_field(ConstantPool, _minor_version, u2) \
@@ -534,7 +537,7 @@
nonstatic_field(nmethod, _osr_link, nmethod*) \
nonstatic_field(nmethod, _state, volatile signed char) \
nonstatic_field(nmethod, _exception_offset, int) \
- nonstatic_field(nmethod, _deopt_handler_offset, int) \
+ nonstatic_field(nmethod, _deopt_handler_entry_offset, int) \
nonstatic_field(nmethod, _orig_pc_offset, int) \
nonstatic_field(nmethod, _stub_offset, int) \
nonstatic_field(nmethod, _immutable_data_ref_count_offset, int) \
@@ -733,6 +736,7 @@
unchecked_nonstatic_field(Array, _data, sizeof(int)) \
unchecked_nonstatic_field(Array, _data, sizeof(u1)) \
unchecked_nonstatic_field(Array, _data, sizeof(u2)) \
+ unchecked_nonstatic_field(Array, _data, sizeof(u4)) \
unchecked_nonstatic_field(Array, _data, sizeof(Method*)) \
unchecked_nonstatic_field(Array, _data, sizeof(Klass*)) \
unchecked_nonstatic_field(Array, _data, sizeof(ResolvedFieldEntry)) \
@@ -964,6 +968,7 @@
declare_toplevel_type(volatile Metadata*) \
\
declare_toplevel_type(DataLayout) \
+ declare_toplevel_type(BSMAttributeEntries) \
\
/********/ \
/* Oops */ \
diff --git a/src/hotspot/share/services/cpuTimeUsage.cpp b/src/hotspot/share/services/cpuTimeUsage.cpp
index 0c7ecfdb655..27b5e90fbaf 100644
--- a/src/hotspot/share/services/cpuTimeUsage.cpp
+++ b/src/hotspot/share/services/cpuTimeUsage.cpp
@@ -36,7 +36,6 @@
volatile bool CPUTimeUsage::Error::_has_error = false;
static inline jlong thread_cpu_time_or_zero(Thread* thread) {
- assert(!Universe::is_shutting_down(), "Should not query during shutdown");
jlong cpu_time = os::thread_cpu_time(thread);
if (cpu_time == -1) {
CPUTimeUsage::Error::mark_error();
diff --git a/src/hotspot/share/services/diagnosticCommand.cpp b/src/hotspot/share/services/diagnosticCommand.cpp
index 5bef650891d..91b23904676 100644
--- a/src/hotspot/share/services/diagnosticCommand.cpp
+++ b/src/hotspot/share/services/diagnosticCommand.cpp
@@ -22,6 +22,7 @@
*
*/
+#include "cds/aotMetaspace.hpp"
#include "cds/cds_globals.hpp"
#include "cds/cdsConfig.hpp"
#include "classfile/classLoaderDataGraph.hpp"
@@ -165,6 +166,7 @@ void DCmd::register_dcmds(){
#if INCLUDE_CDS
DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false));
+ DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false));
#endif // INCLUDE_CDS
DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false));
@@ -986,6 +988,28 @@ void ClassesDCmd::execute(DCmdSource source, TRAPS) {
VMThread::execute(&vmop);
}
+#if INCLUDE_CDS
+void AOTEndRecordingDCmd::execute(DCmdSource source, TRAPS) {
+ if (!CDSConfig::is_dumping_preimage_static_archive()) {
+ output()->print_cr("AOT.end_recording is unsupported when VM flags -XX:AOTMode=record or -XX:AOTCacheOutput= are missing.");
+ return;
+ }
+
+ if (AOTMetaspace::preimage_static_archive_dumped()) {
+ output()->print_cr("Recording has already ended.");
+ return;
+ }
+
+ AOTMetaspace::dump_static_archive(THREAD);
+ if (!AOTMetaspace::preimage_static_archive_dumped()) {
+ output()->print_cr("Error: Failed to end recording.");
+ return;
+ }
+
+ output()->print_cr("Recording ended successfully.");
+}
+#endif // INCLUDE_CDS
+
#if INCLUDE_CDS
#define DEFAULT_CDS_ARCHIVE_FILENAME "java_pid%p_.jsa"
diff --git a/src/hotspot/share/services/diagnosticCommand.hpp b/src/hotspot/share/services/diagnosticCommand.hpp
index 2364b0ce4cd..c41e7bf2e2e 100644
--- a/src/hotspot/share/services/diagnosticCommand.hpp
+++ b/src/hotspot/share/services/diagnosticCommand.hpp
@@ -325,6 +325,21 @@ public:
virtual void execute(DCmdSource source, TRAPS);
};
+#if INCLUDE_CDS
+class AOTEndRecordingDCmd : public DCmd {
+public:
+ AOTEndRecordingDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+ static const char* name() { return "AOT.end_recording"; }
+ static const char* description() {
+ return "End AOT recording.";
+ }
+ static const char* impact() {
+ return "Medium: Pause time depends on number of loaded classes";
+ }
+ virtual void execute(DCmdSource source, TRAPS);
+};
+#endif // INCLUDE_CDS
+
#if INCLUDE_CDS
class DumpSharedArchiveDCmd: public DCmdWithParser {
protected:
diff --git a/src/hotspot/share/utilities/vmError.cpp b/src/hotspot/share/utilities/vmError.cpp
index e0cbb60c744..a290602e0be 100644
--- a/src/hotspot/share/utilities/vmError.cpp
+++ b/src/hotspot/share/utilities/vmError.cpp
@@ -664,6 +664,7 @@ void VMError::report(outputStream* st, bool _verbose) {
BEGIN
if (MemTracker::enabled() &&
NmtVirtualMemory_lock != nullptr &&
+ _thread != nullptr &&
NmtVirtualMemory_lock->owned_by_self()) {
// Manually unlock to avoid reentrancy due to mallocs in detailed mode.
NmtVirtualMemory_lock->unlock();
@@ -1305,7 +1306,7 @@ void VMError::report(outputStream* st, bool _verbose) {
os::print_signal_handlers(st, buf, sizeof(buf));
st->cr();
- STEP_IF("Native Memory Tracking", _verbose)
+ STEP_IF("Native Memory Tracking", _verbose && _thread != nullptr)
MemTracker::error_report(st);
st->cr();
diff --git a/src/hotspot/share/utilities/waitBarrier_generic.cpp b/src/hotspot/share/utilities/waitBarrier_generic.cpp
index a6436d93ffc..b268b10c757 100644
--- a/src/hotspot/share/utilities/waitBarrier_generic.cpp
+++ b/src/hotspot/share/utilities/waitBarrier_generic.cpp
@@ -23,7 +23,6 @@
*
*/
-#include "runtime/atomicAccess.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "utilities/spinYield.hpp"
@@ -79,10 +78,10 @@
void GenericWaitBarrier::arm(int barrier_tag) {
assert(barrier_tag != 0, "Pre arm: Should be arming with armed value");
- assert(AtomicAccess::load(&_barrier_tag) == 0,
+ assert(_barrier_tag.load_relaxed() == 0,
"Pre arm: Should not be already armed. Tag: %d",
- AtomicAccess::load(&_barrier_tag));
- AtomicAccess::release_store(&_barrier_tag, barrier_tag);
+ _barrier_tag.load_relaxed());
+ _barrier_tag.release_store(barrier_tag);
Cell &cell = tag_to_cell(barrier_tag);
cell.arm(barrier_tag);
@@ -92,9 +91,9 @@ void GenericWaitBarrier::arm(int barrier_tag) {
}
void GenericWaitBarrier::disarm() {
- int barrier_tag = AtomicAccess::load_acquire(&_barrier_tag);
+ int barrier_tag = _barrier_tag.load_acquire();
assert(barrier_tag != 0, "Pre disarm: Should be armed. Tag: %d", barrier_tag);
- AtomicAccess::release_store(&_barrier_tag, 0);
+ _barrier_tag.release_store(0);
Cell &cell = tag_to_cell(barrier_tag);
cell.disarm(barrier_tag);
@@ -121,7 +120,7 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
SpinYield sp;
while (true) {
- state = AtomicAccess::load_acquire(&_state);
+ state = _state.load_acquire();
assert(decode_tag(state) == 0,
"Pre arm: Should not be armed. "
"Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
@@ -134,7 +133,7 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
// Try to swing cell to armed. This should always succeed after the check above.
int64_t new_state = encode(requested_tag, 0);
- int64_t prev_state = AtomicAccess::cmpxchg(&_state, state, new_state);
+ int64_t prev_state = _state.compare_exchange(state, new_state);
if (prev_state != state) {
fatal("Cannot arm the wait barrier. "
"Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
@@ -145,14 +144,14 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
int GenericWaitBarrier::Cell::signal_if_needed(int max) {
int signals = 0;
while (true) {
- int cur = AtomicAccess::load_acquire(&_outstanding_wakeups);
+ int cur = _outstanding_wakeups.load_acquire();
if (cur == 0) {
// All done, no more waiters.
return 0;
}
assert(cur > 0, "Sanity");
- int prev = AtomicAccess::cmpxchg(&_outstanding_wakeups, cur, cur - 1);
+ int prev = _outstanding_wakeups.compare_exchange(cur, cur - 1);
if (prev != cur) {
// Contention, return to caller for early return or backoff.
return prev;
@@ -172,7 +171,7 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
int32_t waiters;
while (true) {
- int64_t state = AtomicAccess::load_acquire(&_state);
+ int64_t state = _state.load_acquire();
int32_t tag = decode_tag(state);
waiters = decode_waiters(state);
@@ -182,7 +181,7 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
tag, waiters);
int64_t new_state = encode(0, waiters);
- if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
+ if (_state.compare_exchange(state, new_state) == state) {
// Successfully disarmed.
break;
}
@@ -191,19 +190,19 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
// Wake up waiters, if we have at least one.
// Allow other threads to assist with wakeups, if possible.
if (waiters > 0) {
- AtomicAccess::release_store(&_outstanding_wakeups, waiters);
+ _outstanding_wakeups.release_store(waiters);
SpinYield sp;
while (signal_if_needed(INT_MAX) > 0) {
sp.wait();
}
}
- assert(AtomicAccess::load(&_outstanding_wakeups) == 0, "Post disarm: Should not have outstanding wakeups");
+ assert(_outstanding_wakeups.load_relaxed() == 0, "Post disarm: Should not have outstanding wakeups");
}
void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
// Try to register ourselves as pending waiter.
while (true) {
- int64_t state = AtomicAccess::load_acquire(&_state);
+ int64_t state = _state.load_acquire();
int32_t tag = decode_tag(state);
if (tag != expected_tag) {
// Cell tag had changed while waiting here. This means either the cell had
@@ -219,7 +218,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
tag, waiters);
int64_t new_state = encode(tag, waiters + 1);
- if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
+ if (_state.compare_exchange(state, new_state) == state) {
// Success! Proceed to wait.
break;
}
@@ -238,7 +237,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
// Register ourselves as completed waiter before leaving.
while (true) {
- int64_t state = AtomicAccess::load_acquire(&_state);
+ int64_t state = _state.load_acquire();
int32_t tag = decode_tag(state);
int32_t waiters = decode_waiters(state);
@@ -248,7 +247,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
tag, waiters);
int64_t new_state = encode(tag, waiters - 1);
- if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
+ if (_state.compare_exchange(state, new_state) == state) {
// Success!
break;
}
diff --git a/src/hotspot/share/utilities/waitBarrier_generic.hpp b/src/hotspot/share/utilities/waitBarrier_generic.hpp
index 8ed9ef3ac6e..0cbba1041db 100644
--- a/src/hotspot/share/utilities/waitBarrier_generic.hpp
+++ b/src/hotspot/share/utilities/waitBarrier_generic.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
+#include "runtime/atomic.hpp"
#include "runtime/semaphore.hpp"
#include "utilities/globalDefinitions.hpp"
@@ -43,10 +44,10 @@ private:
Semaphore _sem;
// Cell state, tracks the arming + waiters status
- volatile int64_t _state;
+ Atomic _state;
// Wakeups to deliver for current waiters
- volatile int _outstanding_wakeups;
+ Atomic _outstanding_wakeups;
int signal_if_needed(int max);
@@ -83,7 +84,7 @@ private:
// Trailing padding to protect the last cell.
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);
- volatile int _barrier_tag;
+ Atomic _barrier_tag;
// Trailing padding to insulate the rest of the barrier from adjacent
// data structures. The leading padding is not needed, as cell padding
diff --git a/src/java.base/share/classes/com/sun/crypto/provider/DHKEM.java b/src/java.base/share/classes/com/sun/crypto/provider/DHKEM.java
index b27320ed24b..c7372a4c2c8 100644
--- a/src/java.base/share/classes/com/sun/crypto/provider/DHKEM.java
+++ b/src/java.base/share/classes/com/sun/crypto/provider/DHKEM.java
@@ -26,26 +26,51 @@ package com.sun.crypto.provider;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.io.Serial;
import java.math.BigInteger;
-import java.security.*;
-import java.security.interfaces.ECKey;
+import java.security.AsymmetricKey;
+import java.security.InvalidAlgorithmParameterException;
+import java.security.InvalidKeyException;
+import java.security.KeyFactory;
+import java.security.KeyPair;
+import java.security.KeyPairGenerator;
+import java.security.NoSuchAlgorithmException;
+import java.security.PrivateKey;
+import java.security.ProviderException;
+import java.security.PublicKey;
+import java.security.SecureRandom;
import java.security.interfaces.ECPublicKey;
-import java.security.interfaces.XECKey;
import java.security.interfaces.XECPublicKey;
-import java.security.spec.*;
+import java.security.spec.AlgorithmParameterSpec;
+import java.security.spec.ECParameterSpec;
+import java.security.spec.ECPoint;
+import java.security.spec.ECPrivateKeySpec;
+import java.security.spec.ECPublicKeySpec;
+import java.security.spec.InvalidKeySpecException;
+import java.security.spec.KeySpec;
+import java.security.spec.NamedParameterSpec;
+import java.security.spec.XECPrivateKeySpec;
+import java.security.spec.XECPublicKeySpec;
import java.util.Arrays;
import java.util.Objects;
-import javax.crypto.*;
-import javax.crypto.spec.SecretKeySpec;
+import javax.crypto.DecapsulateException;
+import javax.crypto.KDF;
+import javax.crypto.KEM;
+import javax.crypto.KEMSpi;
+import javax.crypto.KeyAgreement;
+import javax.crypto.SecretKey;
import javax.crypto.spec.HKDFParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
import sun.security.jca.JCAUtil;
-import sun.security.util.*;
-
-import jdk.internal.access.SharedSecrets;
+import sun.security.util.ArrayUtil;
+import sun.security.util.CurveDB;
+import sun.security.util.ECUtil;
+import sun.security.util.InternalPrivateKey;
+import sun.security.util.NamedCurve;
+import sun.security.util.SliceableSecretKey;
// Implementing DHKEM defined inside https://www.rfc-editor.org/rfc/rfc9180.html,
-// without the AuthEncap and AuthDecap functions
public class DHKEM implements KEMSpi {
private static final byte[] KEM = new byte[]
@@ -65,80 +90,86 @@ public class DHKEM implements KEMSpi {
private static final byte[] EMPTY = new byte[0];
private record Handler(Params params, SecureRandom secureRandom,
- PrivateKey skR, PublicKey pkR)
+ PrivateKey skS, PublicKey pkS, // sender keys
+ PrivateKey skR, PublicKey pkR) // receiver keys
implements EncapsulatorSpi, DecapsulatorSpi {
@Override
public KEM.Encapsulated engineEncapsulate(int from, int to, String algorithm) {
- Objects.checkFromToIndex(from, to, params.Nsecret);
+ Objects.checkFromToIndex(from, to, params.nsecret);
Objects.requireNonNull(algorithm, "null algorithm");
KeyPair kpE = params.generateKeyPair(secureRandom);
PrivateKey skE = kpE.getPrivate();
PublicKey pkE = kpE.getPublic();
- byte[] pkEm = params.SerializePublicKey(pkE);
- byte[] pkRm = params.SerializePublicKey(pkR);
- byte[] kem_context = concat(pkEm, pkRm);
- byte[] key = null;
+ byte[] pkEm = params.serializePublicKey(pkE);
+ byte[] pkRm = params.serializePublicKey(pkR);
try {
- byte[] dh = params.DH(skE, pkR);
- key = params.ExtractAndExpand(dh, kem_context);
- return new KEM.Encapsulated(
- new SecretKeySpec(key, from, to - from, algorithm),
- pkEm, null);
+ SecretKey key;
+ if (skS == null) {
+ byte[] kem_context = concat(pkEm, pkRm);
+ key = params.deriveKey(algorithm, from, to, kem_context,
+ params.dh(skE, pkR));
+ } else {
+ byte[] pkSm = params.serializePublicKey(pkS);
+ byte[] kem_context = concat(pkEm, pkRm, pkSm);
+ key = params.deriveKey(algorithm, from, to, kem_context,
+ params.dh(skE, pkR), params.dh(skS, pkR));
+ }
+ return new KEM.Encapsulated(key, pkEm, null);
+ } catch (UnsupportedOperationException e) {
+ throw e;
} catch (Exception e) {
throw new ProviderException("internal error", e);
- } finally {
- // `key` has been cloned into the `SecretKeySpec` within the
- // returned `KEM.Encapsulated`, so it can now be cleared.
- if (key != null) {
- Arrays.fill(key, (byte)0);
- }
}
}
@Override
public SecretKey engineDecapsulate(byte[] encapsulation,
int from, int to, String algorithm) throws DecapsulateException {
- Objects.checkFromToIndex(from, to, params.Nsecret);
+ Objects.checkFromToIndex(from, to, params.nsecret);
Objects.requireNonNull(algorithm, "null algorithm");
Objects.requireNonNull(encapsulation, "null encapsulation");
- if (encapsulation.length != params.Npk) {
+ if (encapsulation.length != params.npk) {
throw new DecapsulateException("incorrect encapsulation size");
}
- byte[] key = null;
try {
- PublicKey pkE = params.DeserializePublicKey(encapsulation);
- byte[] dh = params.DH(skR, pkE);
- byte[] pkRm = params.SerializePublicKey(pkR);
- byte[] kem_context = concat(encapsulation, pkRm);
- key = params.ExtractAndExpand(dh, kem_context);
- return new SecretKeySpec(key, from, to - from, algorithm);
+ PublicKey pkE = params.deserializePublicKey(encapsulation);
+ byte[] pkRm = params.serializePublicKey(pkR);
+ if (pkS == null) {
+ byte[] kem_context = concat(encapsulation, pkRm);
+ return params.deriveKey(algorithm, from, to, kem_context,
+ params.dh(skR, pkE));
+ } else {
+ byte[] pkSm = params.serializePublicKey(pkS);
+ byte[] kem_context = concat(encapsulation, pkRm, pkSm);
+ return params.deriveKey(algorithm, from, to, kem_context,
+ params.dh(skR, pkE), params.dh(skR, pkS));
+ }
+ } catch (UnsupportedOperationException e) {
+ throw e;
} catch (IOException | InvalidKeyException e) {
throw new DecapsulateException("Cannot decapsulate", e);
} catch (Exception e) {
throw new ProviderException("internal error", e);
- } finally {
- if (key != null) {
- Arrays.fill(key, (byte)0);
- }
}
}
@Override
public int engineSecretSize() {
- return params.Nsecret;
+ return params.nsecret;
}
@Override
public int engineEncapsulationSize() {
- return params.Npk;
+ return params.npk;
}
}
// Not really a random. For KAT test only. It generates key pair from ikm.
public static class RFC9180DeriveKeyPairSR extends SecureRandom {
- static final long serialVersionUID = 0L;
+ @Serial
+ private static final long serialVersionUID = 0L;
private final byte[] ikm;
@@ -147,7 +178,7 @@ public class DHKEM implements KEMSpi {
this.ikm = ikm;
}
- public KeyPair derive(Params params) {
+ private KeyPair derive(Params params) {
try {
return params.deriveKeyPair(ikm);
} catch (Exception e) {
@@ -183,9 +214,9 @@ public class DHKEM implements KEMSpi {
;
private final int kem_id;
- private final int Nsecret;
- private final int Nsk;
- private final int Npk;
+ private final int nsecret;
+ private final int nsk;
+ private final int npk;
private final String kaAlgorithm;
private final String keyAlgorithm;
private final AlgorithmParameterSpec spec;
@@ -193,18 +224,18 @@ public class DHKEM implements KEMSpi {
private final byte[] suiteId;
- Params(int kem_id, int Nsecret, int Nsk, int Npk,
+ Params(int kem_id, int nsecret, int nsk, int npk,
String kaAlgorithm, String keyAlgorithm, AlgorithmParameterSpec spec,
String hkdfAlgorithm) {
this.kem_id = kem_id;
this.spec = spec;
- this.Nsecret = Nsecret;
- this.Nsk = Nsk;
- this.Npk = Npk;
+ this.nsecret = nsecret;
+ this.nsk = nsk;
+ this.npk = npk;
this.kaAlgorithm = kaAlgorithm;
this.keyAlgorithm = keyAlgorithm;
this.hkdfAlgorithm = hkdfAlgorithm;
- suiteId = concat(KEM, I2OSP(kem_id, 2));
+ suiteId = concat(KEM, i2OSP(kem_id, 2));
}
private boolean isEC() {
@@ -224,18 +255,18 @@ public class DHKEM implements KEMSpi {
}
}
- private byte[] SerializePublicKey(PublicKey k) {
+ private byte[] serializePublicKey(PublicKey k) {
if (isEC()) {
ECPoint w = ((ECPublicKey) k).getW();
return ECUtil.encodePoint(w, ((NamedCurve) spec).getCurve());
} else {
byte[] uArray = ((XECPublicKey) k).getU().toByteArray();
ArrayUtil.reverse(uArray);
- return Arrays.copyOf(uArray, Npk);
+ return Arrays.copyOf(uArray, npk);
}
}
- private PublicKey DeserializePublicKey(byte[] data)
+ private PublicKey deserializePublicKey(byte[] data)
throws IOException, NoSuchAlgorithmException, InvalidKeySpecException {
KeySpec keySpec;
if (isEC()) {
@@ -251,29 +282,59 @@ public class DHKEM implements KEMSpi {
return KeyFactory.getInstance(keyAlgorithm).generatePublic(keySpec);
}
- private byte[] DH(PrivateKey skE, PublicKey pkR)
+ private SecretKey dh(PrivateKey skE, PublicKey pkR)
throws NoSuchAlgorithmException, InvalidKeyException {
KeyAgreement ka = KeyAgreement.getInstance(kaAlgorithm);
ka.init(skE);
ka.doPhase(pkR, true);
- return ka.generateSecret();
+ return ka.generateSecret("Generic");
}
- private byte[] ExtractAndExpand(byte[] dh, byte[] kem_context)
- throws NoSuchAlgorithmException, InvalidKeyException {
- KDF hkdf = KDF.getInstance(hkdfAlgorithm);
- SecretKey eae_prk = LabeledExtract(hkdf, suiteId, EAE_PRK, dh);
- try {
- return LabeledExpand(hkdf, suiteId, eae_prk, SHARED_SECRET,
- kem_context, Nsecret);
- } finally {
- if (eae_prk instanceof SecretKeySpec s) {
- SharedSecrets.getJavaxCryptoSpecAccess()
- .clearSecretKeySpec(s);
+ // The final shared secret derivation of either the encapsulator
+ // or the decapsulator. The key slicing is implemented inside.
+ // Throws UOE if a slice of the key cannot be found.
+ private SecretKey deriveKey(String alg, int from, int to,
+ byte[] kem_context, SecretKey... dhs)
+ throws NoSuchAlgorithmException {
+ if (from == 0 && to == nsecret) {
+ return extractAndExpand(kem_context, alg, dhs);
+ } else {
+ // First get shared secrets in "Generic" and then get a slice
+ // of it in the requested algorithm.
+ var fullKey = extractAndExpand(kem_context, "Generic", dhs);
+ if ("RAW".equalsIgnoreCase(fullKey.getFormat())) {
+ byte[] km = fullKey.getEncoded();
+ if (km == null) {
+ // Should not happen if format is "RAW"
+ throw new UnsupportedOperationException("Key extract failed");
+ } else {
+ try {
+ return new SecretKeySpec(km, from, to - from, alg);
+ } finally {
+ Arrays.fill(km, (byte)0);
+ }
+ }
+ } else if (fullKey instanceof SliceableSecretKey ssk) {
+ return ssk.slice(alg, from, to);
+ } else {
+ throw new UnsupportedOperationException("Cannot extract key");
}
}
}
+ private SecretKey extractAndExpand(byte[] kem_context, String alg, SecretKey... dhs)
+ throws NoSuchAlgorithmException {
+ var kdf = KDF.getInstance(hkdfAlgorithm);
+ var builder = labeledExtract(suiteId, EAE_PRK);
+ for (var dh : dhs) builder.addIKM(dh);
+ try {
+ return kdf.deriveKey(alg,
+ labeledExpand(builder, suiteId, SHARED_SECRET, kem_context, nsecret));
+ } catch (InvalidAlgorithmParameterException e) {
+ throw new ProviderException(e);
+ }
+ }
+
private PublicKey getPublicKey(PrivateKey sk)
throws InvalidKeyException {
if (!(sk instanceof InternalPrivateKey)) {
@@ -298,45 +359,37 @@ public class DHKEM implements KEMSpi {
// For KAT tests only. See RFC9180DeriveKeyPairSR.
public KeyPair deriveKeyPair(byte[] ikm) throws Exception {
- KDF hkdf = KDF.getInstance(hkdfAlgorithm);
- SecretKey dkp_prk = LabeledExtract(hkdf, suiteId, DKP_PRK, ikm);
- try {
- if (isEC()) {
- NamedCurve curve = (NamedCurve) spec;
- BigInteger sk = BigInteger.ZERO;
- int counter = 0;
- while (sk.signum() == 0 ||
- sk.compareTo(curve.getOrder()) >= 0) {
- if (counter > 255) {
- throw new RuntimeException();
- }
- byte[] bytes = LabeledExpand(hkdf, suiteId, dkp_prk,
- CANDIDATE, I2OSP(counter, 1), Nsk);
- // bitmask is defined to be 0xFF for P-256 and P-384,
- // and 0x01 for P-521
- if (this == Params.P521) {
- bytes[0] = (byte) (bytes[0] & 0x01);
- }
- sk = new BigInteger(1, (bytes));
- counter = counter + 1;
+ var kdf = KDF.getInstance(hkdfAlgorithm);
+ var builder = labeledExtract(suiteId, DKP_PRK).addIKM(ikm);
+ if (isEC()) {
+ NamedCurve curve = (NamedCurve) spec;
+ BigInteger sk = BigInteger.ZERO;
+ int counter = 0;
+ while (sk.signum() == 0 || sk.compareTo(curve.getOrder()) >= 0) {
+ if (counter > 255) {
+ // So unlucky and should not happen
+ throw new ProviderException("DeriveKeyPairError");
}
- PrivateKey k = DeserializePrivateKey(sk.toByteArray());
- return new KeyPair(getPublicKey(k), k);
- } else {
- byte[] sk = LabeledExpand(hkdf, suiteId, dkp_prk, SK, EMPTY,
- Nsk);
- PrivateKey k = DeserializePrivateKey(sk);
- return new KeyPair(getPublicKey(k), k);
- }
- } finally {
- if (dkp_prk instanceof SecretKeySpec s) {
- SharedSecrets.getJavaxCryptoSpecAccess()
- .clearSecretKeySpec(s);
+ byte[] bytes = kdf.deriveData(labeledExpand(builder,
+ suiteId, CANDIDATE, i2OSP(counter, 1), nsk));
+ // bitmask is defined to be 0xFF for P-256 and P-384, and 0x01 for P-521
+ if (this == Params.P521) {
+ bytes[0] = (byte) (bytes[0] & 0x01);
+ }
+ sk = new BigInteger(1, (bytes));
+ counter = counter + 1;
}
+ PrivateKey k = deserializePrivateKey(sk.toByteArray());
+ return new KeyPair(getPublicKey(k), k);
+ } else {
+ byte[] sk = kdf.deriveData(labeledExpand(builder,
+ suiteId, SK, EMPTY, nsk));
+ PrivateKey k = deserializePrivateKey(sk);
+ return new KeyPair(getPublicKey(k), k);
}
}
- private PrivateKey DeserializePrivateKey(byte[] data) throws Exception {
+ private PrivateKey deserializePrivateKey(byte[] data) throws Exception {
KeySpec keySpec = isEC()
? new ECPrivateKeySpec(new BigInteger(1, (data)), (NamedCurve) spec)
: new XECPrivateKeySpec(spec, data);
@@ -359,7 +412,22 @@ public class DHKEM implements KEMSpi {
throw new InvalidAlgorithmParameterException("no spec needed");
}
Params params = paramsFromKey(pk);
- return new Handler(params, getSecureRandom(secureRandom), null, pk);
+ return new Handler(params, getSecureRandom(secureRandom), null, null, null, pk);
+ }
+
+ // AuthEncap is not public KEM API
+ public EncapsulatorSpi engineNewAuthEncapsulator(PublicKey pkR, PrivateKey skS,
+ AlgorithmParameterSpec spec, SecureRandom secureRandom)
+ throws InvalidAlgorithmParameterException, InvalidKeyException {
+ if (pkR == null || skS == null) {
+ throw new InvalidKeyException("input key is null");
+ }
+ if (spec != null) {
+ throw new InvalidAlgorithmParameterException("no spec needed");
+ }
+ Params params = paramsFromKey(pkR);
+ return new Handler(params, getSecureRandom(secureRandom),
+ skS, params.getPublicKey(skS), null, pkR);
}
@Override
@@ -372,20 +440,34 @@ public class DHKEM implements KEMSpi {
throw new InvalidAlgorithmParameterException("no spec needed");
}
Params params = paramsFromKey(sk);
- return new Handler(params, null, sk, params.getPublicKey(sk));
+ return new Handler(params, null, null, null, sk, params.getPublicKey(sk));
}
- private Params paramsFromKey(Key k) throws InvalidKeyException {
- if (k instanceof ECKey eckey) {
- if (ECUtil.equals(eckey.getParams(), CurveDB.P_256)) {
+ // AuthDecap is not public KEM API
+ public DecapsulatorSpi engineNewAuthDecapsulator(
+ PrivateKey skR, PublicKey pkS, AlgorithmParameterSpec spec)
+ throws InvalidAlgorithmParameterException, InvalidKeyException {
+ if (skR == null || pkS == null) {
+ throw new InvalidKeyException("input key is null");
+ }
+ if (spec != null) {
+ throw new InvalidAlgorithmParameterException("no spec needed");
+ }
+ Params params = paramsFromKey(skR);
+ return new Handler(params, null, null, pkS, skR, params.getPublicKey(skR));
+ }
+
+ private Params paramsFromKey(AsymmetricKey k) throws InvalidKeyException {
+ var p = k.getParams();
+ if (p instanceof ECParameterSpec ecp) {
+ if (ECUtil.equals(ecp, CurveDB.P_256)) {
return Params.P256;
- } else if (ECUtil.equals(eckey.getParams(), CurveDB.P_384)) {
+ } else if (ECUtil.equals(ecp, CurveDB.P_384)) {
return Params.P384;
- } else if (ECUtil.equals(eckey.getParams(), CurveDB.P_521)) {
+ } else if (ECUtil.equals(ecp, CurveDB.P_521)) {
return Params.P521;
}
- } else if (k instanceof XECKey xkey
- && xkey.getParams() instanceof NamedParameterSpec ns) {
+ } else if (p instanceof NamedParameterSpec ns) {
if (ns.getName().equalsIgnoreCase("X25519")) {
return Params.X25519;
} else if (ns.getName().equalsIgnoreCase("X448")) {
@@ -401,8 +483,11 @@ public class DHKEM implements KEMSpi {
return o.toByteArray();
}
- private static byte[] I2OSP(int n, int w) {
- assert n < 256;
+ // I2OSP(n, w) as defined in RFC 9180 Section 3.
+ // In DHKEM and HPKE, number is always <65536
+ // and converted to at most 2 bytes.
+ public static byte[] i2OSP(int n, int w) {
+ assert n < 65536;
assert w == 1 || w == 2;
if (w == 1) {
return new byte[] { (byte) n };
@@ -411,32 +496,32 @@ public class DHKEM implements KEMSpi {
}
}
- private static SecretKey LabeledExtract(KDF hkdf, byte[] suite_id,
- byte[] label, byte[] ikm) throws InvalidKeyException {
- SecretKeySpec s = new SecretKeySpec(concat(HPKE_V1, suite_id, label,
- ikm), "IKM");
- try {
- HKDFParameterSpec spec =
- HKDFParameterSpec.ofExtract().addIKM(s).extractOnly();
- return hkdf.deriveKey("Generic", spec);
- } catch (InvalidAlgorithmParameterException |
- NoSuchAlgorithmException e) {
- throw new InvalidKeyException(e.getMessage(), e);
- } finally {
- SharedSecrets.getJavaxCryptoSpecAccess().clearSecretKeySpec(s);
- }
+ // Create a LabeledExtract builder with labels.
+ // You can add more IKM and salt into the result.
+ public static HKDFParameterSpec.Builder labeledExtract(
+ byte[] suiteId, byte[] label) {
+ return HKDFParameterSpec.ofExtract()
+ .addIKM(HPKE_V1).addIKM(suiteId).addIKM(label);
}
- private static byte[] LabeledExpand(KDF hkdf, byte[] suite_id,
- SecretKey prk, byte[] label, byte[] info, int L)
- throws InvalidKeyException {
- byte[] labeled_info = concat(I2OSP(L, 2), HPKE_V1, suite_id, label,
- info);
- try {
- return hkdf.deriveData(HKDFParameterSpec.expandOnly(
- prk, labeled_info, L));
- } catch (InvalidAlgorithmParameterException iape) {
- throw new InvalidKeyException(iape.getMessage(), iape);
- }
+ // Create a labeled info from info and labels
+ private static byte[] labeledInfo(
+ byte[] suiteId, byte[] label, byte[] info, int length) {
+ return concat(i2OSP(length, 2), HPKE_V1, suiteId, label, info);
+ }
+
+ // LabeledExpand from a builder
+ public static HKDFParameterSpec labeledExpand(
+ HKDFParameterSpec.Builder builder,
+ byte[] suiteId, byte[] label, byte[] info, int length) {
+ return builder.thenExpand(
+ labeledInfo(suiteId, label, info, length), length);
+ }
+
+ // LabeledExpand from a prk
+ public static HKDFParameterSpec labeledExpand(
+ SecretKey prk, byte[] suiteId, byte[] label, byte[] info, int length) {
+ return HKDFParameterSpec.expandOnly(
+ prk, labeledInfo(suiteId, label, info, length), length);
}
}
diff --git a/src/java.base/share/classes/com/sun/crypto/provider/HPKE.java b/src/java.base/share/classes/com/sun/crypto/provider/HPKE.java
new file mode 100644
index 00000000000..eee5f59cc75
--- /dev/null
+++ b/src/java.base/share/classes/com/sun/crypto/provider/HPKE.java
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.sun.crypto.provider;
+
+import sun.security.util.CurveDB;
+import sun.security.util.ECUtil;
+
+import javax.crypto.BadPaddingException;
+import javax.crypto.Cipher;
+import javax.crypto.CipherSpi;
+import javax.crypto.DecapsulateException;
+import javax.crypto.IllegalBlockSizeException;
+import javax.crypto.KDF;
+import javax.crypto.KEM;
+import javax.crypto.NoSuchPaddingException;
+import javax.crypto.SecretKey;
+import javax.crypto.ShortBufferException;
+import javax.crypto.spec.GCMParameterSpec;
+import javax.crypto.spec.HPKEParameterSpec;
+import javax.crypto.spec.IvParameterSpec;
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.security.AlgorithmParameters;
+import java.security.AsymmetricKey;
+import java.security.InvalidAlgorithmParameterException;
+import java.security.InvalidKeyException;
+import java.security.Key;
+import java.security.NoSuchAlgorithmException;
+import java.security.PrivateKey;
+import java.security.ProviderException;
+import java.security.PublicKey;
+import java.security.SecureRandom;
+import java.security.spec.AlgorithmParameterSpec;
+import java.security.spec.ECParameterSpec;
+import java.security.spec.NamedParameterSpec;
+import java.util.Arrays;
+
+public class HPKE extends CipherSpi {
+
+ private static final byte[] HPKE = new byte[]
+ {'H', 'P', 'K', 'E'};
+ private static final byte[] SEC = new byte[]
+ {'s', 'e', 'c'};
+ private static final byte[] PSK_ID_HASH = new byte[]
+ {'p', 's', 'k', '_', 'i', 'd', '_', 'h', 'a', 's', 'h'};
+ private static final byte[] INFO_HASH = new byte[]
+ {'i', 'n', 'f', 'o', '_', 'h', 'a', 's', 'h'};
+ private static final byte[] SECRET = new byte[]
+ {'s', 'e', 'c', 'r', 'e', 't'};
+ private static final byte[] EXP = new byte[]
+ {'e', 'x', 'p'};
+ private static final byte[] KEY = new byte[]
+ {'k', 'e', 'y'};
+ private static final byte[] BASE_NONCE = new byte[]
+ {'b', 'a', 's', 'e', '_', 'n', 'o', 'n', 'c', 'e'};
+
+ private static final int BEGIN = 1;
+ private static final int EXPORT_ONLY = 2; // init done with aead_id == 65535
+ private static final int ENCRYPT_AND_EXPORT = 3; // init done with AEAD
+ private static final int AFTER_FINAL = 4; // after doFinal, internal cipher needs reinit
+
+ private int state = BEGIN;
+ private Impl impl;
+
+ @Override
+ protected void engineSetMode(String mode) throws NoSuchAlgorithmException {
+ throw new NoSuchAlgorithmException(mode);
+ }
+
+ @Override
+ protected void engineSetPadding(String padding) throws NoSuchPaddingException {
+ throw new NoSuchPaddingException(padding);
+ }
+
+ @Override
+ protected int engineGetBlockSize() {
+ if (state == ENCRYPT_AND_EXPORT || state == AFTER_FINAL) {
+ return impl.aead.cipher.getBlockSize();
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ protected int engineGetOutputSize(int inputLen) {
+ if (state == ENCRYPT_AND_EXPORT || state == AFTER_FINAL) {
+ return impl.aead.cipher.getOutputSize(inputLen);
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ protected byte[] engineGetIV() {
+ return (state == BEGIN || impl.kemEncaps == null)
+ ? null : impl.kemEncaps.clone();
+ }
+
+ @Override
+ protected AlgorithmParameters engineGetParameters() {
+ return null;
+ }
+
+ @Override
+ protected void engineInit(int opmode, Key key, SecureRandom random)
+ throws InvalidKeyException {
+ throw new InvalidKeyException("HPKEParameterSpec must be provided");
+ }
+
+ @Override
+ protected void engineInit(int opmode, Key key,
+ AlgorithmParameterSpec params, SecureRandom random)
+ throws InvalidKeyException, InvalidAlgorithmParameterException {
+ impl = new Impl(opmode);
+ if (!(key instanceof AsymmetricKey ak)) {
+ throw new InvalidKeyException("Not an asymmetric key");
+ }
+ if (params == null) {
+ throw new InvalidAlgorithmParameterException(
+ "HPKEParameterSpec must be provided");
+ } else if (params instanceof HPKEParameterSpec hps) {
+ impl.init(ak, hps, random);
+ } else {
+ throw new InvalidAlgorithmParameterException(
+ "Unsupported params type: " + params.getClass());
+ }
+ if (impl.hasEncrypt()) {
+ impl.aead.start(impl.opmode, impl.context.k, impl.context.computeNonce());
+ state = ENCRYPT_AND_EXPORT;
+ } else {
+ state = EXPORT_ONLY;
+ }
+ }
+
+ @Override
+ protected void engineInit(int opmode, Key key,
+ AlgorithmParameters params, SecureRandom random)
+ throws InvalidKeyException, InvalidAlgorithmParameterException {
+ throw new InvalidKeyException("HPKEParameterSpec must be provided");
+ }
+
+ // state is ENCRYPT_AND_EXPORT after this call succeeds
+ private void maybeReinitInternalCipher() {
+ if (state == BEGIN) {
+ throw new IllegalStateException("Illegal state: " + state);
+ }
+ if (state == EXPORT_ONLY) {
+ throw new UnsupportedOperationException();
+ }
+ if (state == AFTER_FINAL) {
+ impl.aead.start(impl.opmode, impl.context.k, impl.context.computeNonce());
+ state = ENCRYPT_AND_EXPORT;
+ }
+ }
+
+ @Override
+ protected byte[] engineUpdate(byte[] input, int inputOffset, int inputLen) {
+ maybeReinitInternalCipher();
+ return impl.aead.cipher.update(input, inputOffset, inputLen);
+ }
+
+ @Override
+ protected int engineUpdate(byte[] input, int inputOffset, int inputLen,
+ byte[] output, int outputOffset) throws ShortBufferException {
+ maybeReinitInternalCipher();
+ return impl.aead.cipher.update(
+ input, inputOffset, inputLen, output, outputOffset);
+ }
+
+ @Override
+ protected void engineUpdateAAD(byte[] src, int offset, int len) {
+ maybeReinitInternalCipher();
+ impl.aead.cipher.updateAAD(src, offset, len);
+ }
+
+ @Override
+ protected void engineUpdateAAD(ByteBuffer src) {
+ maybeReinitInternalCipher();
+ impl.aead.cipher.updateAAD(src);
+ }
+
+ @Override
+ protected byte[] engineDoFinal(byte[] input, int inputOffset, int inputLen)
+ throws IllegalBlockSizeException, BadPaddingException {
+ maybeReinitInternalCipher();
+ impl.context.IncrementSeq();
+ state = AFTER_FINAL;
+ if (input == null) { // Cipher.doFinal(null, ?, ?) misbehaves; use the no-arg doFinal() instead
+ return impl.aead.cipher.doFinal();
+ } else {
+ return impl.aead.cipher.doFinal(input, inputOffset, inputLen);
+ }
+ }
+
+ @Override
+ protected int engineDoFinal(byte[] input, int inputOffset, int inputLen,
+ byte[] output, int outputOffset) throws ShortBufferException,
+ IllegalBlockSizeException, BadPaddingException {
+ maybeReinitInternalCipher();
+ impl.context.IncrementSeq();
+ state = AFTER_FINAL;
+ return impl.aead.cipher.doFinal(
+ input, inputOffset, inputLen, output, outputOffset);
+ }
+
+ //@Override
+ protected SecretKey engineExportKey(String algorithm, byte[] context, int length) {
+ if (state == BEGIN) {
+ throw new IllegalStateException("State: " + state);
+ } else {
+ return impl.context.exportKey(algorithm, context, length);
+ }
+ }
+
+ //@Override
+ protected byte[] engineExportData(byte[] context, int length) {
+ if (state == BEGIN) {
+ throw new IllegalStateException("State: " + state);
+ } else {
+ return impl.context.exportData(context, length);
+ }
+ }
+
+ private static class AEAD {
+ final Cipher cipher;
+ final int nk, nn, nt;
+ final int id;
+ public AEAD(int id) throws InvalidAlgorithmParameterException {
+ this.id = id;
+ try {
+ switch (id) {
+ case HPKEParameterSpec.AEAD_AES_128_GCM -> {
+ cipher = Cipher.getInstance("AES/GCM/NoPadding");
+ nk = 16;
+ }
+ case HPKEParameterSpec.AEAD_AES_256_GCM -> {
+ cipher = Cipher.getInstance("AES/GCM/NoPadding");
+ nk = 32;
+ }
+ case HPKEParameterSpec.AEAD_CHACHA20_POLY1305 -> {
+ cipher = Cipher.getInstance("ChaCha20-Poly1305");
+ nk = 32;
+ }
+ case HPKEParameterSpec.EXPORT_ONLY -> {
+ cipher = null;
+ nk = -1;
+ }
+ default -> throw new InvalidAlgorithmParameterException(
+ "Unknown aead_id: " + id);
+ }
+ } catch (NoSuchAlgorithmException | NoSuchPaddingException e) {
+ throw new ProviderException("Internal error", e);
+ }
+ nn = 12; nt = 16;
+ }
+
+ void start(int opmode, SecretKey key, byte[] nonce) {
+ try {
+ if (id == HPKEParameterSpec.AEAD_CHACHA20_POLY1305) {
+ cipher.init(opmode, key, new IvParameterSpec(nonce));
+ } else {
+ cipher.init(opmode, key, new GCMParameterSpec(nt * 8, nonce));
+ }
+ } catch (InvalidAlgorithmParameterException | InvalidKeyException e) {
+ throw new ProviderException("Internal error", e);
+ }
+ }
+ }
+
+ private static class Impl {
+
+ final int opmode;
+
+ HPKEParameterSpec params;
+ Context context;
+ AEAD aead;
+
+ byte[] suite_id;
+ String kdfAlg;
+ int kdfNh;
+
+ // only used on sender side
+ byte[] kemEncaps;
+
+ class Context {
+ final SecretKey k; // null if only export
+ final byte[] base_nonce;
+ final SecretKey exporter_secret;
+
+ byte[] seq = new byte[aead.nn];
+
+ public Context(SecretKey sk, byte[] base_nonce,
+ SecretKey exporter_secret) {
+ this.k = sk;
+ this.base_nonce = base_nonce;
+ this.exporter_secret = exporter_secret;
+ }
+
+ SecretKey exportKey(String algorithm, byte[] exporter_context, int length) {
+ if (exporter_context == null) {
+ throw new IllegalArgumentException("Null exporter_context");
+ }
+ try {
+ var kdf = KDF.getInstance(kdfAlg);
+ return kdf.deriveKey(algorithm, DHKEM.labeledExpand(
+ exporter_secret, suite_id, SEC, exporter_context, length));
+ } catch (InvalidAlgorithmParameterException | NoSuchAlgorithmException e) {
+ // algorithm not accepted by HKDF, length too big or too small
+ throw new IllegalArgumentException("Invalid input", e);
+ }
+ }
+
+ byte[] exportData(byte[] exporter_context, int length) {
+ if (exporter_context == null) {
+ throw new IllegalArgumentException("Null exporter_context");
+ }
+ try {
+ var kdf = KDF.getInstance(kdfAlg);
+ return kdf.deriveData(DHKEM.labeledExpand(
+ exporter_secret, suite_id, SEC, exporter_context, length));
+ } catch (InvalidAlgorithmParameterException | NoSuchAlgorithmException e) {
+ // algorithm not accepted by HKDF, length too big or too small
+ throw new IllegalArgumentException("Invalid input", e);
+ }
+ }
+
+ private byte[] computeNonce() {
+ var result = new byte[aead.nn];
+ for (var i = 0; i < result.length; i++) {
+ result[i] = (byte)(seq[i] ^ base_nonce[i]);
+ }
+ return result;
+ }
+
+ private void IncrementSeq() {
+ for (var i = seq.length - 1; i >= 0; i--) {
+ if ((seq[i] & 0xff) == 0xff) {
+ seq[i] = 0;
+ } else {
+ seq[i]++;
+ return;
+ }
+ }
+ // seq >= (1 << (8*aead.nn)) - 1 when this method is called
+ throw new ProviderException("MessageLimitReachedError");
+ }
+ }
+
+ public Impl(int opmode) {
+ this.opmode = opmode;
+ }
+
+ public boolean hasEncrypt() {
+ return params.aead_id() != 65535;
+ }
+
+ // Section 7.2.1 of RFC 9180 has restrictions on size of psk, psk_id,
+ // info, and exporter_context (~2^61 for HMAC-SHA256 and ~2^125 for
+ // HMAC-SHA384 and HMAC-SHA512). This method does not pose any
+ // restrictions.
+ public void init(AsymmetricKey key, HPKEParameterSpec p, SecureRandom rand)
+ throws InvalidKeyException, InvalidAlgorithmParameterException {
+ if (opmode != Cipher.ENCRYPT_MODE && opmode != Cipher.DECRYPT_MODE) {
+ throw new UnsupportedOperationException(
+ "Can only be used for encryption and decryption");
+ }
+ setParams(p);
+ SecretKey shared_secret;
+ if (opmode == Cipher.ENCRYPT_MODE) {
+ if (!(key instanceof PublicKey pk)) {
+ throw new InvalidKeyException(
+ "Cannot encrypt with private key");
+ }
+ if (p.encapsulation() != null) {
+ throw new InvalidAlgorithmParameterException(
+ "Must not provide key encapsulation message on sender side");
+ }
+ checkMatch(false, pk, params.kem_id());
+ KEM.Encapsulated enc;
+ switch (p.authKey()) {
+ case null -> {
+ var e = kem().newEncapsulator(pk, rand);
+ enc = e.encapsulate();
+ }
+ case PrivateKey skS -> {
+ checkMatch(true, skS, params.kem_id());
+ // AuthEncap not public KEM API but it's internally supported
+ var e = new DHKEM().engineNewAuthEncapsulator(pk, skS, null, rand);
+ enc = e.engineEncapsulate(0, e.engineSecretSize(), "Generic");
+ }
+ default -> throw new InvalidAlgorithmParameterException(
+ "Cannot auth with public key");
+ }
+ kemEncaps = enc.encapsulation();
+ shared_secret = enc.key();
+ } else {
+ if (!(key instanceof PrivateKey sk)) {
+ throw new InvalidKeyException("Cannot decrypt with public key");
+ }
+ checkMatch(false, sk, params.kem_id());
+ try {
+ var encap = p.encapsulation();
+ if (encap == null) {
+ throw new InvalidAlgorithmParameterException(
+ "Must provide key encapsulation message on recipient side");
+ }
+ switch (p.authKey()) {
+ case null -> {
+ var d = kem().newDecapsulator(sk);
+ shared_secret = d.decapsulate(encap);
+ }
+ case PublicKey pkS -> {
+ checkMatch(true, pkS, params.kem_id());
+ // AuthDecap not public KEM API but it's internally supported
+ var d = new DHKEM().engineNewAuthDecapsulator(sk, pkS, null);
+ shared_secret = d.engineDecapsulate(
+ encap, 0, d.engineSecretSize(), "Generic");
+ }
+ default -> throw new InvalidAlgorithmParameterException(
+ "Cannot auth with private key");
+ }
+ } catch (DecapsulateException e) {
+ throw new InvalidAlgorithmParameterException(e);
+ }
+ }
+
+ var usePSK = usePSK(params.psk());
+ int mode = params.authKey() == null ? (usePSK ? 1 : 0) : (usePSK ? 3 : 2);
+ context = keySchedule(mode, shared_secret,
+ params.info(),
+ params.psk(),
+ params.psk_id());
+ }
+
+ private static void checkMatch(boolean inSpec, AsymmetricKey k, int kem_id)
+ throws InvalidKeyException, InvalidAlgorithmParameterException {
+ var p = k.getParams();
+ switch (p) {
+ case ECParameterSpec ecp -> {
+ if ((!ECUtil.equals(ecp, CurveDB.P_256)
+ || kem_id != HPKEParameterSpec.KEM_DHKEM_P_256_HKDF_SHA256)
+ && (!ECUtil.equals(ecp, CurveDB.P_384)
+ || kem_id != HPKEParameterSpec.KEM_DHKEM_P_384_HKDF_SHA384)
+ && (!ECUtil.equals(ecp, CurveDB.P_521)
+ || kem_id != HPKEParameterSpec.KEM_DHKEM_P_521_HKDF_SHA512)) {
+ var name = ECUtil.getCurveName(ecp);
+ throw new InvalidAlgorithmParameterException(
+ name + " does not match " + kem_id);
+ }
+ }
+ case NamedParameterSpec ns -> {
+ var name = ns.getName();
+ if ((!name.equalsIgnoreCase("x25519")
+ || kem_id != HPKEParameterSpec.KEM_DHKEM_X25519_HKDF_SHA256)
+ && (!name.equalsIgnoreCase("x448")
+ || kem_id != HPKEParameterSpec.KEM_DHKEM_X448_HKDF_SHA512)) {
+ throw new InvalidAlgorithmParameterException(
+ name + " does not match " + kem_id);
+ }
+ }
+ case null, default -> {
+ var msg = k.getClass() + " does not match " + kem_id;
+ if (inSpec) {
+ throw new InvalidAlgorithmParameterException(msg);
+ } else {
+ throw new InvalidKeyException(msg);
+ }
+ }
+ }
+ }
+
+ private KEM kem() {
+ try {
+ return KEM.getInstance("DHKEM");
+ } catch (NoSuchAlgorithmException e) {
+ throw new ProviderException("Internal error", e);
+ }
+ }
+
+ private void setParams(HPKEParameterSpec p)
+ throws InvalidAlgorithmParameterException {
+ params = p;
+ suite_id = concat(
+ HPKE,
+ DHKEM.i2OSP(params.kem_id(), 2),
+ DHKEM.i2OSP(params.kdf_id(), 2),
+ DHKEM.i2OSP(params.aead_id(), 2));
+ switch (params.kdf_id()) {
+ case HPKEParameterSpec.KDF_HKDF_SHA256 -> {
+ kdfAlg = "HKDF-SHA256";
+ kdfNh = 32;
+ }
+ case HPKEParameterSpec.KDF_HKDF_SHA384 -> {
+ kdfAlg = "HKDF-SHA384";
+ kdfNh = 48;
+ }
+ case HPKEParameterSpec.KDF_HKDF_SHA512 -> {
+ kdfAlg = "HKDF-SHA512";
+ kdfNh = 64;
+ }
+ default -> throw new InvalidAlgorithmParameterException(
+ "Unsupported kdf_id: " + params.kdf_id());
+ }
+ aead = new AEAD(params.aead_id());
+ }
+
+ private Context keySchedule(int mode,
+ SecretKey shared_secret,
+ byte[] info,
+ SecretKey psk,
+ byte[] psk_id) {
+ try {
+ var psk_id_hash_x = DHKEM.labeledExtract(suite_id, PSK_ID_HASH)
+ .addIKM(psk_id).extractOnly();
+ var info_hash_x = DHKEM.labeledExtract(suite_id, INFO_HASH)
+ .addIKM(info).extractOnly();
+
+            // deriveData must and can be called because all info to
+            // the builder are just byte arrays. Any KDF impl can handle this.
+ var kdf = KDF.getInstance(kdfAlg);
+ var key_schedule_context = concat(new byte[]{(byte) mode},
+ kdf.deriveData(psk_id_hash_x),
+ kdf.deriveData(info_hash_x));
+
+ var secret_x_builder = DHKEM.labeledExtract(suite_id, SECRET);
+ if (psk != null) {
+ secret_x_builder.addIKM(psk);
+ }
+ secret_x_builder.addSalt(shared_secret);
+ var secret_x = kdf.deriveKey("Generic", secret_x_builder.extractOnly());
+
+ // A new KDF object must be created because secret_x_builder
+ // might contain provider-specific keys which the previous
+ // KDF (provider already chosen) cannot handle.
+ kdf = KDF.getInstance(kdfAlg);
+ var exporter_secret = kdf.deriveKey("Generic", DHKEM.labeledExpand(
+ secret_x, suite_id, EXP, key_schedule_context, kdfNh));
+
+ if (hasEncrypt()) {
+ // ChaCha20-Poly1305 does not care about algorithm name
+ var key = kdf.deriveKey("AES", DHKEM.labeledExpand(secret_x,
+ suite_id, KEY, key_schedule_context, aead.nk));
+ // deriveData must be called because we need to increment nonce
+ var base_nonce = kdf.deriveData(DHKEM.labeledExpand(secret_x,
+ suite_id, BASE_NONCE, key_schedule_context, aead.nn));
+ return new Context(key, base_nonce, exporter_secret);
+ } else {
+ return new Context(null, null, exporter_secret);
+ }
+ } catch (InvalidAlgorithmParameterException
+ | NoSuchAlgorithmException | UnsupportedOperationException e) {
+ throw new ProviderException("Internal error", e);
+ }
+ }
+ }
+
+ private static boolean usePSK(SecretKey psk) {
+ return psk != null;
+ }
+
+ private static byte[] concat(byte[]... inputs) {
+ var o = new ByteArrayOutputStream();
+ Arrays.stream(inputs).forEach(o::writeBytes);
+ return o.toByteArray();
+ }
+}
diff --git a/src/java.base/share/classes/com/sun/crypto/provider/JceKeyStore.java b/src/java.base/share/classes/com/sun/crypto/provider/JceKeyStore.java
index ec8e0f3757d..ad98653b9c2 100644
--- a/src/java.base/share/classes/com/sun/crypto/provider/JceKeyStore.java
+++ b/src/java.base/share/classes/com/sun/crypto/provider/JceKeyStore.java
@@ -661,6 +661,10 @@ public final class JceKeyStore extends KeyStoreSpi {
dos.close();
}
}
+
+ if (debug != null) {
+ emitWeakKeyStoreWarning();
+ }
}
}
@@ -862,6 +866,10 @@ public final class JceKeyStore extends KeyStoreSpi {
secretKeyCount);
}
+ if (debug != null) {
+ emitWeakKeyStoreWarning();
+ }
+
/*
* If a password has been provided, we check the keyed digest
* at the end. If this check fails, the store has been tampered
@@ -978,4 +986,12 @@ public final class JceKeyStore extends KeyStoreSpi {
return Status.UNDECIDED;
}
}
+
+ private void emitWeakKeyStoreWarning() {
+ debug.println("WARNING: JCEKS uses outdated cryptographic "
+ + "algorithms and will be removed in a future "
+ + "release. Migrate to PKCS12 using:");
+        debug.println("keytool -importkeystore -srckeystore <keystore> "
+                + "-destkeystore <keystore> -deststoretype pkcs12");
+ }
}
diff --git a/src/java.base/share/classes/com/sun/crypto/provider/SunJCE.java b/src/java.base/share/classes/com/sun/crypto/provider/SunJCE.java
index 22d5f17c6e0..4b38bd55809 100644
--- a/src/java.base/share/classes/com/sun/crypto/provider/SunJCE.java
+++ b/src/java.base/share/classes/com/sun/crypto/provider/SunJCE.java
@@ -371,6 +371,8 @@ public final class SunJCE extends Provider {
ps("Cipher", "PBEWithHmacSHA512/256AndAES_256",
"com.sun.crypto.provider.PBES2Core$HmacSHA512_256AndAES_256");
+ ps("Cipher", "HPKE", "com.sun.crypto.provider.HPKE");
+
/*
* Key(pair) Generator engines
*/
diff --git a/src/java.base/share/classes/java/lang/Character.java b/src/java.base/share/classes/java/lang/Character.java
index d866202909c..b71849eaee7 100644
--- a/src/java.base/share/classes/java/lang/Character.java
+++ b/src/java.base/share/classes/java/lang/Character.java
@@ -743,7 +743,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constable
*/
public static final class UnicodeBlock extends Subset {
/**
- * NUM_ENTITIES should match the total number of UnicodeBlocks.
+ * NUM_ENTITIES should match the total number of UnicodeBlock identifier
+ * names plus their aliases.
* It should be adjusted whenever the Unicode Character Database
* is upgraded.
*/
diff --git a/src/java.base/share/classes/java/lang/Class.java b/src/java.base/share/classes/java/lang/Class.java
index cfd2fc82235..eab1993a2b4 100644
--- a/src/java.base/share/classes/java/lang/Class.java
+++ b/src/java.base/share/classes/java/lang/Class.java
@@ -43,6 +43,7 @@ import java.lang.reflect.Executable;
import java.lang.reflect.Field;
import java.lang.reflect.GenericArrayType;
import java.lang.reflect.GenericDeclaration;
+import java.lang.reflect.GenericSignatureFormatError;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Member;
import java.lang.reflect.Method;
@@ -159,6 +160,10 @@ import sun.reflect.annotation.*;
* other members are the classes and interfaces whose declarations are
* enclosed within the top-level class declaration.
*
+ * Unless otherwise specified, methods in this class throw a
+ * {@link NullPointerException} when they are called with {@code null}
+ * or an array that contains {@code null} as an argument.
+ *
*
* A class or interface created by the invocation of
* {@link java.lang.invoke.MethodHandles.Lookup#defineHiddenClass(byte[], boolean, MethodHandles.Lookup.ClassOption...)
@@ -529,7 +534,8 @@ public final class Class implements java.io.Serializable,
* (which implies linking). See Section {@jls
* 12.4} of The Java Language
* Specification.
- * @param loader class loader from which the class must be loaded
+ * @param loader class loader from which the class must be loaded,
+ * may be {@code null}
* @return class object representing the desired class
*
* @throws LinkageError if the linkage fails
@@ -588,8 +594,6 @@ public final class Class implements java.io.Serializable,
* @return {@code Class} object of the given name defined in the given module;
* {@code null} if not found.
*
- * @throws NullPointerException if the given module or name is {@code null}
- *
* @throws LinkageError if the linkage fails
*
* @jls 12.2 Loading of Classes and Interfaces
@@ -619,8 +623,6 @@ public final class Class implements java.io.Serializable,
*
* @param primitiveName the name of the primitive type to find
*
- * @throws NullPointerException if the argument is {@code null}
- *
* @jls 4.2 Primitive Types and Values
* @jls 15.8.2 Class Literals
* @since 22
@@ -756,7 +758,7 @@ public final class Class implements java.io.Serializable,
* this {@code Class} object represents a primitive type, this method
* returns {@code false}.
*
- * @param obj the object to check
+ * @param obj the object to check, may be {@code null}
* @return true if {@code obj} is an instance of this class
*
* @since 1.1
@@ -786,8 +788,6 @@ public final class Class implements java.io.Serializable,
* @param cls the {@code Class} object to be checked
* @return the {@code boolean} value indicating whether objects of the
* type {@code cls} can be assigned to objects of this class
- * @throws NullPointerException if the specified Class parameter is
- * null.
* @since 1.1
*/
@IntrinsicCandidate
@@ -1445,7 +1445,6 @@ public final class Class implements java.io.Serializable,
if (!enclosingInfo.isMethod())
return null;
- // Descriptor already validated by VM
            List<Class<?>> types = BytecodeDescriptor.parseMethod(enclosingInfo.getDescriptor(), getClassLoader());
            Class<?> returnType = types.removeLast();
            Class<?>[] parameterClasses = types.toArray(EMPTY_CLASS_ARRAY);
@@ -1533,8 +1532,15 @@ public final class Class implements java.io.Serializable,
String getName() { return name; }
- String getDescriptor() { return descriptor; }
-
+ String getDescriptor() {
+ // hotspot validates this descriptor to be either a field or method
+ // descriptor as the "type" in a NameAndType in verification.
+ // So this can still be a field descriptor
+ if (descriptor.isEmpty() || descriptor.charAt(0) != '(') {
+ throw new GenericSignatureFormatError("Bad method signature: " + descriptor);
+ }
+ return descriptor;
+ }
}
    private static Class<?> toClass(Type o) {
@@ -1567,7 +1573,6 @@ public final class Class implements java.io.Serializable,
if (!enclosingInfo.isConstructor())
return null;
- // Descriptor already validated by VM
            List<Class<?>> types = BytecodeDescriptor.parseMethod(enclosingInfo.getDescriptor(), getClassLoader());
            types.removeLast();
            Class<?>[] parameterClasses = types.toArray(EMPTY_CLASS_ARRAY);
@@ -2051,7 +2056,6 @@ public final class Class implements java.io.Serializable,
* {@code name}
* @throws NoSuchFieldException if a field with the specified name is
* not found.
- * @throws NullPointerException if {@code name} is {@code null}
*
* @since 1.1
* @jls 8.2 Class Members
@@ -2142,13 +2146,13 @@ public final class Class implements java.io.Serializable,
* overriding method as it would have a more specific return type.
*
* @param name the name of the method
- * @param parameterTypes the list of parameters
+ * @param parameterTypes the list of parameters, may be {@code null}
* @return the {@code Method} object that matches the specified
* {@code name} and {@code parameterTypes}
- * @throws NoSuchMethodException if a matching method is not found
+ * @throws NoSuchMethodException if a matching method is not found,
+ * if {@code parameterTypes} contains {@code null},
* or if the name is {@value ConstantDescs#INIT_NAME} or
- * {@value ConstantDescs#CLASS_INIT_NAME}.
- * @throws NullPointerException if {@code name} is {@code null}
+ * {@value ConstantDescs#CLASS_INIT_NAME}
*
* @jls 8.2 Class Members
* @jls 8.4 Method Declarations
@@ -2179,12 +2183,13 @@ public final class Class implements java.io.Serializable,
* represented by this {@code Class} object whose formal parameter
* types match those specified by {@code parameterTypes}.
*
- * @param parameterTypes the parameter array
+ * @param parameterTypes the parameter array, may be {@code null}
* @return the {@code Constructor} object of the public constructor that
* matches the specified {@code parameterTypes}
* @throws NoSuchMethodException if a matching constructor is not found,
- * including when this {@code Class} object represents
- * an interface, a primitive type, an array class, or void.
+ * if this {@code Class} object represents an interface, a primitive
+ * type, an array class, or void, or if {@code parameterTypes}
+ * contains {@code null}
*
* @see #getDeclaredConstructor(Class[])
* @since 1.1
@@ -2365,7 +2370,6 @@ public final class Class implements java.io.Serializable,
* class
* @throws NoSuchFieldException if a field with the specified name is
* not found.
- * @throws NullPointerException if {@code name} is {@code null}
*
* @since 1.1
* @jls 8.2 Class Members
@@ -2400,11 +2404,13 @@ public final class Class implements java.io.Serializable,
* method does not find the {@code clone()} method.
*
* @param name the name of the method
- * @param parameterTypes the parameter array
- * @return the {@code Method} object for the method of this class
- * matching the specified name and parameters
- * @throws NoSuchMethodException if a matching method is not found.
- * @throws NullPointerException if {@code name} is {@code null}
+ * @param parameterTypes the parameter array, may be {@code null}
+ * @return the {@code Method} object for the method of this class
+ * matching the specified name and parameters
+ * @throws NoSuchMethodException if a matching method is not found,
+ * if {@code parameterTypes} contains {@code null},
+ * or if the name is {@value ConstantDescs#INIT_NAME} or
+ * {@value ConstantDescs#CLASS_INIT_NAME}
*
* @jls 8.2 Class Members
* @jls 8.4 Method Declarations
@@ -2471,12 +2477,13 @@ public final class Class implements java.io.Serializable,
* declared in a non-static context, the formal parameter types
* include the explicit enclosing instance as the first parameter.
*
- * @param parameterTypes the parameter array
+ * @param parameterTypes the parameter array, may be {@code null}
* @return The {@code Constructor} object for the constructor with the
* specified parameter list
* @throws NoSuchMethodException if a matching constructor is not found,
- * including when this {@code Class} object represents
- * an interface, a primitive type, an array class, or void.
+ * if this {@code Class} object represents an interface, a
+ * primitive type, an array class, or void, or if
+ * {@code parameterTypes} contains {@code null}
*
* @see #getConstructor(Class[])
* @since 1.1
@@ -2535,7 +2542,6 @@ public final class Class implements java.io.Serializable,
* resource with this name is found, or the resource is in a package
* that is not {@linkplain Module#isOpen(String, Module) open} to at
* least the caller module.
- * @throws NullPointerException If {@code name} is {@code null}
*
* @see Module#getResourceAsStream(String)
* @since 1.1
@@ -2631,7 +2637,6 @@ public final class Class implements java.io.Serializable,
* resource is in a package that is not
* {@linkplain Module#isOpen(String, Module) open} to at least the caller
* module.
- * @throws NullPointerException If {@code name} is {@code null}
* @since 1.1
*/
@CallerSensitive
@@ -3473,7 +3478,7 @@ public final class Class implements java.io.Serializable,
* Casts an object to the class or interface represented
* by this {@code Class} object.
*
- * @param obj the object to be cast
+ * @param obj the object to be cast, may be {@code null}
* @return the object after casting, or null if obj is null
*
* @throws ClassCastException if the object is not
@@ -3528,7 +3533,6 @@ public final class Class implements java.io.Serializable,
* Note that any annotation returned by this method is a
* declaration annotation.
*
- * @throws NullPointerException {@inheritDoc}
* @since 1.5
*/
@Override
@@ -3541,7 +3545,6 @@ public final class Class implements java.io.Serializable,
/**
* {@inheritDoc}
- * @throws NullPointerException {@inheritDoc}
* @since 1.5
*/
@Override
@@ -3554,7 +3557,6 @@ public final class Class implements java.io.Serializable,
* Note that any annotations returned by this method are
* declaration annotations.
*
- * @throws NullPointerException {@inheritDoc}
* @since 1.8
*/
@Override
@@ -3584,7 +3586,6 @@ public final class Class implements java.io.Serializable,
* Note that any annotation returned by this method is a
* declaration annotation.
*
- * @throws NullPointerException {@inheritDoc}
* @since 1.8
*/
@Override
@@ -3600,7 +3601,6 @@ public final class Class