diff --git a/doc/hotspot-style.html b/doc/hotspot-style.html
index 7be6867b3ca..f1c25dab7f4 100644
--- a/doc/hotspot-style.html
+++ b/doc/hotspot-style.html
@@ -1859,8 +1859,6 @@ difference.
Additional Undecided
Features
-Trailing return type syntax for functions (n2541)
Member initializers and aggregates (n3653)
Rvalue references and move semantics
diff --git a/doc/hotspot-style.md b/doc/hotspot-style.md
index facdf68462f..e49f49ec1c9 100644
--- a/doc/hotspot-style.md
+++ b/doc/hotspot-style.md
@@ -1853,9 +1853,6 @@ See Object Lifetime: C++17 6.8/8, C++20 6.7.3/8
### Additional Undecided Features
-* Trailing return type syntax for functions
-([n2541](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2541.htm))
-
* Member initializers and aggregates
([n3653](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3653.html))
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index 6072cbc74dd..9d58a280998 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -37,56 +37,25 @@ AC_DEFUN([FLAGS_SETUP_SHARED_LIBS],
if test "x$TOOLCHAIN_TYPE" = xgcc; then
# Default works for linux, might work on other platforms as well.
SHARED_LIBRARY_FLAGS='-shared'
- # --disable-new-dtags forces use of RPATH instead of RUNPATH for rpaths.
- # This protects internal library dependencies within the JDK from being
- # overridden using LD_LIBRARY_PATH. See JDK-8326891 for more information.
- SET_EXECUTABLE_ORIGIN='-Wl,-rpath,\$$ORIGIN[$]1 -Wl,--disable-new-dtags'
- SET_SHARED_LIBRARY_ORIGIN="-Wl,-z,origin $SET_EXECUTABLE_ORIGIN"
- SET_SHARED_LIBRARY_NAME='-Wl,-soname=[$]1'
elif test "x$TOOLCHAIN_TYPE" = xclang; then
if test "x$OPENJDK_TARGET_OS" = xmacosx; then
# Linking is different on MacOSX
SHARED_LIBRARY_FLAGS="-dynamiclib -compatibility_version 1.0.0 -current_version 1.0.0"
- SET_EXECUTABLE_ORIGIN='-Wl,-rpath,@loader_path$(or [$]1,/.)'
- SET_SHARED_LIBRARY_ORIGIN="$SET_EXECUTABLE_ORIGIN"
- SET_SHARED_LIBRARY_NAME='-Wl,-install_name,@rpath/[$]1'
elif test "x$OPENJDK_TARGET_OS" = xaix; then
# Linking is different on aix
SHARED_LIBRARY_FLAGS="-shared -Wl,-bM:SRE -Wl,-bnoentry"
- SET_EXECUTABLE_ORIGIN=""
- SET_SHARED_LIBRARY_ORIGIN=''
- SET_SHARED_LIBRARY_NAME=''
else
# Default works for linux, might work on other platforms as well.
SHARED_LIBRARY_FLAGS='-shared'
- SET_EXECUTABLE_ORIGIN='-Wl,-rpath,\$$ORIGIN[$]1'
- if test "x$OPENJDK_TARGET_OS" = xlinux; then
- SET_EXECUTABLE_ORIGIN="$SET_EXECUTABLE_ORIGIN -Wl,--disable-new-dtags"
- fi
- SET_SHARED_LIBRARY_NAME='-Wl,-soname=[$]1'
-
- # arm specific settings
- if test "x$OPENJDK_TARGET_CPU" = "xarm"; then
- # '-Wl,-z,origin' isn't used on arm.
- SET_SHARED_LIBRARY_ORIGIN='-Wl,-rpath,\$$$$ORIGIN[$]1'
- else
- SET_SHARED_LIBRARY_ORIGIN="-Wl,-z,origin $SET_EXECUTABLE_ORIGIN"
- fi
fi
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
SHARED_LIBRARY_FLAGS="-dll"
- SET_EXECUTABLE_ORIGIN=''
- SET_SHARED_LIBRARY_ORIGIN=''
- SET_SHARED_LIBRARY_NAME=''
fi
- AC_SUBST(SET_EXECUTABLE_ORIGIN)
- AC_SUBST(SET_SHARED_LIBRARY_ORIGIN)
- AC_SUBST(SET_SHARED_LIBRARY_NAME)
AC_SUBST(SHARED_LIBRARY_FLAGS)
])
@@ -934,48 +903,6 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP],
IF_FALSE: [$2FDLIBM_CFLAGS=""])
fi
AC_SUBST($2FDLIBM_CFLAGS)
-
- # Check whether the compiler supports the Arm C Language Extensions (ACLE)
- # for SVE. Set SVE_CFLAGS to -march=armv8-a+sve if it does.
- # ACLE and this flag are required to build the aarch64 SVE related functions in
- # libvectormath. Apple Silicon does not support SVE; use macOS as a proxy for
- # that check.
- if test "x$OPENJDK_TARGET_CPU" = "xaarch64" && test "x$OPENJDK_TARGET_OS" = "xlinux"; then
- if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
- AC_LANG_PUSH(C)
- OLD_CFLAGS="$CFLAGS"
- CFLAGS="$CFLAGS -march=armv8-a+sve"
- AC_MSG_CHECKING([if Arm SVE ACLE is supported])
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <arm_sve.h>],
- [
- svint32_t r = svdup_n_s32(1);
- return 0;
- ])],
- [
- AC_MSG_RESULT([yes])
- $2SVE_CFLAGS="-march=armv8-a+sve"
- # Switching the initialization mode with gcc from 'pattern' to 'zero'
- # avoids the use of unsupported `__builtin_clear_padding` for variable
- # length aggregates
- if test "x$DEBUG_LEVEL" != xrelease && test "x$TOOLCHAIN_TYPE" = xgcc ; then
- INIT_ZERO_FLAG="-ftrivial-auto-var-init=zero"
- FLAGS_COMPILER_CHECK_ARGUMENTS(ARGUMENT: [$INIT_ZERO_FLAG],
- IF_TRUE: [
- $2SVE_CFLAGS="${$2SVE_CFLAGS} $INIT_ZERO_FLAG"
- ]
- )
- fi
- ],
- [
- AC_MSG_RESULT([no])
- $2SVE_CFLAGS=""
- ]
- )
- CFLAGS="$OLD_CFLAGS"
- AC_LANG_POP(C)
- fi
- fi
- AC_SUBST($2SVE_CFLAGS)
])
AC_DEFUN_ONCE([FLAGS_SETUP_BRANCH_PROTECTION],
diff --git a/make/autoconf/flags-ldflags.m4 b/make/autoconf/flags-ldflags.m4
index a12a6e7f9a6..651be3a1913 100644
--- a/make/autoconf/flags-ldflags.m4
+++ b/make/autoconf/flags-ldflags.m4
@@ -98,7 +98,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
# Setup OS-dependent LDFLAGS
if test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang; then
- # FIXME: We should really generalize SET_SHARED_LIBRARY_ORIGIN instead.
+ # FIXME: We should really generalize SetSharedLibraryOrigin instead.
OS_LDFLAGS_JVM_ONLY="-Wl,-rpath,@loader_path/. -Wl,-rpath,@loader_path/.."
OS_LDFLAGS="-mmacosx-version-min=$MACOSX_VERSION_MIN -Wl,-reproducible"
fi
diff --git a/make/autoconf/flags-other.m4 b/make/autoconf/flags-other.m4
index 9d41cf04791..4570f6ede78 100644
--- a/make/autoconf/flags-other.m4
+++ b/make/autoconf/flags-other.m4
@@ -107,6 +107,62 @@ AC_DEFUN([FLAGS_SETUP_NMFLAGS],
AC_SUBST(NMFLAGS)
])
+# Check whether the compiler supports the Arm C Language Extensions (ACLE)
+# for SVE, and set SVE_CFLAGS to -march=armv8-a+sve if SVE use is enabled.
+# ACLE and this flag are required to build the aarch64 SVE-related functions
+# in libvectormath.
+AC_DEFUN([FLAGS_SETUP_SVE],
+[
+ AARCH64_SVE_AVAILABLE=false
+ # Apple Silicon does not support SVE; use macOS as a proxy for that check.
+ if test "x$OPENJDK_TARGET_CPU" = "xaarch64" && test "x$OPENJDK_TARGET_OS" = "xlinux"; then
+ if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
+ # Check whether the compiler and binutils support SVE
+ AC_MSG_CHECKING([if Arm SVE ACLE is supported])
+ AC_LANG_PUSH([C])
+ saved_cflags="$CFLAGS"
+ CFLAGS="$CFLAGS -march=armv8-a+sve $CFLAGS_WARNINGS_ARE_ERRORS ARG_ARGUMENT"
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [
+ #include <arm_sve.h>
+ svfloat64_t a() {}
+ ],
+ [
+ svint32_t r = svdup_n_s32(1)
+ ])],
+ [
+ AARCH64_SVE_AVAILABLE=true
+ ]
+ )
+ CFLAGS="$saved_cflags"
+ AC_LANG_POP([C])
+ AC_MSG_RESULT([$AARCH64_SVE_AVAILABLE])
+ fi
+ fi
+
+ UTIL_ARG_ENABLE(NAME: aarch64-sve, DEFAULT: auto,
+ RESULT: AARCH64_SVE_ENABLED,
+ DESC: [Use SVE when compiling libvectormath],
+ AVAILABLE: $AARCH64_SVE_AVAILABLE)
+ SVE_CFLAGS=""
+ if test "x$AARCH64_SVE_ENABLED" = xtrue; then
+ SVE_CFLAGS="-march=armv8-a+sve"
+ # Switching the initialization mode with gcc from 'pattern' to 'zero'
+ # avoids the use of unsupported `__builtin_clear_padding` for variable
+ # length aggregates
+ if test "x$DEBUG_LEVEL" != xrelease && test "x$TOOLCHAIN_TYPE" = xgcc ; then
+ INIT_ZERO_FLAG="-ftrivial-auto-var-init=zero"
+ FLAGS_COMPILER_CHECK_ARGUMENTS(ARGUMENT: [$INIT_ZERO_FLAG],
+ IF_TRUE: [
+ SVE_CFLAGS="${SVE_CFLAGS} $INIT_ZERO_FLAG"
+ ]
+ )
+ fi
+ fi
+ AC_SUBST(SVE_CFLAGS)
+])
+
################################################################################
# platform independent
AC_DEFUN([FLAGS_SETUP_ASFLAGS],
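
For reference, the ACLE probe in FLAGS_SETUP_SVE above amounts to compiling a small translation unit with -march=armv8-a+sve and seeing whether it builds. A minimal standalone sketch of that check (not part of the patch; assumes an aarch64 compiler with SVE support and is built by hand, e.g. g++ -march=armv8-a+sve probe.cpp):

    // Standalone model of the configure-time SVE ACLE probe.
    #include <arm_sve.h>
    #include <cstdint>

    int main() {
      // If <arm_sve.h> or the SVE intrinsics are unavailable, this fails to
      // compile, and configure leaves SVE_CFLAGS empty.
      svint32_t ones = svdup_n_s32(1);
      int64_t sum = svaddv_s32(svptrue_b32(), ones);  // sum of lanes == lane count
      return sum == (int64_t) svcntw() ? 0 : 1;
    }

If the probe compiles and SVE use is enabled, the same -march=armv8-a+sve flag ends up in SVE_CFLAGS for the libvectormath SVE sources.
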
diff --git a/make/autoconf/flags.m4 b/make/autoconf/flags.m4
index c810d15ebbc..10647305757 100644
--- a/make/autoconf/flags.m4
+++ b/make/autoconf/flags.m4
@@ -374,6 +374,7 @@ AC_DEFUN([FLAGS_SETUP_FLAGS],
FLAGS_SETUP_RCFLAGS
FLAGS_SETUP_NMFLAGS
+ FLAGS_SETUP_SVE
FLAGS_SETUP_ASFLAGS
FLAGS_SETUP_ASFLAGS_CPU_DEP([TARGET])
FLAGS_SETUP_ASFLAGS_CPU_DEP([BUILD], [OPENJDK_BUILD_])
diff --git a/make/autoconf/lib-tests.m4 b/make/autoconf/lib-tests.m4
index 9eb5ee5a046..23f3d443a6c 100644
--- a/make/autoconf/lib-tests.m4
+++ b/make/autoconf/lib-tests.m4
@@ -28,7 +28,7 @@
################################################################################
# Minimum supported versions
-JTREG_MINIMUM_VERSION=7.5.2
+JTREG_MINIMUM_VERSION=8
GTEST_MINIMUM_VERSION=1.14.0
################################################################################
diff --git a/make/autoconf/spec.gmk.template b/make/autoconf/spec.gmk.template
index ab6bb51c27e..4eb5aa2f66d 100644
--- a/make/autoconf/spec.gmk.template
+++ b/make/autoconf/spec.gmk.template
@@ -624,17 +624,8 @@ ASFLAGS_DEBUG_SYMBOLS := @ASFLAGS_DEBUG_SYMBOLS@
# Compress (or not) jars
COMPRESS_JARS := @COMPRESS_JARS@
-# Options to linker to specify the library name.
-# (Note absence of := assignment, because we do not want to evaluate the macro body here)
-SET_SHARED_LIBRARY_NAME = @SET_SHARED_LIBRARY_NAME@
-
SHARED_LIBRARY_FLAGS := @SHARED_LIBRARY_FLAGS@
-# Set origin using the linker, ie use the relative path to the dependent library to find the dependencies.
-# (Note absence of := assignment, because we do not want to evaluate the macro body here)
-SET_SHARED_LIBRARY_ORIGIN = @SET_SHARED_LIBRARY_ORIGIN@
-SET_EXECUTABLE_ORIGIN = @SET_EXECUTABLE_ORIGIN@
-
LIBRARY_PREFIX := @LIBRARY_PREFIX@
SHARED_LIBRARY_SUFFIX := @SHARED_LIBRARY_SUFFIX@
STATIC_LIBRARY_SUFFIX := @STATIC_LIBRARY_SUFFIX@
diff --git a/make/common/Execute.gmk b/make/common/Execute.gmk
index 4199e8f13b7..0311c4ecba1 100644
--- a/make/common/Execute.gmk
+++ b/make/common/Execute.gmk
@@ -148,9 +148,12 @@ define SetupExecuteBody
$1_INFO := Running commands for $1
endif
+ $1_VARDEPS := $$($1_COMMAND) $$($1_PRE_COMMAND) $$($1_POST_COMMAND)
+ $1_VARDEPS_FILE := $$(call DependOnVariable, $1_VARDEPS)
+
ifneq ($$($1_PRE_COMMAND), )
- $$($1_PRE_MARKER): $$($1_DEPS)
+ $$($1_PRE_MARKER): $$($1_DEPS) $$($1_VARDEPS_FILE)
ifneq ($$($1_WARN), )
$$(call LogWarn, $$($1_WARN))
endif
@@ -176,7 +179,7 @@ define SetupExecuteBody
$1 := $$($1_PRE_MARKER) $$($1_EXEC_RESULT)
else
- $$($1_EXEC_RESULT): $$($1_DEPS)
+ $$($1_EXEC_RESULT): $$($1_DEPS) $$($1_VARDEPS_FILE)
ifneq ($$($1_WARN), )
$$(call LogWarn, $$($1_WARN))
endif
diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk
index 0285669ffd8..29001e09bd0 100644
--- a/make/common/JdkNativeCompilation.gmk
+++ b/make/common/JdkNativeCompilation.gmk
@@ -30,6 +30,47 @@ ifeq ($(INCLUDE), true)
include NativeCompilation.gmk
+ifeq ($(call isCompiler, gcc), true)
+ # --disable-new-dtags forces use of RPATH instead of RUNPATH for rpaths.
+ # This protects internal library dependencies within the JDK from being
+ # overridden using LD_LIBRARY_PATH. See JDK-8326891 for more information.
+ SetExecutableOrigin = \
+ -Wl,-rpath,\$(DOLLAR)ORIGIN$1 -Wl,--disable-new-dtags
+ SetSharedLibraryOrigin = \
+ -Wl,-z,origin -Wl,-rpath,\$(DOLLAR)ORIGIN$1 -Wl,--disable-new-dtags
+else ifeq ($(call isCompiler, clang), true)
+ ifeq ($(call isTargetOs, macosx), true)
+ SetExecutableOrigin = \
+ -Wl,-rpath,@loader_path$(or $1,/.)
+ SetSharedLibraryOrigin = \
+ -Wl,-rpath,@loader_path$(or $1,/.)
+ else ifeq ($(call isTargetOs, aix), true)
+ SetExecutableOrigin =
+ SetSharedLibraryOrigin =
+ else
+ ifeq ($(call isTargetOs, linux), true)
+ SetExecutableOrigin = \
+ -Wl,-rpath,\$(DOLLAR)ORIGIN$1 -Wl,--disable-new-dtags
+ else
+ SetExecutableOrigin = \
+ -Wl,-rpath,\$(DOLLAR)ORIGIN$1
+ endif
+
+ # '-Wl,-z,origin' is not used on arm.
+ ifeq ($(call isTargetCpu, arm), true)
+ SetSharedLibraryOrigin = \
+ -Wl,-rpath,\$(DOLLAR)ORIGIN$1
+ else
+ SetSharedLibraryOrigin = \
+ -Wl,-z,origin -Wl,-rpath,\$(DOLLAR)ORIGIN$1
+ endif
+ endif
+else ifeq ($(call isCompiler, microsoft), true)
+ SetExecutableOrigin =
+ SetSharedLibraryOrigin =
+else
+ $(error Unknown toolchain)
+endif
+
FindSrcDirsForComponent += \
$(call uniq, $(wildcard \
$(TOPDIR)/src/$(strip $1)/$(OPENJDK_TARGET_OS)/native/$(strip $2) \
@@ -444,9 +485,9 @@ define SetupJdkNativeCompilationBody
ifneq ($$($1_LD_SET_ORIGIN), false)
ifeq ($$($1_TYPE), EXECUTABLE)
- $1_LDFLAGS += $$(call SET_EXECUTABLE_ORIGIN)
+ $1_LDFLAGS += $$(call SetExecutableOrigin)
else
- $1_LDFLAGS += $$(call SET_SHARED_LIBRARY_ORIGIN)
+ $1_LDFLAGS += $$(call SetSharedLibraryOrigin)
endif
endif
# APPEND_LDFLAGS, if it exists, must be set after the origin flags
diff --git a/make/common/modules/LauncherCommon.gmk b/make/common/modules/LauncherCommon.gmk
index 700c0de74d5..0b420df5684 100644
--- a/make/common/modules/LauncherCommon.gmk
+++ b/make/common/modules/LauncherCommon.gmk
@@ -156,8 +156,8 @@ define SetupBuildLauncherBody
DISABLED_WARNINGS_gcc := unused-function unused-variable, \
DISABLED_WARNINGS_clang := unused-function, \
LDFLAGS := $$($1_LDFLAGS), \
- LDFLAGS_linux := $$(call SET_EXECUTABLE_ORIGIN,/../lib), \
- LDFLAGS_macosx := $$(call SET_EXECUTABLE_ORIGIN,/../lib), \
+ LDFLAGS_linux := $$(call SetExecutableOrigin,/../lib), \
+ LDFLAGS_macosx := $$(call SetExecutableOrigin,/../lib), \
LDFLAGS_FILTER_OUT := $$($1_LDFLAGS_FILTER_OUT), \
JDK_LIBS := $$($1_JDK_LIBS), \
JDK_LIBS_windows := $$($1_JDK_LIBS_windows), \
diff --git a/make/common/native/Link.gmk b/make/common/native/Link.gmk
index e888edfcc4c..855e50bddfb 100644
--- a/make/common/native/Link.gmk
+++ b/make/common/native/Link.gmk
@@ -50,6 +50,26 @@ GetEntitlementsFile = \
$(if $(wildcard $f), $f, $(DEFAULT_ENTITLEMENTS_FILE)) \
)
+ifeq ($(call isCompiler, gcc), true)
+ SetSharedLibraryName = \
+ -Wl,-soname=$1
+else ifeq ($(call isCompiler, clang), true)
+ ifeq ($(call isTargetOs, macosx), true)
+ SetSharedLibraryName = \
+ -Wl,-install_name,@rpath/$1
+ else ifeq ($(call isTargetOs, aix), true)
+ SetSharedLibraryName =
+ else
+ # Default works for linux, might work on other platforms as well.
+ SetSharedLibraryName = \
+ -Wl,-soname=$1
+ endif
+else ifeq ($(call isCompiler, microsoft), true)
+ SetSharedLibraryName =
+else
+ $(error Unknown toolchain)
+endif
+
################################################################################
define SetupLinking
# Unless specifically set, stripping should only happen if symbols are also
@@ -131,7 +151,7 @@ define CreateDynamicLibraryOrExecutable
# A shared dynamic library or an executable binary has been specified
ifeq ($$($1_TYPE), LIBRARY)
# Generating a dynamic library.
- $1_EXTRA_LDFLAGS += $$(call SET_SHARED_LIBRARY_NAME,$$($1_BASENAME))
+ $1_EXTRA_LDFLAGS += $$(call SetSharedLibraryName,$$($1_BASENAME))
endif
ifeq ($(MACOSX_CODESIGN_MODE), hardened)
diff --git a/make/conf/github-actions.conf b/make/conf/github-actions.conf
index d2b6cd23128..438e4b3ce8d 100644
--- a/make/conf/github-actions.conf
+++ b/make/conf/github-actions.conf
@@ -26,7 +26,7 @@
# Versions and download locations for dependencies used by GitHub Actions (GHA)
GTEST_VERSION=1.14.0
-JTREG_VERSION=7.5.2+1
+JTREG_VERSION=8+2
LINUX_X64_BOOT_JDK_EXT=tar.gz
LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk24/1f9ff9062db4449d8ca828c504ffae90/36/GPL/openjdk-24_linux-x64_bin.tar.gz
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index d4877604a90..7ed72005ced 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -1174,9 +1174,9 @@ var getJibProfilesDependencies = function (input, common) {
jtreg: {
server: "jpg",
product: "jtreg",
- version: "7.5.2",
- build_number: "1",
- file: "bundles/jtreg-7.5.2+1.zip",
+ version: "8",
+ build_number: "2",
+ file: "bundles/jtreg-8+2.zip",
environment_name: "JT_HOME",
environment_path: input.get("jtreg", "home_path") + "/bin",
configure_args: "--with-jtreg=" + input.get("jtreg", "home_path"),
diff --git a/make/hotspot/lib/CompileGtest.gmk b/make/hotspot/lib/CompileGtest.gmk
index 30d3e3b524c..d615e254f5a 100644
--- a/make/hotspot/lib/CompileGtest.gmk
+++ b/make/hotspot/lib/CompileGtest.gmk
@@ -152,7 +152,7 @@ $(eval $(call SetupJdkExecutable, BUILD_GTEST_LAUNCHER, \
-I$(GTEST_FRAMEWORK_SRC)/googlemock \
-I$(GTEST_FRAMEWORK_SRC)/googlemock/include, \
LD_SET_ORIGIN := false, \
- LDFLAGS_unix := $(call SET_SHARED_LIBRARY_ORIGIN), \
+ LDFLAGS_unix := $(call SetSharedLibraryOrigin), \
JDK_LIBS := gtest:libjvm, \
COPY_DEBUG_SYMBOLS := $(GTEST_COPY_DEBUG_SYMBOLS), \
ZIP_EXTERNAL_DEBUG_SYMBOLS := false, \
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 0fd1c752174..79bbd6a4106 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -57,7 +57,7 @@ ifeq ($(call check-jvm-feature, zero), true)
-DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
ifeq ($(ENABLE_LIBFFI_BUNDLING), true)
- JVM_LDFLAGS_FEATURES += $(call SET_EXECUTABLE_ORIGIN,/..)
+ JVM_LDFLAGS_FEATURES += $(call SetExecutableOrigin,/..)
endif
else
JVM_EXCLUDE_PATTERNS += /zero/
diff --git a/make/modules/java.base/Gensrc.gmk b/make/modules/java.base/Gensrc.gmk
index 2750a6c8791..79db438934e 100644
--- a/make/modules/java.base/Gensrc.gmk
+++ b/make/modules/java.base/Gensrc.gmk
@@ -33,7 +33,6 @@ include gensrc/GensrcBuffer.gmk
include gensrc/GensrcCharacterData.gmk
include gensrc/GensrcCharsetCoder.gmk
include gensrc/GensrcCharsetMapping.gmk
-include gensrc/GensrcExceptions.gmk
include gensrc/GensrcMisc.gmk
include gensrc/GensrcModuleLoaderMap.gmk
include gensrc/GensrcRegex.gmk
diff --git a/make/modules/java.base/gensrc/GensrcExceptions.gmk b/make/modules/java.base/gensrc/GensrcExceptions.gmk
deleted file mode 100644
index baa61596d6b..00000000000
--- a/make/modules/java.base/gensrc/GensrcExceptions.gmk
+++ /dev/null
@@ -1,57 +0,0 @@
-#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-include MakeIncludeStart.gmk
-ifeq ($(INCLUDE), true)
-
-################################################################################
-
-GENSRC_EXCEPTIONS :=
-
-GENSRC_EXCEPTIONS_DST := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/nio
-
-GENSRC_EXCEPTIONS_SRC := $(MODULE_SRC)/share/classes/java/nio
-GENSRC_EXCEPTIONS_CMD := $(TOPDIR)/make/scripts/genExceptions.sh
-
-GENSRC_EXCEPTIONS_SRC_DIRS := . charset channels
-
-$(GENSRC_EXCEPTIONS_DST)/_the.%.marker: $(GENSRC_EXCEPTIONS_SRC)/%/exceptions \
- $(GENSRC_EXCEPTIONS_CMD)
- $(call LogInfo, Generating exceptions java.nio $*)
- $(call MakeDir, $(@D)/$*)
- SCRIPTS="$(TOPDIR)/make/scripts" AWK="$(AWK)" SH="$(SH)" $(SH) \
- $(GENSRC_EXCEPTIONS_CMD) $< $(@D)/$* $(LOG_DEBUG)
- $(TOUCH) $@
-
-GENSRC_EXCEPTIONS += $(foreach D, $(GENSRC_EXCEPTIONS_SRC_DIRS), $(GENSRC_EXCEPTIONS_DST)/_the.$(D).marker)
-
-$(GENSRC_EXCEPTIONS): $(BUILD_TOOLS_JDK)
-
-TARGETS += $(GENSRC_EXCEPTIONS)
-
-################################################################################
-
-endif # include guard
-include MakeIncludeEnd.gmk
diff --git a/make/scripts/addNotices.sh b/make/scripts/addNotices.sh
deleted file mode 100644
index d9864818a14..00000000000
--- a/make/scripts/addNotices.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#! /bin/sh
-#
-# Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-# Parse the first contiguous comment block in this script and generate
-# a java comment block. If this script is invoked with a copyright
-# year/year range, the java comment block will contain a Sun copyright.
-
-COPYRIGHT_YEARS="$1"
-
-cat <<__END__
-/*
-__END__
-
-if [ "x$COPYRIGHT_YEARS" != x ]; then
- cat <<__END__
- * Copyright (c) $COPYRIGHT_YEARS Oracle and/or its affiliates. All rights reserved.
-__END__
-fi
-
-$AWK ' /^#.*Copyright.*Oracle/ { next }
- /^#([^!]|$)/ { sub(/^#/, " *"); print }
- /^$/ { print " */"; exit } ' $0
diff --git a/make/scripts/genExceptions.sh b/make/scripts/genExceptions.sh
deleted file mode 100644
index 7c191189827..00000000000
--- a/make/scripts/genExceptions.sh
+++ /dev/null
@@ -1,116 +0,0 @@
-#! /bin/sh
-#
-# Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-# Generate exception classes
-
-SPEC=$1
-DST=$2
-
-gen() {
- ID=$1
- WHAT=$2
- SVUID=$3
- ARG_TYPE=$4
- ARG_ID=$5
- ARG_PROP=$6
- ARG_PHRASE=$7
- ARG_PARAM="$ARG_TYPE$ $ARG_ID"
- echo '-->' $DST/$ID.java
- out=$DST/${ID}.java
-
- $SH ${SCRIPTS}/addNotices.sh "$COPYRIGHT_YEARS" > $out
-
- cat >>$out <<__END__
-
-// -- This file was mechanically generated: Do not edit! -- //
-
-package $PACKAGE;
-
-
-/**$WHAT
- *
- * @since $SINCE
- */
-
-public `if [ ${ABSTRACT:-0} = 1 ];
- then echo 'abstract '; fi`class $ID
- extends ${SUPER}
-{
-
- @java.io.Serial
- private static final long serialVersionUID = $SVUID;
-__END__
-
- if [ $ARG_ID ]; then
-
- cat >>$out <<__END__
-
- /**
- * The $ARG_PHRASE.
- *
- * @serial
- */
- private $ARG_TYPE $ARG_ID;
-
- /**
- * Constructs an instance of this class.
- *
- * @param $ARG_ID
- * The $ARG_PHRASE
- */
- public $ID($ARG_TYPE $ARG_ID) {
- super(String.valueOf($ARG_ID));
- this.$ARG_ID = $ARG_ID;
- }
-
- /**
- * Retrieves the $ARG_PHRASE.
- *
- * @return The $ARG_PHRASE
- */
- public $ARG_TYPE get$ARG_PROP() {
- return $ARG_ID;
- }
-
-}
-__END__
-
- else
-
- cat >>$out <<__END__
-
- /**
- * Constructs an instance of this class.
- */
- public $ID() { }
-
-}
-__END__
-
- fi
-}
-
-. $SPEC
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 67c4dad27a7..ef35b66003d 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -216,11 +216,6 @@ source %{
return false;
}
break;
- case Op_ExpandV:
- if (UseSVE < 2 || is_subword_type(bt)) {
- return false;
- }
- break;
case Op_VectorMaskToLong:
if (UseSVE > 0 && vlen > 64) {
return false;
@@ -7113,10 +7108,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg,
ins_pipe(pipe_slow);
%}
-instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
+instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{
+ predicate(UseSVE == 0);
+ match(Set dst (ExpandV src mask));
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
+ __ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister,
+ $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{
+ predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4));
+ match(Set dst (ExpandV src pg));
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
+ __ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
+ $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{
+ predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4);
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst);
- format %{ "vexpand $dst, $pg, $src" %}
+ format %{ "vexpand_sve2_SD $dst, $src, $pg" %}
ins_encode %{
// Example input: src = 1 2 3 4 5 6 7 8
// pg = 1 0 0 1 1 0 1 1
@@ -7127,7 +7151,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
// for TBL whose value is used to select the indexed element from src vector.
BasicType bt = Matcher::vector_element_basic_type(this);
- assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// dst = 0 0 0 0 0 0 0 0
__ sve_dup($dst$$FloatRegister, size, 0);
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 28f91204ec3..012de7e46d8 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -206,11 +206,6 @@ source %{
return false;
}
break;
- case Op_ExpandV:
- if (UseSVE < 2 || is_subword_type(bt)) {
- return false;
- }
- break;
case Op_VectorMaskToLong:
if (UseSVE > 0 && vlen > 64) {
return false;
@@ -5101,10 +5096,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg,
ins_pipe(pipe_slow);
%}
-instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
+instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{
+ predicate(UseSVE == 0);
+ match(Set dst (ExpandV src mask));
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
+ __ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister,
+ $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{
+ predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4));
+ match(Set dst (ExpandV src pg));
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
+ format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this);
+ int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
+ __ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
+ $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{
+ predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4);
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst);
- format %{ "vexpand $dst, $pg, $src" %}
+ format %{ "vexpand_sve2_SD $dst, $src, $pg" %}
ins_encode %{
// Example input: src = 1 2 3 4 5 6 7 8
// pg = 1 0 0 1 1 0 1 1
@@ -5115,7 +5139,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
// for TBL whose value is used to select the indexed element from src vector.
BasicType bt = Matcher::vector_element_basic_type(this);
- assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// dst = 0 0 0 0 0 0 0 0
__ sve_dup($dst$$FloatRegister, size, 0);
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index a5d2cbfac98..4c4251fbe9f 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -4068,6 +4068,13 @@ public:
INSN(sve_brkb, 0b10); // Break before first true condition
#undef INSN
+ // SVE move prefix (unpredicated)
+ void sve_movprfx(FloatRegister Zd, FloatRegister Zn) {
+ starti;
+ f(0b00000100, 31, 24), f(0b00, 23, 22), f(0b1, 21), f(0b00000, 20, 16);
+ f(0b101111, 15, 10), rf(Zn, 5), rf(Zd, 0);
+ }
+
// Element count and increment scalar (SVE)
#define INSN(NAME, TYPE) \
void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp
index e7efe472b82..fb14d588f04 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp
@@ -26,7 +26,7 @@
#ifndef CPU_AARCH64_ASSEMBLER_AARCH64_INLINE_HPP
#define CPU_AARCH64_ASSEMBLER_AARCH64_INLINE_HPP
-#include "asm/assembler.inline.hpp"
+#include "asm/assembler.hpp"
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index b1562c54f4e..b61a0e4e378 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -2771,3 +2771,90 @@ void C2_MacroAssembler::select_from_two_vectors(FloatRegister dst, FloatRegister
select_from_two_vectors_neon(dst, src1, src2, dst, tmp, vector_length_in_bytes);
}
}
+
+// Vector expand implementation. Elements from the src vector are expanded into
+// the dst vector under the control of the vector mask.
+// Since there are no native instructions directly corresponding to expand before
+// SVE2p2, the following implementations mainly leverage the TBL instruction to
+// implement expand. To compute the index input for TBL, the prefix sum algorithm
+// (https://en.wikipedia.org/wiki/Prefix_sum) is used. The same algorithm is used
+// for NEON and SVE, but with different instructions where appropriate.
+
+// Vector expand implementation for NEON.
+//
+// An example of 128-bit Byte vector:
+// Data direction: high <== low
+// Input:
+// src = g f e d c b a 9 8 7 6 5 4 3 2 1
+// mask = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1
+// Expected result:
+// dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
+void C2_MacroAssembler::vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask,
+ FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
+ int vector_length_in_bytes) {
+ assert(vector_length_in_bytes <= 16, "the vector length in bytes for NEON must be <= 16");
+ assert_different_registers(dst, src, mask, tmp1, tmp2);
+ // Since the TBL instruction only supports byte table, we need to
+ // compute indices in byte type for all types.
+ SIMD_Arrangement size = vector_length_in_bytes == 16 ? T16B : T8B;
+ // tmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ dup(tmp1, size, zr);
+ // dst = 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
+ negr(dst, size, mask);
+ // Calculate vector index for TBL with prefix sum algorithm.
+ // dst = 8 8 8 7 6 6 6 5 4 4 4 3 2 2 2 1
+ for (int i = 1; i < vector_length_in_bytes; i <<= 1) {
+ ext(tmp2, size, tmp1, dst, vector_length_in_bytes - i);
+ addv(dst, size, tmp2, dst);
+ }
+ // tmp2 = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1
+ orr(tmp2, size, mask, mask);
+ // tmp2 = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
+ bsl(tmp2, size, dst, tmp1);
+ // tmp1 = 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ movi(tmp1, size, 1);
+ // dst = -1 -1 7 6 -1 -1 5 4 -1 -1 3 2 -1 -1 1 0
+ subv(dst, size, tmp2, tmp1);
+ // dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
+ tbl(dst, size, src, 1, dst);
+}
+
+// Vector expand implementation for SVE.
+//
+// An example of 128-bit Short vector:
+// Data direction: high <== low
+// Input:
+// src = gf ed cb a9 87 65 43 21
+// pg = 00 01 00 01 00 01 00 01
+// Expected result:
+// dst = 00 87 00 65 00 43 00 21
+void C2_MacroAssembler::vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
+ FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
+ int vector_length_in_bytes) {
+ assert(UseSVE > 0, "expand implementation only for SVE");
+ assert_different_registers(dst, src, tmp1, tmp2);
+ SIMD_RegVariant size = elemType_to_regVariant(bt);
+
+ // tmp1 = 00 00 00 00 00 00 00 00
+ sve_dup(tmp1, size, 0);
+ sve_movprfx(tmp2, tmp1);
+ // tmp2 = 00 01 00 01 00 01 00 01
+ sve_cpy(tmp2, size, pg, 1, true);
+ // Calculate vector index for TBL with prefix sum algorithm.
+ // tmp2 = 04 04 03 03 02 02 01 01
+ for (int i = type2aelembytes(bt); i < vector_length_in_bytes; i <<= 1) {
+ sve_movprfx(dst, tmp1);
+ // The EXT instruction operates on the full-width SVE register, so the
+ // correct index is computed as:
+ // vector_length_in_bytes - i + MaxVectorSize - vector_length_in_bytes =>
+ // MaxVectorSize - i.
+ sve_ext(dst, tmp2, MaxVectorSize - i);
+ sve_add(tmp2, size, dst, tmp2);
+ }
+ // dst = 00 04 00 03 00 02 00 01
+ sve_sel(dst, size, pg, tmp2, tmp1);
+ // dst = -1 03 -1 02 -1 01 -1 00
+ sve_sub(dst, size, 1);
+ // dst = 00 87 00 65 00 43 00 21
+ sve_tbl(dst, size, src, dst);
+}
\ No newline at end of file
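
The prefix-sum trick used by vector_expand_neon and vector_expand_sve above can be modelled in scalar code. The sketch below is purely illustrative (plain C++, not part of the patch, lanes written low to high): an inclusive prefix sum of the mask gives, for each active destination lane, how many source lanes have been consumed so far, and subtracting one yields the TBL-style gather index.

    // Scalar model of the prefix-sum index computation behind vector expand.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // mask[i] != 0 means dst lane i receives the next unconsumed src lane.
      std::vector<uint8_t> mask = {1, 1, 0, 0, 1, 1, 0, 0};
      std::vector<int8_t>  src  = {1, 2, 3, 4, 5, 6, 7, 8};
      std::vector<int8_t>  dst(mask.size(), 0);

      int prefix_sum = 0;
      for (size_t i = 0; i < mask.size(); i++) {
        prefix_sum += mask[i] ? 1 : 0;        // inclusive prefix sum of the mask
        if (mask[i]) {
          dst[i] = src[prefix_sum - 1];       // TBL: pick src lane (prefix_sum - 1)
        }
      }
      for (int8_t v : dst) {
        printf("%d ", v);                     // prints: 1 2 0 0 3 4 0 0
      }
      printf("\n");
      return 0;
    }

In the assembly versions the prefix sum is built with a short series of EXT+ADD steps, and TBL/sve_tbl performs the final per-lane gather; per the comments in the code, inactive lanes end up with index -1, which the table lookup resolves to zero.
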
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index 0403a27910f..cb8ded142f4 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -204,4 +204,10 @@
FloatRegister index, FloatRegister tmp, BasicType bt,
unsigned vector_length_in_bytes);
+ void vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask,
+ FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
+ int vector_length_in_bytes);
+ void vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
+ FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
+ int vector_length_in_bytes);
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
index 42f3c4a015a..9950feb7470 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
@@ -86,15 +86,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
}
}
-void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
- Register start, Register count, Register scratch, RegSet saved_regs) {
- __ push(saved_regs, sp);
- assert_different_registers(start, count, scratch);
- assert_different_registers(c_rarg0, count);
- __ mov(c_rarg0, start);
- __ mov(c_rarg1, count);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
- __ pop(saved_regs, sp);
+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
+ DecoratorSet decorators,
+ Register start,
+ Register count,
+ Register scratch,
+ RegSet saved_regs) {
+
+ Label done;
+ Label loop;
+ Label next;
+
+ __ cbz(count, done);
+
+ // Calculate the number of card marks to set. Since the object might start and
+ // end within a card, we need to calculate this via the card table indexes of
+ // the actual start and last addresses covered by the object.
+ // Temporarily use the count register for the last element address.
+ __ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
+ __ sub(count, count, BytesPerHeapOop); // Use last element address for end.
+
+ __ lsr(start, start, CardTable::card_shift());
+ __ lsr(count, count, CardTable::card_shift());
+ __ sub(count, count, start); // Number of bytes to mark - 1.
+
+ // Add card table base offset to start.
+ __ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
+ __ add(start, start, scratch);
+
+ __ bind(loop);
+ if (UseCondCardMark) {
+ __ ldrb(scratch, Address(start, count));
+ // Instead of loading clean_card_val and comparing, we exploit the fact that
+ // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
+ __ tbz(scratch, 0, next);
+ }
+ static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
+ __ strb(zr, Address(start, count));
+ __ bind(next);
+ __ subs(count, count, 1);
+ __ br(Assembler::GE, loop);
+
+ __ bind(done);
}
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
@@ -202,10 +235,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
+ const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
+ assert(thread == rthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);
+
// Does store cross heap regions?
__ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
__ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
@@ -214,33 +251,19 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
if (new_val_may_be_null) {
__ cbz(new_val, done);
}
- // Storing region crossing non-null, is card young?
+ // Storing region crossing non-null.
__ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
- __ load_byte_map_base(tmp2); // tmp2 := card table base address
- __ add(tmp1, tmp1, tmp2); // tmp1 := card address
- __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
- __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val?
-}
-static void generate_post_barrier_slow_path(MacroAssembler* masm,
- const Register thread,
- const Register tmp1,
- const Register tmp2,
- Label& done,
- Label& runtime) {
- __ membar(Assembler::StoreLoad); // StoreLoad membar
- __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
- __ cbzw(tmp2, done);
- // Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- STATIC_ASSERT(CardTable::dirty_card_val() == 0);
- __ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
- generate_queue_test_and_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime,
- thread, tmp1, tmp2, rscratch1);
- __ b(done);
+ Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
+ __ ldr(tmp2, card_table_addr); // tmp2 := card table base address
+ if (UseCondCardMark) {
+ __ ldrb(rscratch1, Address(tmp1, tmp2)); // rscratch1 := card
+ // Instead of loading clean_card_val and comparing, we exploit the fact that
+ // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
+ __ tbz(rscratch1, 0, done);
+ }
+ static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
+ __ strb(zr, Address(tmp1, tmp2)); // *(card address) := dirty_card_val
}
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
@@ -249,27 +272,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2) {
- assert(thread == rthread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
- rscratch1);
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
- && tmp2 != noreg, "expecting a register");
-
Label done;
- Label runtime;
-
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
- // If card is young, jump to done
- __ br(Assembler::EQ, done);
- generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
-
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push(saved, sp);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
- __ pop(saved, sp);
-
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */);
__ bind(done);
}
@@ -329,38 +333,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* stub) {
- assert(thread == rthread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
- rscratch1);
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
- && tmp2 != noreg, "expecting a register");
-
- stub->initialize_registers(thread, tmp1, tmp2);
-
- bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
- // If card is not young, jump to stub (slow path)
- __ br(Assembler::NE, *stub->entry());
-
- __ bind(*stub->continuation());
-}
-
-void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const {
- Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
- Register thread = stub->thread();
- Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
- Register tmp2 = stub->tmp2();
- assert(stub->tmp3() == noreg, "not needed in this platform");
-
- __ bind(*stub->entry());
- generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
-
- __ bind(runtime);
- generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
- __ b(*stub->continuation());
+ bool new_val_may_be_null) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
+ __ bind(done);
}
#endif // COMPILER2
@@ -456,20 +432,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ b(*stub->continuation());
}
-void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
- G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
- __ bind(*stub->entry());
- assert(stub->addr()->is_register(), "Precondition.");
- assert(stub->new_val()->is_register(), "Precondition.");
- Register new_val_reg = stub->new_val()->as_register();
- __ cbz(new_val_reg, *stub->continuation());
- ce->store_parameter(stub->addr()->as_pointer_register(), 0);
- __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
- __ b(*stub->continuation());
-}
-
#undef __
+void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ masm->bind(done);
+}
+
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@@ -521,74 +496,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ epilogue();
}
-void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
- __ prologue("g1_post_barrier", false);
-
- // arg0: store_address
- Address store_addr(rfp, 2*BytesPerWord);
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
- CardTable* ct = ctbs->card_table();
-
- Label done;
- Label runtime;
-
- // At this point we know new_value is non-null and the new_value crosses regions.
- // Must check to see if card is already dirty
-
- const Register thread = rthread;
-
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- const Register card_offset = rscratch2;
- // LR is free here, so we can use it to hold the byte_map_base.
- const Register byte_map_base = lr;
-
- assert_different_registers(card_offset, byte_map_base, rscratch1);
-
- __ load_parameter(0, card_offset);
- __ lsr(card_offset, card_offset, CardTable::card_shift());
- __ load_byte_map_base(byte_map_base);
- __ ldrb(rscratch1, Address(byte_map_base, card_offset));
- __ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
- __ br(Assembler::EQ, done);
-
- assert((int)CardTable::dirty_card_val() == 0, "must be 0");
-
- __ membar(Assembler::StoreLoad);
- __ ldrb(rscratch1, Address(byte_map_base, card_offset));
- __ cbzw(rscratch1, done);
-
- // storing region crossing non-null, card is clean.
- // dirty card and log.
- __ strb(zr, Address(byte_map_base, card_offset));
-
- // Convert card offset into an address in card_addr
- Register card_addr = card_offset;
- __ add(card_addr, byte_map_base, card_addr);
-
- __ ldr(rscratch1, queue_index);
- __ cbz(rscratch1, runtime);
- __ sub(rscratch1, rscratch1, wordSize);
- __ str(rscratch1, queue_index);
-
- // Reuse LR to hold buffer_addr
- const Register buffer_addr = lr;
-
- __ ldr(buffer_addr, buffer);
- __ str(card_addr, Address(buffer_addr, rscratch1));
- __ b(done);
-
- __ bind(runtime);
- __ push_call_clobbered_registers();
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop_call_clobbered_registers();
- __ bind(done);
- __ epilogue();
-}
-
#undef __
#endif // COMPILER1
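
The new gen_write_ref_array_post_barrier computes the range of card table entries to dirty from the first and last heap addresses covered by the array, then walks that range backwards, optionally skipping already-non-clean cards when UseCondCardMark is set. A scalar model of that loop follows (illustrative only, not part of the patch; the card shift of 9 and the clean-card value of 0xff are assumptions, and card_table_base is treated as pre-biased so that base + (addr >> shift) addresses the card, like the thread-local base in the patch):

    // Scalar model of the card-dirtying loop in gen_write_ref_array_post_barrier.
    #include <cstddef>
    #include <cstdint>

    static const int     kCardShift = 9;     // assumed CardTable::card_shift()
    static const uint8_t kDirtyCard = 0;     // dirty_card_val() == 0, hence "strb zr"

    // Dirty every card covered by the oop array [start, start + count oops).
    static void dirty_cards(uint8_t* card_table_base, uintptr_t start, size_t count,
                            size_t bytes_per_heap_oop, bool use_cond_card_mark) {
      if (count == 0) return;                               // cbz(count, done)
      uintptr_t last = start + count * bytes_per_heap_oop - bytes_per_heap_oop;
      uintptr_t first_card = start >> kCardShift;
      uintptr_t last_card  = last  >> kCardShift;
      // The assembly keeps (last_card - first_card) in 'count' and counts it
      // down to zero, which is this loop.
      for (uintptr_t offset = last_card - first_card; ; offset--) {
        uint8_t* card = card_table_base + first_card + offset;
        // With UseCondCardMark, cards whose bit 0 is clear are already
        // non-clean and are skipped (the tbz in the assembly).
        if (!use_cond_card_mark || (*card & 1) != 0) {
          *card = kDirtyCard;
        }
        if (offset == 0) break;
      }
    }

    int main() {
      uint8_t cards[64];
      for (uint8_t& c : cards) c = 0xff;                    // assumed clean value
      // Six 8-byte oops starting at address 1016 span cards 1 and 2.
      dirty_cards(cards, 1016, 6, 8, true);
      return (cards[1] == kDirtyCard && cards[2] == kDirtyCard) ? 0 : 1;
    }

The per-store fast path in generate_post_barrier_fast_path follows the same pattern for a single card: shift the store address, add the thread-local card table base, optionally test bit 0, and store zero.
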
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
index 04ac2096096..72040cd7ad2 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
-class G1PostBarrierStub;
class G1PreBarrierStubC2;
-class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -65,10 +63,15 @@ protected:
public:
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
- void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+
+ void g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2);
#endif
#ifdef COMPILER2
@@ -87,9 +90,7 @@ public:
Register thread,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* c2_stub);
- void generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const;
+ bool new_val_may_be_null);
#endif
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
index 081a67d6880..18fc27a4af4 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2) {
- if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
- g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
+ bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, new_val_may_be_null);
}
%}
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index fe2440fd3fd..0570fad5b8d 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1623,7 +1623,7 @@ public:
FloatRegister p, FloatRegister z, FloatRegister t1);
void ghash_reduce_wide(int index, FloatRegister result, FloatRegister lo, FloatRegister hi,
FloatRegister p, FloatRegister z, FloatRegister t1);
- void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
+ void ghash_processBlocks_wide(Label& p, Register state, Register subkeyH,
Register data, Register blocks, int unrolls);
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp
index 84b85b7b445..25ec9cf9bdd 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp
@@ -507,7 +507,7 @@ void MacroAssembler::ghash_modmul(FloatRegister result,
//
// Clobbers all vector registers.
//
-void MacroAssembler::ghash_processBlocks_wide(address field_polynomial, Register state,
+void MacroAssembler::ghash_processBlocks_wide(Label& field_polynomial, Register state,
Register subkeyH,
Register data, Register blocks, int unrolls) {
int register_stride = 7;
@@ -531,7 +531,10 @@ void MacroAssembler::ghash_processBlocks_wide(address field_polynomial, Register
FloatRegister p = v31;
eor(vzr, T16B, vzr, vzr); // zero register
- ldrq(p, field_polynomial); // The field polynomial
+ // Load the field polynomial via a label, which must refer to data
+ // defined locally in the same code stub.
+ adr(rscratch1, field_polynomial);
+ ldrq(p, rscratch1); // The field polynomial
ldrq(v0, Address(state));
ldrq(Hprime, Address(subkeyH));
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 3f1a9f7daaa..ffe5afd93cb 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -802,7 +802,7 @@ class StubGenerator: public StubCodeGenerator {
//
// s and d are adjusted to point to the remaining words to copy
//
- void generate_copy_longs(StubId stub_id, DecoratorSet decorators, Label &start, Register s, Register d, Register count) {
+ address generate_copy_longs(StubId stub_id, DecoratorSet decorators, Register s, Register d, Register count) {
BasicType type;
copy_direction direction;
@@ -854,7 +854,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, stub_id);
- __ bind(start);
+ address start = __ pc();
Label unaligned_copy_long;
if (AvoidUnalignedAccesses) {
@@ -894,9 +894,9 @@ class StubGenerator: public StubCodeGenerator {
int prefetch = PrefetchCopyIntervalInBytes;
bool use_stride = false;
if (direction == copy_backwards) {
- use_stride = prefetch > 256;
- prefetch = -prefetch;
- if (use_stride) __ mov(stride, prefetch);
+ use_stride = prefetch > 256;
+ prefetch = -prefetch;
+ if (use_stride) __ mov(stride, prefetch);
}
__ bind(again);
@@ -1026,9 +1026,9 @@ class StubGenerator: public StubCodeGenerator {
int prefetch = PrefetchCopyIntervalInBytes;
bool use_stride = false;
if (direction == copy_backwards) {
- use_stride = prefetch > 256;
- prefetch = -prefetch;
- if (use_stride) __ mov(stride, prefetch);
+ use_stride = prefetch > 256;
+ prefetch = -prefetch;
+ if (use_stride) __ mov(stride, prefetch);
}
__ bind(again);
@@ -1037,15 +1037,15 @@ class StubGenerator: public StubCodeGenerator {
__ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
if (direction == copy_forwards) {
- // allowing for the offset of -8 the store instructions place
- // registers into the target 64 bit block at the following
- // offsets
- //
- // t0 at offset 0
- // t1 at offset 8, t2 at offset 16
- // t3 at offset 24, t4 at offset 32
- // t5 at offset 40, t6 at offset 48
- // t7 at offset 56
+ // allowing for the offset of -8 the store instructions place
+ // registers into the target 64 bit block at the following
+ // offsets
+ //
+ // t0 at offset 0
+ // t1 at offset 8, t2 at offset 16
+ // t3 at offset 24, t4 at offset 32
+ // t5 at offset 40, t6 at offset 48
+ // t7 at offset 56
bs.copy_store_at_8(Address(d, 1 * unit), t0);
bs.copy_store_at_16(Address(d, 2 * unit), t1, t2);
@@ -1057,18 +1057,18 @@ class StubGenerator: public StubCodeGenerator {
bs.copy_store_at_8(Address(__ pre(d, 8 * unit)), t7);
bs.copy_load_at_16(t6, t7, Address(__ pre(s, 8 * unit)));
} else {
- // d was not offset when we started so the registers are
- // written into the 64 bit block preceding d with the following
- // offsets
- //
- // t1 at offset -8
- // t3 at offset -24, t0 at offset -16
- // t5 at offset -48, t2 at offset -32
- // t7 at offset -56, t4 at offset -48
- // t6 at offset -64
- //
- // note that this matches the offsets previously noted for the
- // loads
+ // d was not offset when we started so the registers are
+ // written into the 64 bit block preceding d with the following
+ // offsets
+ //
+ // t1 at offset -8
+ // t3 at offset -24, t0 at offset -16
+ // t5 at offset -48, t2 at offset -32
+ // t7 at offset -56, t4 at offset -48
+ // t6 at offset -64
+ //
+ // note that this matches the offsets previously noted for the
+ // loads
bs.copy_store_at_8(Address(d, 1 * unit), t1);
bs.copy_store_at_16(Address(d, 3 * unit), t3, t0);
@@ -1109,10 +1109,10 @@ class StubGenerator: public StubCodeGenerator {
{
Label L1, L2;
__ tbz(count, exact_log2(4), L1);
- // this is the same as above but copying only 4 longs hence
- // with only one intervening stp between the str instructions
- // but note that the offsets and registers still follow the
- // same pattern
+ // this is the same as above but copying only 4 longs hence
+ // with only one intervening stp between the str instructions
+ // but note that the offsets and registers still follow the
+ // same pattern
bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
bs.copy_load_at_16(t2, t3, Address(__ pre(s, 4 * unit)));
if (direction == copy_forwards) {
@@ -1127,10 +1127,10 @@ class StubGenerator: public StubCodeGenerator {
__ bind(L1);
__ tbz(count, 1, L2);
- // this is the same as above but copying only 2 longs hence
- // there is no intervening stp between the str instructions
- // but note that the offset and register patterns are still
- // the same
+ // this is the same as above but copying only 2 longs hence
+ // there is no intervening stp between the str instructions
+ // but note that the offset and register patterns are still
+ // the same
bs.copy_load_at_16(t0, t1, Address(__ pre(s, 2 * unit)));
if (direction == copy_forwards) {
bs.copy_store_at_8(Address(d, 1 * unit), t0);
@@ -1141,18 +1141,20 @@ class StubGenerator: public StubCodeGenerator {
}
__ bind(L2);
- // for forwards copy we need to re-adjust the offsets we
- // applied so that s and d are follow the last words written
+ // for forwards copy we need to re-adjust the offsets we
+ // applied so that s and d follow the last words written
- if (direction == copy_forwards) {
- __ add(s, s, 16);
- __ add(d, d, 8);
- }
+ if (direction == copy_forwards) {
+ __ add(s, s, 16);
+ __ add(d, d, 8);
+ }
}
__ ret(lr);
- }
+ }
+
+ return start;
}
// Small copy: less than 16 bytes.
@@ -1206,10 +1208,6 @@ class StubGenerator: public StubCodeGenerator {
}
}
- Label copy_f, copy_b;
- Label copy_obj_f, copy_obj_b;
- Label copy_obj_uninit_f, copy_obj_uninit_b;
-
// All-singing all-dancing memory copy.
//
// Copy count units of memory from s to d. The size of a unit is
@@ -1447,19 +1445,19 @@ class StubGenerator: public StubCodeGenerator {
}
if (direction == copy_forwards) {
if (type != T_OBJECT) {
- __ bl(copy_f);
+ __ bl(StubRoutines::aarch64::copy_byte_f());
} else if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
- __ bl(copy_obj_uninit_f);
+ __ bl(StubRoutines::aarch64::copy_oop_uninit_f());
} else {
- __ bl(copy_obj_f);
+ __ bl(StubRoutines::aarch64::copy_oop_f());
}
} else {
if (type != T_OBJECT) {
- __ bl(copy_b);
+ __ bl(StubRoutines::aarch64::copy_byte_b());
} else if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
- __ bl(copy_obj_uninit_b);
+ __ bl(StubRoutines::aarch64::copy_oop_uninit_b());
} else {
- __ bl(copy_obj_b);
+ __ bl(StubRoutines::aarch64::copy_oop_b());
}
}
@@ -1522,11 +1520,11 @@ class StubGenerator: public StubCodeGenerator {
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
- // Side Effects: entry is set to the (post push) entry point so it
- // can be used by the corresponding conjoint copy
- // method
+ // Side Effects: nopush_entry is set to the (post push) entry point
+ // so it can be used by the corresponding conjoint
+ // copy method
//
- address generate_disjoint_copy(StubId stub_id, address *entry) {
+ address generate_disjoint_copy(StubId stub_id, address *nopush_entry) {
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_reg = RegSet::of(s, d, count);
int size;
@@ -1615,8 +1613,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
__ enter();
- if (entry != nullptr) {
- *entry = __ pc();
+ if (nopush_entry != nullptr) {
+ *nopush_entry = __ pc();
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1679,10 +1677,10 @@ class StubGenerator: public StubCodeGenerator {
// cache line boundaries will still be loaded and stored atomically.
//
// Side Effects:
- // entry is set to the no-overlap entry point so it can be used by
- // some other conjoint copy method
+ // nopush_entry is set to the no-overlap entry point so it can be
+ // used by some other conjoint copy method
//
- address generate_conjoint_copy(StubId stub_id, address nooverlap_target, address *entry) {
+ address generate_conjoint_copy(StubId stub_id, address nooverlap_target, address *nopush_entry) {
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_regs = RegSet::of(s, d, count);
int size;
@@ -1769,16 +1767,19 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
__ enter();
- if (entry != nullptr) {
- *entry = __ pc();
+ if (nopush_entry != nullptr) {
+ *nopush_entry = __ pc();
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// use fwd copy when (d-s) above_equal (count*size)
+ Label L_overlapping;
__ sub(rscratch1, d, s);
__ cmp(rscratch1, count, Assembler::LSL, exact_log2(size));
- __ br(Assembler::HS, nooverlap_target);
+ __ br(Assembler::LO, L_overlapping);
+ __ b(RuntimeAddress(nooverlap_target));
+ __ bind(L_overlapping);
DecoratorSet decorators = IN_HEAP | IS_ARRAY;
if (dest_uninitialized) {
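The branch sense above is inverted relative to the old `br(HS, nooverlap_target)` presumably because the no-overlap target is now a published StubRoutines entry that may live in a different code blob, so it is reached via an unconditional, relocatable RuntimeAddress branch guarded by a short conditional branch around it. As a rough sketch of the decision being encoded, assuming plain unsigned arithmetic models the `above_equal` compare (illustrative C++, not HotSpot code):

#include <cstdint>
#include <cstddef>

// Illustrative sketch only: decides whether the conjoint stub may delegate to
// the forward (disjoint) copy. Unsigned arithmetic makes the single compare
// also cover d < s, where d - s wraps to a large value.
static bool can_use_forward_copy(uintptr_t s, uintptr_t d,
                                 size_t count, size_t elem_size) {
  return (d - s) >= count * elem_size;   // "(d-s) above_equal (count*size)"
}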
@@ -1850,7 +1851,7 @@ class StubGenerator: public StubCodeGenerator {
// r0 == 0 - success
// r0 == -1^K - failure, where K is partial transfer count
//
- address generate_checkcast_copy(StubId stub_id, address *entry) {
+ address generate_checkcast_copy(StubId stub_id, address *nopush_entry) {
bool dest_uninitialized;
switch (stub_id) {
case StubId::stubgen_checkcast_arraycopy_id:
@@ -1911,8 +1912,8 @@ class StubGenerator: public StubCodeGenerator {
#endif //ASSERT
// Caller of this entry point must set up the argument registers.
- if (entry != nullptr) {
- *entry = __ pc();
+ if (nopush_entry != nullptr) {
+ *nopush_entry = __ pc();
BLOCK_COMMENT("Entry:");
}
@@ -2724,13 +2725,21 @@ class StubGenerator: public StubCodeGenerator {
}
void generate_arraycopy_stubs() {
- address entry;
- address entry_jbyte_arraycopy;
- address entry_jshort_arraycopy;
- address entry_jint_arraycopy;
- address entry_oop_arraycopy;
- address entry_jlong_arraycopy;
- address entry_checkcast_arraycopy;
+ // Some copy stubs publish a normal entry and then a second 'fallback'
+ // entry immediately following their stack push. This can be used
+ // as a post-push branch target by compatible stubs when they
+ // identify a special case that can be handled by the fallback
+ // stub, e.g. a disjoint copy stub may be used as a special-case
+ // fallback for its compatible conjoint copy stub.
+ //
+ // A nopush entry is always returned in the following local and
+ // then published by assigning it to the appropriate entry field in
+ // class StubRoutines. The entry value is then passed to the
+ // generator for the compatible stub. That means the entry must be
+ // listed when saving to/restoring from the AOT cache, ensuring
+ // that the inter-stub jumps are noted at AOT-cache save and
+ // relocated at AOT-cache load.
+ address nopush_entry;
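The publication pattern described in the comment above can be sketched in plain C++ roughly as follows; every type, function and field name here is an illustrative stand-in rather than the real StubRoutines declaration:

// Hedged sketch of the publish-and-chain pattern; all names are stand-ins.
typedef unsigned char* address;

struct ExampleStubRoutines {
  address jbyte_disjoint_arraycopy = nullptr;
  address jbyte_disjoint_arraycopy_nopush = nullptr; // post-push entry, reused by the conjoint stub
  address jbyte_arraycopy = nullptr;
  address jbyte_arraycopy_nopush = nullptr;          // post-push entry, reused by generic/unsafe copy
};

// A generator returns its normal entry and reports the entry just past its
// frame push through the optional out-parameter.
static address generate_disjoint_example(address* nopush_entry) {
  static unsigned char code[16];
  if (nopush_entry != nullptr) {
    *nopush_entry = code + 4;                        // entry immediately after the push
  }
  return code;
}

static address generate_conjoint_example(address nooverlap_target, address* nopush_entry) {
  static unsigned char code[16];
  (void)nooverlap_target;                            // branched to when the copy turns out to be disjoint
  if (nopush_entry != nullptr) {
    *nopush_entry = code + 4;
  }
  return code;
}

static void generate_jbyte_stubs_example(ExampleStubRoutines& sr) {
  address nopush_entry;
  sr.jbyte_disjoint_arraycopy = generate_disjoint_example(&nopush_entry);
  sr.jbyte_disjoint_arraycopy_nopush = nopush_entry; // publish before passing it on
  sr.jbyte_arraycopy = generate_conjoint_example(sr.jbyte_disjoint_arraycopy_nopush, &nopush_entry);
  sr.jbyte_arraycopy_nopush = nopush_entry;          // later handed to the generic/unsafe copy generators
}

The important property is that the post-push entry is stored in a StubRoutines field before being handed to the next generator, so it is a named address that the AOT cache can note and relocate rather than an anonymous local.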
// generate the common exit first so later stubs can rely on it if
// they want an UnsafeMemoryAccess exit non-local to the stub
@@ -2738,83 +2747,123 @@ class StubGenerator: public StubCodeGenerator {
// register the stub as the default exit with class UnsafeMemoryAccess
UnsafeMemoryAccess::set_common_exit_stub_pc(StubRoutines::_unsafecopy_common_exit);
- generate_copy_longs(StubId::stubgen_copy_byte_f_id, IN_HEAP | IS_ARRAY, copy_f, r0, r1, r15);
- generate_copy_longs(StubId::stubgen_copy_byte_b_id, IN_HEAP | IS_ARRAY, copy_b, r0, r1, r15);
+ // generate and publish aarch64-specific bulk copy routines first
+ // so we can call them from other copy stubs
+ StubRoutines::aarch64::_copy_byte_f = generate_copy_longs(StubId::stubgen_copy_byte_f_id, IN_HEAP | IS_ARRAY, r0, r1, r15);
+ StubRoutines::aarch64::_copy_byte_b = generate_copy_longs(StubId::stubgen_copy_byte_b_id, IN_HEAP | IS_ARRAY, r0, r1, r15);
- generate_copy_longs(StubId::stubgen_copy_oop_f_id, IN_HEAP | IS_ARRAY, copy_obj_f, r0, r1, r15);
- generate_copy_longs(StubId::stubgen_copy_oop_b_id, IN_HEAP | IS_ARRAY, copy_obj_b, r0, r1, r15);
+ StubRoutines::aarch64::_copy_oop_f = generate_copy_longs(StubId::stubgen_copy_oop_f_id, IN_HEAP | IS_ARRAY, r0, r1, r15);
+ StubRoutines::aarch64::_copy_oop_b = generate_copy_longs(StubId::stubgen_copy_oop_b_id, IN_HEAP | IS_ARRAY, r0, r1, r15);
- generate_copy_longs(StubId::stubgen_copy_oop_uninit_f_id, IN_HEAP | IS_ARRAY | IS_DEST_UNINITIALIZED, copy_obj_uninit_f, r0, r1, r15);
- generate_copy_longs(StubId::stubgen_copy_oop_uninit_b_id, IN_HEAP | IS_ARRAY | IS_DEST_UNINITIALIZED, copy_obj_uninit_b, r0, r1, r15);
+ StubRoutines::aarch64::_copy_oop_uninit_f = generate_copy_longs(StubId::stubgen_copy_oop_uninit_f_id, IN_HEAP | IS_ARRAY | IS_DEST_UNINITIALIZED, r0, r1, r15);
+ StubRoutines::aarch64::_copy_oop_uninit_b = generate_copy_longs(StubId::stubgen_copy_oop_uninit_b_id, IN_HEAP | IS_ARRAY | IS_DEST_UNINITIALIZED, r0, r1, r15);
StubRoutines::aarch64::_zero_blocks = generate_zero_blocks();
//*** jbyte
// Always need aligned and unaligned versions
- StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jbyte_disjoint_arraycopy_id, &entry);
- StubRoutines::_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_jbyte_arraycopy_id, entry, &entry_jbyte_arraycopy);
- StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jbyte_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jbyte_arraycopy_id, entry, nullptr);
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jbyte_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jbyte_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_jbyte_arraycopy_id, StubRoutines::_jbyte_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jbyte_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jbyte_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jbyte_arraycopy_id, StubRoutines::_arrayof_jbyte_disjoint_arraycopy_nopush, nullptr);
//*** jshort
// Always need aligned and unaligned versions
- StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jshort_disjoint_arraycopy_id, &entry);
- StubRoutines::_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_jshort_arraycopy_id, entry, &entry_jshort_arraycopy);
- StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jshort_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jshort_arraycopy_id, entry, nullptr);
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jshort_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jshort_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_jshort_arraycopy_id, StubRoutines::_jshort_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is used by generic/unsafe copy
+ StubRoutines::_jshort_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jshort_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jshort_arraycopy_id, StubRoutines::_arrayof_jshort_disjoint_arraycopy_nopush, nullptr);
//*** jint
// Aligned versions
- StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jint_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jint_arraycopy_id, entry, &entry_jint_arraycopy);
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jint_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jint_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jint_arraycopy_id, StubRoutines::_arrayof_jint_disjoint_arraycopy_nopush, nullptr);
// In 64 bit we need both aligned and unaligned versions of jint arraycopy.
- // entry_jint_arraycopy always points to the unaligned version
- StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jint_disjoint_arraycopy_id, &entry);
- StubRoutines::_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_jint_arraycopy_id, entry, &entry_jint_arraycopy);
+ // jint_arraycopy_nopush always points to the unaligned version
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jint_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jint_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_jint_arraycopy_id, StubRoutines::_jint_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jint_arraycopy_nopush = nopush_entry;
//*** jlong
// It is always aligned
- StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jlong_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jlong_arraycopy_id, entry, &entry_jlong_arraycopy);
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jlong_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jlong_arraycopy_id, StubRoutines::_arrayof_jlong_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jlong_arraycopy_nopush = nopush_entry;
+ // disjoint normal/nopush and conjoint normal entries are not
+ // generated since the arrayof versions are the same
StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+ StubRoutines::_jlong_disjoint_arraycopy_nopush = StubRoutines::_arrayof_jlong_disjoint_arraycopy_nopush;
StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
//*** oops
{
- // With compressed oops we need unaligned versions; notice that
- // we overwrite entry_oop_arraycopy.
- bool aligned = !UseCompressedOops;
-
StubRoutines::_arrayof_oop_disjoint_arraycopy
- = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_id, &entry);
+ = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_nopush = nopush_entry;
StubRoutines::_arrayof_oop_arraycopy
- = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_id, entry, &entry_oop_arraycopy);
+ = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_id, StubRoutines::_arrayof_oop_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint arrayof nopush entry is needed by generic/unsafe copy
+ StubRoutines::_oop_arraycopy_nopush = nopush_entry;
// Aligned versions without pre-barriers
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
- = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_uninit_id, &entry);
+ = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_uninit_id, &nopush_entry);
+ // disjoint arrayof+uninit nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic/unsafe copy does not cater for uninit arrays.
StubRoutines::_arrayof_oop_arraycopy_uninit
- = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_uninit_id, entry, nullptr);
+ = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_uninit_id, StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit_nopush, nullptr);
}
+ // for oop copies reuse arrayof entries for non-arrayof cases
StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+ StubRoutines::_oop_disjoint_arraycopy_nopush = StubRoutines::_arrayof_oop_disjoint_arraycopy_nopush;
StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+ StubRoutines::_oop_disjoint_arraycopy_uninit_nopush = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit_nopush;
StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
- StubRoutines::_checkcast_arraycopy = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_id, &entry_checkcast_arraycopy);
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_id, &nopush_entry);
+ // checkcast nopush entry is needed by generic copy
+ StubRoutines::_checkcast_arraycopy_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic copy does not cater for uninit arrays.
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_uninit_id, nullptr);
- StubRoutines::_unsafe_arraycopy = generate_unsafe_copy(entry_jbyte_arraycopy,
- entry_jshort_arraycopy,
- entry_jint_arraycopy,
- entry_jlong_arraycopy);
+ // unsafe arraycopy may fall back on conjoint stubs
+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy(StubRoutines::_jbyte_arraycopy_nopush,
+ StubRoutines::_jshort_arraycopy_nopush,
+ StubRoutines::_jint_arraycopy_nopush,
+ StubRoutines::_jlong_arraycopy_nopush);
- StubRoutines::_generic_arraycopy = generate_generic_copy(entry_jbyte_arraycopy,
- entry_jshort_arraycopy,
- entry_jint_arraycopy,
- entry_oop_arraycopy,
- entry_jlong_arraycopy,
- entry_checkcast_arraycopy);
+ // generic arraycopy may fall back on conjoint stubs
+ StubRoutines::_generic_arraycopy = generate_generic_copy(StubRoutines::_jbyte_arraycopy_nopush,
+ StubRoutines::_jshort_arraycopy_nopush,
+ StubRoutines::_jint_arraycopy_nopush,
+ StubRoutines::_oop_arraycopy_nopush,
+ StubRoutines::_jlong_arraycopy_nopush,
+ StubRoutines::_checkcast_arraycopy_nopush);
StubRoutines::_jbyte_fill = generate_fill(StubId::stubgen_jbyte_fill_id);
StubRoutines::_jshort_fill = generate_fill(StubId::stubgen_jshort_fill_id);
@@ -3402,14 +3451,9 @@ class StubGenerator: public StubCodeGenerator {
// counter = c_rarg7 - 16 bytes of CTR
// return - number of processed bytes
address generate_galoisCounterMode_AESCrypt() {
- address ghash_polynomial = __ pc();
- __ emit_int64(0x87); // The low-order bits of the field
- // polynomial (i.e. p = z^7+z^2+z+1)
- // repeated in the low and high parts of a
- // 128-bit vector
- __ emit_int64(0x87);
+ Label ghash_polynomial; // local data generated after code
- __ align(CodeEntryAlignment);
+ __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_galoisCounterMode_AESCrypt_id;
StubCodeMark mark(this, stub_id);
address start = __ pc();
@@ -3514,7 +3558,17 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
- return start;
+
+ // bind label and generate polynomial data
+ __ align(wordSize * 2);
+ __ bind(ghash_polynomial);
+ __ emit_int64(0x87); // The low-order bits of the field
+ // polynomial (i.e. p = z^7+z^2+z+1)
+ // repeated in the low and high parts of a
+ // 128-bit vector
+ __ emit_int64(0x87);
+
+ return start;
}
class Cached64Bytes {
@@ -4559,16 +4613,6 @@ class StubGenerator: public StubCodeGenerator {
// by the second lane from all vectors and so on.
address generate_chacha20Block_blockpar() {
Label L_twoRounds, L_cc20_const;
- // The constant data is broken into two 128-bit segments to be loaded
- // onto FloatRegisters. The first 128 bits are a counter add overlay
- // that adds +0/+1/+2/+3 to the vector holding replicated state[12].
- // The second 128-bits is a table constant used for 8-bit left rotations.
- __ BIND(L_cc20_const);
- __ emit_int64(0x0000000100000000UL);
- __ emit_int64(0x0000000300000002UL);
- __ emit_int64(0x0605040702010003UL);
- __ emit_int64(0x0E0D0C0F0A09080BUL);
-
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_chacha20Block_id;
StubCodeMark mark(this, stub_id);
@@ -4716,6 +4760,17 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // bind label and generate local constant data used by this stub
+ // The constant data is broken into two 128-bit segments to be loaded
+ // onto FloatRegisters. The first 128 bits are a counter add overlay
+ // that adds +0/+1/+2/+3 to the vector holding replicated state[12].
+ // The second 128-bits is a table constant used for 8-bit left rotations.
+ __ BIND(L_cc20_const);
+ __ emit_int64(0x0000000100000000UL);
+ __ emit_int64(0x0000000300000002UL);
+ __ emit_int64(0x0605040702010003UL);
+ __ emit_int64(0x0E0D0C0F0A09080BUL);
+
return start;
}
@@ -6036,10 +6091,6 @@ class StubGenerator: public StubCodeGenerator {
address generate_kyber12To16() {
Label L_F00, L_loop, L_end;
- __ BIND(L_F00);
- __ emit_int64(0x0f000f000f000f00);
- __ emit_int64(0x0f000f000f000f00);
-
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyber12To16_id;
StubCodeMark mark(this, stub_id);
@@ -6233,6 +6284,11 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // bind label and generate constant data used by this stub
+ __ BIND(L_F00);
+ __ emit_int64(0x0f000f000f000f00);
+ __ emit_int64(0x0f000f000f000f00);
+
return start;
}
@@ -9642,14 +9698,7 @@ class StubGenerator: public StubCodeGenerator {
StubId stub_id = StubId::stubgen_ghash_processBlocks_id;
StubCodeMark mark(this, stub_id);
- __ align(wordSize * 2);
- address p = __ pc();
- __ emit_int64(0x87); // The low-order bits of the field
- // polynomial (i.e. p = z^7+z^2+z+1)
- // repeated in the low and high parts of a
- // 128-bit vector
- __ emit_int64(0x87);
-
+ Label polynomial; // local data generated at end of stub
__ align(CodeEntryAlignment);
address start = __ pc();
@@ -9661,7 +9710,8 @@ class StubGenerator: public StubCodeGenerator {
FloatRegister vzr = v30;
__ eor(vzr, __ T16B, vzr, vzr); // zero register
- __ ldrq(v24, p); // The field polynomial
+ __ adr(rscratch1, polynomial);
+ __ ldrq(v24, rscratch1); // The field polynomial
__ ldrq(v0, Address(state));
__ ldrq(v1, Address(subkeyH));
@@ -9701,6 +9751,15 @@ class StubGenerator: public StubCodeGenerator {
__ st1(v0, __ T16B, state);
__ ret(lr);
+ // bind label and generate local polynomial data
+ __ align(wordSize * 2);
+ __ bind(polynomial);
+ __ emit_int64(0x87); // The low-order bits of the field
+ // polynomial (i.e. p = z^7+z^2+z+1)
+ // repeated in the low and high parts of a
+ // 128-bit vector
+ __ emit_int64(0x87);
+
return start;
}
@@ -9709,14 +9768,7 @@ class StubGenerator: public StubCodeGenerator {
StubId stub_id = StubId::stubgen_ghash_processBlocks_wide_id;
StubCodeMark mark(this, stub_id);
- __ align(wordSize * 2);
- address p = __ pc();
- __ emit_int64(0x87); // The low-order bits of the field
- // polynomial (i.e. p = z^7+z^2+z+1)
- // repeated in the low and high parts of a
- // 128-bit vector
- __ emit_int64(0x87);
-
+ Label polynomial; // local data generated after stub
__ align(CodeEntryAlignment);
address start = __ pc();
@@ -9738,7 +9790,7 @@ class StubGenerator: public StubCodeGenerator {
__ st1(v8, v9, v10, v11, __ T16B, Address(sp));
}
- __ ghash_processBlocks_wide(p, state, subkeyH, data, blocks, unroll);
+ __ ghash_processBlocks_wide(polynomial, state, subkeyH, data, blocks, unroll);
if (unroll > 1) {
// And restore state
@@ -9751,7 +9803,17 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // bind label and generate polynomial data
+ __ align(wordSize * 2);
+ __ bind(polynomial);
+ __ emit_int64(0x87); // The low-order bits of the field
+ // polynomial (i.e. p = z^7+z^2+z+1)
+ // repeated in the low and high parts of a
+ // 128-bit vector
+ __ emit_int64(0x87);
+
return start;
}
void generate_base64_encode_simdround(Register src, Register dst,
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
index 049477cda76..71f8931eb5f 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
@@ -201,12 +201,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
+ const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
- // Does store cross heap regions?
+ assert(thread == Rthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
+ // Does store cross heap regions?
__ eor(tmp1, store_addr, new_val);
__ movs(tmp1, AsmOperand(tmp1, lsr, G1HeapRegion::LogOfHRGrainBytes));
__ b(done, eq);
@@ -215,76 +218,34 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
if (new_val_may_be_null) {
__ cbz(new_val, done);
}
- // storing region crossing non-null, is card already dirty?
- const Register card_addr = tmp1;
- CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
- __ mov_address(tmp2, (address)ct->card_table()->byte_map_base());
- __ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
+ // storing region crossing non-null, is card already non-clean?
+ Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
+ __ ldr(tmp2, card_table_addr);
+ __ add(tmp1, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
- __ ldrb(tmp2, Address(card_addr));
- __ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
+ if (UseCondCardMark) {
+ __ ldrb(tmp2, Address(tmp1));
+ // Instead of loading clean_card_val() and comparing, we exploit the fact that
+ // the LSB of non-clean cards is always 0, and the LSB of clean cards is 1.
+ __ tbz(tmp2, 0, done);
+ }
+
+ static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zero_register()");
+ __ zero_register(tmp2);
+ __ strb(tmp2, Address(tmp1)); // *(card address) := dirty_card_val
}
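Taken together, the rewritten post barrier reduces to a single fast path: skip stores that do not cross regions or that store null, then dirty the card in the thread-local card table, optionally skipping cards that are already non-clean. A hedged C++ model of that logic follows; the region and card sizes are assumptions for illustration, while dirty_card_val() == 0, the clean-card LSB trick, and the per-thread card table base are taken from the patch itself:

#include <cstdint>

// Illustrative model of the generated fast path, not HotSpot code.
static const int     kLogRegionGrainBytes = 22;  // assumed 4 MB heap regions
static const int     kCardShift           = 9;   // assumed 512-byte cards
static const uint8_t kDirtyCard           = 0;   // dirty_card_val() == 0 (per static_assert)

static void g1_post_barrier_fast_path(uintptr_t store_addr, uintptr_t new_val,
                                      uint8_t* thread_card_table_base,
                                      bool use_cond_card_mark) {
  // Does the store cross heap regions? If not, no card needs to be dirtied.
  if (((store_addr ^ new_val) >> kLogRegionGrainBytes) == 0) return;
  // Storing null never creates a cross-region reference.
  if (new_val == 0) return;
  uint8_t* card = thread_card_table_base + (store_addr >> kCardShift);
  // Conditional card marking: non-clean cards have LSB 0, clean cards LSB 1,
  // so a single bit test replaces loading and comparing clean_card_val().
  if (use_cond_card_mark && (*card & 1) == 0) return;
  *card = kDirtyCard;   // no refinement queue, no StoreLoad fence in this scheme
}

With this scheme the dirty-card queue, the StoreLoad fence, and the runtime call disappear from the barrier, which is why the C1/C2 slow-path stubs are deleted elsewhere in this patch.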
-static void generate_post_barrier_slow_path(MacroAssembler* masm,
- const Register thread,
- const Register tmp1,
- const Register tmp2,
- const Register tmp3,
- Label& done,
- Label& runtime) {
- __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
- assert(CardTable::dirty_card_val() == 0, "adjust this code");
- // card_addr is loaded by generate_post_barrier_fast_path
- const Register card_addr = tmp1;
- __ ldrb(tmp2, Address(card_addr));
- __ cbz(tmp2, done);
-
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
-
- __ strb(__ zero_register(tmp2), Address(card_addr));
- generate_queue_test_and_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime,
- thread, card_addr, tmp2, tmp3);
- __ b(done);
-}
-
-
// G1 post-barrier.
// Blows all volatile registers R0-R3, LR).
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
- Register store_addr,
- Register new_val,
- Register tmp1,
- Register tmp2,
- Register tmp3) {
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
Label done;
- Label runtime;
-
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
- // If card is young, jump to done
- // card_addr and card are loaded by generate_post_barrier_fast_path
- const Register card = tmp2;
- const Register card_addr = tmp1;
- __ b(done, eq);
- generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime);
-
- __ bind(runtime);
-
- RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12);
- __ push(set);
-
- if (card_addr != R0) {
- __ mov(R0, card_addr);
- }
- __ mov(R1, Rthread);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1);
-
- __ pop(set);
-
+ generate_post_barrier_fast_path(masm, store_addr, new_val, Rthread, tmp1, tmp2, done, true /* new_val_may_be_null */);
__ bind(done);
}
@@ -344,35 +305,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register tmp1,
Register tmp2,
Register tmp3,
- G1PostBarrierStubC2* stub) {
- assert(thread == Rthread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
-
- stub->initialize_registers(thread, tmp1, tmp2, tmp3);
-
- bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
- // If card is not young, jump to stub (slow path)
- __ b(*stub->entry(), ne);
-
- __ bind(*stub->continuation());
-}
-
-void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const {
- Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
- Register thread = stub->thread();
- Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
- Register tmp2 = stub->tmp2();
- Register tmp3 = stub->tmp3();
-
- __ bind(*stub->entry());
- generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime);
-
- __ bind(runtime);
- generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2);
- __ b(*stub->continuation());
+ bool new_val_may_be_null) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
+ __ bind(done);
}
#endif // COMPILER2
@@ -463,20 +399,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ b(*stub->continuation());
}
-void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
- G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
- __ bind(*stub->entry());
- assert(stub->addr()->is_register(), "Precondition.");
- assert(stub->new_val()->is_register(), "Precondition.");
- Register new_val_reg = stub->new_val()->as_register();
- __ cbz(new_val_reg, *stub->continuation());
- ce->verify_reserved_argument_area_size(1);
- __ str(stub->addr()->as_pointer_register(), Address(SP));
- __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);
- __ b(*stub->continuation());
+#undef __
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ masm->bind(done);
}
-#undef __
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@@ -536,102 +471,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ b(done);
}
-void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
- // Input:
- // - store_addr, pushed on the stack
-
- __ set_info("g1_post_barrier_slow_id", false);
-
- Label done;
- Label recheck;
- Label runtime;
-
- Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);
-
- // save at least the registers that need saving if the runtime is called
- const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
- const int nb_saved_regs = 6;
- assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
- __ push(saved_regs);
-
- const Register r_card_addr_0 = R0; // must be R0 for the slow case
- const Register r_obj_0 = R0;
- const Register r_card_base_1 = R1;
- const Register r_tmp2 = R2;
- const Register r_index_2 = R2;
- const Register r_buffer_3 = R3;
- const Register tmp1 = Rtemp;
-
- __ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize));
- // Note: there is a comment in x86 code about not using
- // ExternalAddress / lea, due to relocation not working
- // properly for that address. Should be OK for arm, where we
- // explicitly specify that 'cardtable' has a relocInfo::none
- // type.
- __ lea(r_card_base_1, cardtable);
- __ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTable::card_shift()));
-
- // first quick check without barrier
- __ ldrb(r_tmp2, Address(r_card_addr_0));
-
- __ cmp(r_tmp2, (int)G1CardTable::g1_young_card_val());
- __ b(recheck, ne);
-
- __ bind(done);
-
- __ pop(saved_regs);
-
- __ ret();
-
- __ bind(recheck);
-
- __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1);
-
- // reload card state after the barrier that ensures the stored oop was visible
- __ ldrb(r_tmp2, Address(r_card_addr_0));
-
- assert(CardTable::dirty_card_val() == 0, "adjust this code");
- __ cbz(r_tmp2, done);
-
- // storing region crossing non-null, card is clean.
- // dirty card and log.
-
- assert(0 == (int)CardTable::dirty_card_val(), "adjust this code");
- if ((ci_card_table_address_as<intptr_t>() & 0xff) == 0) {
- // Card table is aligned so the lowest byte of the table address base is zero.
- __ strb(r_card_base_1, Address(r_card_addr_0));
- } else {
- __ strb(__ zero_register(r_tmp2), Address(r_card_addr_0));
- }
-
- __ ldr(r_index_2, queue_index);
- __ ldr(r_buffer_3, buffer);
-
- __ subs(r_index_2, r_index_2, wordSize);
- __ b(runtime, lt); // go to runtime if now negative
-
- __ str(r_index_2, queue_index);
-
- __ str(r_card_addr_0, Address(r_buffer_3, r_index_2));
-
- __ b(done);
-
- __ bind(runtime);
-
- __ save_live_registers();
-
- assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0");
- __ mov(c_rarg1, Rthread);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), c_rarg0, c_rarg1);
-
- __ restore_live_registers_without_return();
-
- __ b(done);
-}
-
#undef __
#endif // COMPILER1
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
index 4e49e655e3e..9e0eff4601b 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
-class G1PostBarrierStub;
class G1PreBarrierStubC2;
-class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -66,10 +64,15 @@ public:
#ifdef COMPILER1
public:
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
- void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+
+ void g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2);
#endif
#ifdef COMPILER2
@@ -89,9 +92,7 @@ public:
Register tmp1,
Register tmp2,
Register tmp3,
- G1PostBarrierStubC2* c2_stub);
- void generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const;
+ bool new_val_may_be_null);
#endif
};
diff --git a/src/hotspot/cpu/arm/gc/g1/g1_arm.ad b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
index 8a0a9e1aa53..e905ba9ff67 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
+++ b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -63,13 +63,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register tmp1,
Register tmp2,
Register tmp3) {
- if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
- g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub);
+ bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, new_val_may_be_null);
}
%}
diff --git a/src/hotspot/cpu/arm/stubGenerator_arm.cpp b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
index 2e2e0f7a4b9..a36ad3a0c47 100644
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
@@ -3011,6 +3011,10 @@ class StubGenerator: public StubCodeGenerator {
// Note: the disjoint stubs must be generated first, some of
// the conjoint stubs use them.
+ // Note: chaining of stubs does not rely on branching to an
+ // auxiliary post-push entry because none of the stubs
+ // push/pop a frame.
+
// these need always status in case they are called from generic_arraycopy
StubRoutines::_jbyte_disjoint_arraycopy = generate_primitive_copy(StubId::stubgen_jbyte_disjoint_arraycopy_id);
StubRoutines::_jshort_disjoint_arraycopy = generate_primitive_copy(StubId::stubgen_jshort_disjoint_arraycopy_id);
@@ -3024,6 +3028,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_primitive_copy(StubId::stubgen_arrayof_jlong_disjoint_arraycopy_id);
StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_oop_copy (StubId::stubgen_arrayof_oop_disjoint_arraycopy_id);
+ // disjoint copy entry is needed by conjoint copy
// these need always status in case they are called from generic_arraycopy
StubRoutines::_jbyte_arraycopy = generate_primitive_copy(StubId::stubgen_jbyte_arraycopy_id, StubRoutines::_jbyte_disjoint_arraycopy);
StubRoutines::_jshort_arraycopy = generate_primitive_copy(StubId::stubgen_jshort_arraycopy_id, StubRoutines::_jshort_disjoint_arraycopy);
diff --git a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
index 24601c5d3b0..7e49ec7455d 100644
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
@@ -26,7 +26,7 @@
#ifndef CPU_PPC_ASSEMBLER_PPC_INLINE_HPP
#define CPU_PPC_ASSEMBLER_PPC_INLINE_HPP
-#include "asm/assembler.inline.hpp"
+#include "asm/assembler.hpp"
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
#include "runtime/vm_version.hpp"
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
index 4fb13422f59..262bb1eae89 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
@@ -28,7 +28,6 @@
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
@@ -230,78 +229,52 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
__ bind(filtered);
}
-static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) {
- __ xorr(R0, store_addr, new_val); // tmp1 := store address ^ new value
- __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
-}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ assert_different_registers(store_addr, new_val, tmp1, R0);
+ assert_different_registers(store_addr, tmp1, tmp2, R0);
-static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) {
- CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
- __ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2);
- __ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
- __ lbzx(R0, tmp1, tmp2); // tmp1 := card address
- __ cmpwi(CR0, R0, (int)G1CardTable::g1_young_card_val());
- return Address(tmp1, tmp2); // return card address
-}
+ __ xorr(R0, store_addr, new_val); // R0 := store address ^ new value
+ __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // R0 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ beq(CR0, done);
-static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) {
- __ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement
- __ lbzx(R0, card_addr.base(), card_addr.index()); // tmp2 := card
- __ cmpwi(CR0, R0, (int)G1CardTable::dirty_card_val()); // tmp2 := card == dirty_card_val?
+ // Crosses regions, storing null?
+ if (!new_val_may_be_null) {
+#ifdef ASSERT
+ __ cmpdi(CR0, new_val, 0);
+ __ asm_assert_ne("null oop not allowed (G1 post)"); // Checked by caller.
+#endif
+ } else {
+ __ cmpdi(CR0, new_val, 0);
+ __ beq(CR0, done);
+ }
+
+ __ ld(tmp1, G1ThreadLocalData::card_table_base_offset(), thread);
+ __ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp2 := card address relative to card table base
+ if (UseCondCardMark) {
+ __ lbzx(R0, tmp1, tmp2);
+ __ cmpwi(CR0, R0, (int)G1CardTable::clean_card_val());
+ __ bne(CR0, done);
+ }
+
+ __ li(R0, G1CardTable::dirty_card_val());
+ __ stbx(R0, tmp1, tmp2);
}
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
Register store_addr, Register new_val,
- Register tmp1, Register tmp2, Register tmp3,
- MacroAssembler::PreservationLevel preservation_level) {
+ Register tmp1, Register tmp2) {
bool not_null = (decorators & IS_NOT_NULL) != 0;
- Label runtime, filtered;
- assert_different_registers(store_addr, new_val, tmp1, tmp2);
-
- CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set());
-
- generate_region_crossing_test(masm, store_addr, new_val);
- __ beq(CR0, filtered);
-
- // Crosses regions, storing null?
- if (not_null) {
-#ifdef ASSERT
- __ cmpdi(CR0, new_val, 0);
- __ asm_assert_ne("null oop not allowed (G1 post)"); // Checked by caller.
-#endif
- } else {
- __ cmpdi(CR0, new_val, 0);
- __ beq(CR0, filtered);
- }
-
- Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
- __ beq(CR0, filtered);
-
- generate_card_dirty_test(masm, card_addr);
- __ beq(CR0, filtered);
-
- __ li(R0, (int)G1CardTable::dirty_card_val());
- __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
-
- Register Rcard_addr = tmp3;
- __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
-
- generate_queue_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime, Rcard_addr, tmp1);
- __ b(filtered);
-
- __ bind(runtime);
-
- assert(preservation_level == MacroAssembler::PRESERVATION_NONE,
- "g1_write_barrier_post doesn't support preservation levels higher than PRESERVATION_NONE");
-
- // Save the live input values.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, R16_thread);
-
- __ bind(filtered);
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, R16_thread, tmp1, tmp2, done, !not_null);
+ __ bind(done);
}
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -333,8 +306,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
}
g1_write_barrier_post(masm, decorators,
base, val,
- tmp1, tmp2, tmp3,
- preservation_level);
+ tmp1, tmp2);
}
}
@@ -457,70 +429,29 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* stub,
+ bool new_val_may_be_null,
bool decode_new_val) {
assert_different_registers(store_addr, new_val, tmp1, R0);
assert_different_registers(store_addr, tmp1, tmp2, R0);
- stub->initialize_registers(R16_thread, tmp1, tmp2);
+ Label done;
- bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
Register new_val_decoded = new_val;
if (decode_new_val) {
assert(UseCompressedOops, "or should not be here");
- if (null_check_required && CompressedOops::base() != nullptr) {
+ if (new_val_may_be_null && CompressedOops::base() != nullptr) {
// We prefer doing the null check after the region crossing check.
// Only compressed oop modes with base != null require a null check here.
__ cmpwi(CR0, new_val, 0);
- __ beq(CR0, *stub->continuation());
- null_check_required = false;
+ __ beq(CR0, done);
+ new_val_may_be_null = false;
}
new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val);
}
- generate_region_crossing_test(masm, store_addr, new_val_decoded);
- __ beq(CR0, *stub->continuation());
-
- // crosses regions, storing null?
- if (null_check_required) {
- __ cmpdi(CR0, new_val_decoded, 0);
- __ beq(CR0, *stub->continuation());
- }
-
- Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
- assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub");
- __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CR0, Assembler::equal), *stub->entry());
-
- __ bind(*stub->continuation());
-}
-
-void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const {
- Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
- Address card_addr(stub->tmp1(), stub->tmp2()); // See above.
-
- __ bind(*stub->entry());
-
- generate_card_dirty_test(masm, card_addr);
- __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
-
- __ li(R0, (int)G1CardTable::dirty_card_val());
- __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
-
- Register Rcard_addr = stub->tmp1();
- __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
-
- generate_queue_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime, Rcard_addr, stub->tmp2());
- __ b(*stub->continuation());
-
- __ bind(runtime);
- generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
- __ b(*stub->continuation());
+ generate_post_barrier_fast_path(masm, store_addr, new_val_decoded, R16_thread, tmp1, tmp2, done, new_val_may_be_null);
+ __ bind(done);
}
#endif // COMPILER2
@@ -558,28 +489,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ b(*stub->continuation());
}
-void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
- G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
- __ bind(*stub->entry());
+#undef __
- assert(stub->addr()->is_register(), "Precondition.");
- assert(stub->new_val()->is_register(), "Precondition.");
- Register addr_reg = stub->addr()->as_pointer_register();
- Register new_val_reg = stub->new_val()->as_register();
-
- __ cmpdi(CR0, new_val_reg, 0);
- __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
-
- address c_code = bs->post_barrier_c1_runtime_code_blob()->code_begin();
- //__ load_const_optimized(R0, c_code);
- __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
- __ mtctr(R0);
- __ mr(R0, addr_reg); // Pass addr in R0.
- __ bctrl();
- __ b(*stub->continuation());
+void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ masm->bind(done);
}
-#undef __
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@@ -642,86 +564,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ b(restart);
}
-void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
- G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
-
- __ set_info("g1_post_barrier_slow_id", false);
-
- // Using stack slots: spill addr, spill tmp2
- const int stack_slots = 2;
- Register tmp = R0;
- Register addr = R14;
- Register tmp2 = R15;
- CardTable::CardValue* byte_map_base = bs->card_table()->byte_map_base();
-
- Label restart, refill, ret;
-
- // Spill
- __ std(addr, -8, R1_SP);
- __ std(tmp2, -16, R1_SP);
-
- __ srdi(addr, R0, CardTable::card_shift()); // Addr is passed in R0.
- __ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
- __ add(addr, tmp2, addr);
- __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
-
- // Return if young card.
- __ cmpwi(CR0, tmp, G1CardTable::g1_young_card_val());
- __ beq(CR0, ret);
-
- // Return if sequential consistent value is already dirty.
- __ membar(Assembler::StoreLoad);
- __ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
-
- __ cmpwi(CR0, tmp, G1CardTable::dirty_card_val());
- __ beq(CR0, ret);
-
- // Not dirty.
-
- // First, dirty it.
- __ li(tmp, G1CardTable::dirty_card_val());
- __ stb(tmp, 0, addr);
-
- int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
- int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
-
- __ bind(restart);
-
- // Get the index into the update buffer. G1DirtyCardQueue::_index is
- // a size_t so ld_ptr is appropriate here.
- __ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);
-
- // index == 0?
- __ cmpdi(CR0, tmp2, 0);
- __ beq(CR0, refill);
-
- __ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
- __ addi(tmp2, tmp2, -oopSize);
-
- __ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
- __ add(tmp2, tmp, tmp2);
- __ std(addr, 0, tmp2); // [_buf + index] := <addr>
-
- // Restore temp registers and return-from-leaf.
- __ bind(ret);
- __ ld(tmp2, -16, R1_SP);
- __ ld(addr, -8, R1_SP);
- __ blr();
-
- __ bind(refill);
- const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
- __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
- __ mflr(R0);
- __ std(R0, _abi0(lr), R1_SP);
- __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
- __ pop_frame();
- __ ld(R0, _abi0(lr), R1_SP);
- __ mtlr(R0);
- __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
- __ b(restart);
-}
-
#undef __
#endif // COMPILER1
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
index 33cb89dacc6..e059cc661af 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -37,9 +37,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
-class G1PostBarrierStub;
class G1PreBarrierStubC2;
-class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -56,8 +54,7 @@ protected:
MacroAssembler::PreservationLevel preservation_level);
void g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
Register store_addr, Register new_val,
- Register tmp1, Register tmp2, Register tmp3,
- MacroAssembler::PreservationLevel preservation_level);
+ Register tmp1, Register tmp2);
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register base, RegisterOrConstant ind_or_offs, Register val,
@@ -79,17 +76,21 @@ public:
Register new_val,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* c2_stub,
+ bool new_val_may_be_null,
bool decode_new_val);
- void generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const;
#endif
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
- void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+
+ void g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2);
+
#endif
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
index 4f24efe872b..0a4a9442855 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
+++ b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2025 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@@ -64,13 +64,13 @@ static void post_write_barrier(MacroAssembler* masm,
Register tmp1,
Register tmp2,
bool decode_new_val = false) {
- if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
- g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val);
+ bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, new_val_may_be_null, decode_new_val);
}
%}
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index f9f43ade501..948092bbb9a 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -3277,8 +3277,12 @@ class StubGenerator: public StubCodeGenerator {
// register the stub as the default exit with class UnsafeMemoryAccess
UnsafeMemoryAccess::set_common_exit_stub_pc(StubRoutines::_unsafecopy_common_exit);
- // Note: the disjoint stubs must be generated first, some of
- // the conjoint stubs use them.
+ // Note: the disjoint stubs must be generated first, some of the
+ // conjoint stubs use them.
+
+ // Note: chaining of stubs does not rely on branching to an
+ // auxiliary post-push entry because none of the stubs
+ // push/pop a frame.
// non-aligned disjoint versions
StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(StubId::stubgen_jbyte_disjoint_arraycopy_id);
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
index 1f9e6df2172..e85b64bd6ba 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
@@ -27,7 +27,7 @@
#ifndef CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
#define CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
-#include "asm/assembler.inline.hpp"
+#include "asm/assembler.hpp"
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
index ef5dcdd8074..9c3bd93f8a6 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
@@ -87,15 +87,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
}
}
-void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
- Register start, Register count, Register tmp, RegSet saved_regs) {
- __ push_reg(saved_regs, sp);
+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
+ DecoratorSet decorators,
+ Register start,
+ Register count,
+ Register tmp,
+ RegSet saved_regs) {
assert_different_registers(start, count, tmp);
- assert_different_registers(c_rarg0, count);
- __ mv(c_rarg0, start);
- __ mv(c_rarg1, count);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
- __ pop_reg(saved_regs, sp);
+
+ Label loop, next, done;
+
+ // Zero count? Nothing to do.
+ __ beqz(count, done);
+
+ // Calculate the number of card marks to set. Since the covered range may start and
+ // end in the middle of a card, derive it from the card table indexes of the first
+ // and last addresses actually covered (see the arithmetic sketch after this function).
+ // Temporarily use the count register for the last element address.
+ __ shadd(count, count, start, tmp, LogBytesPerHeapOop); // end = start + count << LogBytesPerHeapOop
+ __ subi(count, count, BytesPerHeapOop); // Use last element address for end.
+
+ __ srli(start, start, CardTable::card_shift());
+ __ srli(count, count, CardTable::card_shift());
+ __ sub(count, count, start); // Number of bytes to mark - 1.
+
+ // Add card table base offset to start.
+ Address card_table_address(xthread, G1ThreadLocalData::card_table_base_offset());
+ __ ld(tmp, card_table_address);
+ __ add(start, start, tmp);
+
+ __ bind(loop);
+ if (UseCondCardMark) {
+ __ add(tmp, start, count);
+ __ lbu(tmp, Address(tmp, 0));
+ static_assert((uint)G1CardTable::clean_card_val() == 0xff, "must be");
+ __ subi(tmp, tmp, G1CardTable::clean_card_val()); // Turn the comparison against clean_card_val()
+ // into a comparison against zero to avoid an extra temp.
+ __ bnez(tmp, next);
+ }
+
+ __ add(tmp, start, count);
+ static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
+ __ sb(zr, Address(tmp, 0));
+
+ __ bind(next);
+ __ subi(count, count, 1);
+ __ bgez(count, loop);
+
+ __ bind(done);
}
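// Illustration only, not part of the patch: a minimal host-side sketch of the
// card-range arithmetic used in the array post barrier above, assuming 512-byte
// cards (card_shift == 9) and 8-byte (uncompressed) heap oops. The addresses and
// counts are hypothetical; the point is why the last-element address, not the
// one-past-the-end address, is shifted to obtain the final card index.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned card_shift    = 9;   // assumed log2 of the card size
  const unsigned bytes_per_oop = 8;   // assumed oop size
  uint64_t start = 0x10000;           // address of the first element (card aligned here)
  uint64_t count = 130;               // number of oops stored
  uint64_t last  = start + count * bytes_per_oop - bytes_per_oop; // last element address
  uint64_t first_card = start >> card_shift;
  uint64_t last_card  = last  >> card_shift;
  // The loop above dirties last_card - first_card + 1 card-table bytes.
  assert(last_card - first_card + 1 == 3);  // 1040 bytes starting on a card boundary span 3 cards
  return 0;
}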
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
@@ -192,44 +231,37 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
- const Register tmp1,
- const Register tmp2,
- Label& done,
- bool new_val_may_be_null) {
- // Does store cross heap regions?
- __ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
- __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
- __ beqz(tmp1, done);
- // Crosses regions, storing null?
- if (new_val_may_be_null) {
- __ beqz(new_val, done);
- }
- // Storing region crossing non-null, is card young?
- __ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
- __ load_byte_map_base(tmp2); // tmp2 := card table base address
- __ add(tmp1, tmp1, tmp2); // tmp1 := card address
- __ lbu(tmp2, Address(tmp1)); // tmp2 := card
-}
-
-static void generate_post_barrier_slow_path(MacroAssembler* masm,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
- Label& runtime) {
- __ membar(MacroAssembler::StoreLoad); // StoreLoad membar
- __ lbu(tmp2, Address(tmp1)); // tmp2 := card
- __ beqz(tmp2, done, true);
- // Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- STATIC_ASSERT(CardTable::dirty_card_val() == 0);
- __ sb(zr, Address(tmp1)); // *(card address) := dirty_card_val
- generate_queue_test_and_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime,
- thread, tmp1, tmp2, t0);
- __ j(done);
+ bool new_val_may_be_null) {
+ assert(thread == xthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
+ // Does store cross heap regions?
+ __ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ beqz(tmp1, done);
+
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ beqz(new_val, done);
+ }
+ // Storing region crossing non-null, is card clean?
+ __ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
+
+ Address card_table_address(xthread, G1ThreadLocalData::card_table_base_offset());
+ __ ld(tmp2, card_table_address); // tmp2 := card table base address
+ __ add(tmp1, tmp1, tmp2); // tmp1 := card address
+ if (UseCondCardMark) {
+ static_assert((uint)G1CardTable::clean_card_val() == 0xff, "must be");
+ __ lbu(tmp2, Address(tmp1, 0)); // tmp2 := card
+ __ subi(tmp2, tmp2, G1CardTable::clean_card_val()); // Turn the comparison against clean_card_val()
+ // into a comparison against zero to avoid an extra temp (see the sketch after this helper).
+ __ bnez(tmp2, done);
+ }
+ static_assert((uint)G1CardTable::dirty_card_val() == 0, "must be to use zr");
+ __ sb(zr, Address(tmp1, 0));
}
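// Illustration only, not part of the patch: the subtract-and-test-zero trick used
// for UseCondCardMark above, assuming the card byte has been zero-extended into a
// 64-bit register (as lbu does) and that clean_card_val() == 0xff.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned card = 0; card <= 0xff; card++) {
    uint64_t tmp = card;                        // zero-extended card byte
    bool clean_by_subtract = (tmp - 0xff) == 0; // what the beqz/bnez above tests
    assert(clean_by_subtract == (card == 0xff));
  }
  return 0;
}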
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
@@ -238,27 +270,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2) {
- assert(thread == xthread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
- "expecting a register");
-
Label done;
- Label runtime;
-
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
- // If card is young, jump to done (tmp2 holds the card value)
- __ mv(t0, (int)G1CardTable::g1_young_card_val());
- __ beq(tmp2, t0, done); // card == young_card_val?
- generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
-
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push_reg(saved, sp);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
- __ pop_reg(saved, sp);
-
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
__ bind(done);
}
@@ -318,37 +331,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* stub) {
- assert(thread == xthread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
- "expecting a register");
-
- stub->initialize_registers(thread, tmp1, tmp2);
-
- bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
- // If card is not young, jump to stub (slow path) (tmp2 holds the card value)
- __ mv(t0, (int)G1CardTable::g1_young_card_val());
- __ bne(tmp2, t0, *stub->entry(), true);
-
- __ bind(*stub->continuation());
-}
-
-void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const {
- Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
- Register thread = stub->thread();
- Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
- Register tmp2 = stub->tmp2();
-
- __ bind(*stub->entry());
- generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
-
- __ bind(runtime);
- generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
- __ j(*stub->continuation());
+ bool new_val_may_be_null) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
+ __ bind(done);
}
#endif // COMPILER2
@@ -443,20 +429,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ j(*stub->continuation());
}
-void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
- G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
- __ bind(*stub->entry());
- assert(stub->addr()->is_register(), "Precondition");
- assert(stub->new_val()->is_register(), "Precondition");
- Register new_val_reg = stub->new_val()->as_register();
- __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);
- ce->store_parameter(stub->addr()->as_pointer_register(), 0);
- __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
- __ j(*stub->continuation());
-}
-
#undef __
+void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ masm->bind(done);
+}
+
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@@ -507,74 +492,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ epilogue();
}
-void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
- __ prologue("g1_post_barrier", false);
-
- // arg0 : store_address
- Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
-
- Label done;
- Label runtime;
-
- // At this point we know new_value is non-null and the new_value crosses regions.
- // Must check to see if card is already dirty
- const Register thread = xthread;
-
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- const Register card_offset = t1;
- // RA is free here, so we can use it to hold the byte_map_base.
- const Register byte_map_base = ra;
-
- assert_different_registers(card_offset, byte_map_base, t0);
-
- __ load_parameter(0, card_offset);
- __ srli(card_offset, card_offset, CardTable::card_shift());
- __ load_byte_map_base(byte_map_base);
-
- // Convert card offset into an address in card_addr
- Register card_addr = card_offset;
- __ add(card_addr, byte_map_base, card_addr);
-
- __ lbu(t0, Address(card_addr, 0));
- __ sub(t0, t0, (int)G1CardTable::g1_young_card_val());
- __ beqz(t0, done);
-
- assert((int)CardTable::dirty_card_val() == 0, "must be 0");
-
- __ membar(MacroAssembler::StoreLoad);
- __ lbu(t0, Address(card_addr, 0));
- __ beqz(t0, done);
-
- // storing region crossing non-null, card is clean.
- // dirty card and log.
- __ sb(zr, Address(card_addr, 0));
-
- __ ld(t0, queue_index);
- __ beqz(t0, runtime);
- __ subi(t0, t0, wordSize);
- __ sd(t0, queue_index);
-
- // Reuse RA to hold buffer_addr
- const Register buffer_addr = ra;
-
- __ ld(buffer_addr, buffer);
- __ add(t0, buffer_addr, t0);
- __ sd(card_addr, Address(t0, 0));
- __ j(done);
-
- __ bind(runtime);
- __ push_call_clobbered_registers();
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop_call_clobbered_registers();
- __ bind(done);
- __ epilogue();
-}
-
#undef __
#endif // COMPILER1
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
index 26310231362..654ba934242 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -35,9 +35,7 @@ class LIR_Assembler;
#endif
class StubAssembler;
class G1PreBarrierStub;
-class G1PostBarrierStub;
class G1PreBarrierStubC2;
-class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -68,10 +66,16 @@ protected:
public:
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
- void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+
+ void g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2);
+
#endif
#ifdef COMPILER2
@@ -90,9 +94,7 @@ public:
Register thread,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* c2_stub);
- void generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const;
+ bool new_val_may_be_null);
#endif
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
index 7a525323021..8461a36e68c 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@@ -63,13 +63,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2) {
- if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
- g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub);
+ bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, new_val_may_be_null);
}
%}
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 385c839879c..88961ccd5a4 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -732,8 +732,7 @@ class StubGenerator: public StubCodeGenerator {
//
// s and d are adjusted to point to the remaining words to copy
//
- void generate_copy_longs(StubId stub_id, Label &start,
- Register s, Register d, Register count) {
+ address generate_copy_longs(StubId stub_id, Register s, Register d, Register count) {
BasicType type;
copy_direction direction;
switch (stub_id) {
@@ -763,7 +762,7 @@ class StubGenerator: public StubCodeGenerator {
Label again, drain;
StubCodeMark mark(this, stub_id);
__ align(CodeEntryAlignment);
- __ bind(start);
+ address start = __ pc();
if (direction == copy_forwards) {
__ sub(s, s, bias);
@@ -879,9 +878,9 @@ class StubGenerator: public StubCodeGenerator {
}
__ ret();
- }
- Label copy_f, copy_b;
+ return start;
+ }
typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp);
@@ -1099,8 +1098,8 @@ class StubGenerator: public StubCodeGenerator {
// stub_id - is used to name the stub and identify all details of
// how to perform the copy.
//
- // entry - is assigned to the stub's post push entry point unless
- // it is null
+ // nopush_entry - is assigned to the stub's post push entry point
+ // unless it is null
//
// Inputs:
// c_rarg0 - source array address
@@ -1111,11 +1110,11 @@ class StubGenerator: public StubCodeGenerator {
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
- // Side Effects: entry is set to the (post push) entry point so it
- // can be used by the corresponding conjoint copy
- // method
+ // Side Effects: nopush_entry is set to the (post push) entry point
+ // so it can be used by the corresponding conjoint
+ // copy method
//
- address generate_disjoint_copy(StubId stub_id, address* entry) {
+ address generate_disjoint_copy(StubId stub_id, address* nopush_entry) {
size_t size;
bool aligned;
bool is_oop;
@@ -1204,8 +1203,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
__ enter();
- if (entry != nullptr) {
- *entry = __ pc();
+ if (nopush_entry != nullptr) {
+ *nopush_entry = __ pc();
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1256,8 +1255,8 @@ class StubGenerator: public StubCodeGenerator {
// corresponding disjoint copy routine which can be
// jumped to if the ranges do not actually overlap
//
- // entry - is assigned to the stub's post push entry point unless
- // it is null
+ // nopush_entry - is assigned to the stub's post push entry point
+ // unless it is null
//
// Inputs:
// c_rarg0 - source array address
@@ -1269,10 +1268,10 @@ class StubGenerator: public StubCodeGenerator {
// cache line boundaries will still be loaded and stored atomically.
//
// Side Effects:
- // entry is set to the no-overlap entry point so it can be used by
- // some other conjoint copy method
+ // nopush_entry is set to the no-overlap entry point so it can be
+ // used by some other conjoint copy method
//
- address generate_conjoint_copy(StubId stub_id, address nooverlap_target, address *entry) {
+ address generate_conjoint_copy(StubId stub_id, address nooverlap_target, address *nopush_entry) {
const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_regs = RegSet::of(s, d, count);
int size;
@@ -1359,8 +1358,8 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
__ enter();
- if (entry != nullptr) {
- *entry = __ pc();
+ if (nopush_entry != nullptr) {
+ *nopush_entry = __ pc();
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1370,7 +1369,7 @@ class StubGenerator: public StubCodeGenerator {
__ slli(t1, count, exact_log2(size));
Label L_continue;
__ bltu(t0, t1, L_continue);
- __ j(nooverlap_target);
+ __ j(RuntimeAddress(nooverlap_target));
__ bind(L_continue);
DecoratorSet decorators = IN_HEAP | IS_ARRAY;
@@ -1445,7 +1444,7 @@ class StubGenerator: public StubCodeGenerator {
// x10 == 0 - success
// x10 == -1^K - failure, where K is partial transfer count
//
- address generate_checkcast_copy(StubId stub_id, address* entry) {
+ address generate_checkcast_copy(StubId stub_id, address* nopush_entry) {
bool dest_uninitialized;
switch (stub_id) {
case StubId::stubgen_checkcast_arraycopy_id:
@@ -1496,8 +1495,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
// Caller of this entry point must set up the argument registers.
- if (entry != nullptr) {
- *entry = __ pc();
+ if (nopush_entry != nullptr) {
+ *nopush_entry = __ pc();
BLOCK_COMMENT("Entry:");
}
@@ -2294,13 +2293,21 @@ class StubGenerator: public StubCodeGenerator {
}
void generate_arraycopy_stubs() {
- address entry = nullptr;
- address entry_jbyte_arraycopy = nullptr;
- address entry_jshort_arraycopy = nullptr;
- address entry_jint_arraycopy = nullptr;
- address entry_oop_arraycopy = nullptr;
- address entry_jlong_arraycopy = nullptr;
- address entry_checkcast_arraycopy = nullptr;
+ // Some copy stubs publish a normal entry and then a 2nd 'fallback'
+ // entry immediately following their stack push. This can be used
+ // as a post-push branch target by compatible stubs when they
+ // identify a special case that can be handled by the fallback
+ // stub, e.g. a disjoint copy stub may be used as a special-case
+ // fallback for its compatible conjoint copy stub (see the sketch
+ // after the declaration below).
+ //
+ // A nopush entry is always returned in the following local and
+ // then published by assigning it to the appropriate entry field in
+ // class StubRoutines. The entry value is then passed to the
+ // generator for the compatible stub. That means the entry must be
+ // listed when saving to/restoring from the AOT cache, ensuring
+ // that the inter-stub jumps are noted at AOT cache save and
+ // relocated at AOT cache load.
+ address nopush_entry = nullptr;
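// Illustration only, not part of the patch: a minimal sketch of the "nopush
// fallback entry" chaining pattern described above, using ordinary C++ functions
// as hypothetical stand-ins for the generated stubs.
#include <cstddef>
#include <cstdio>
#include <cstring>

using copy_fn = void (*)(const char* s, char* d, size_t n);

// Plays the role of the disjoint stub's post-push (nopush) body.
static void disjoint_copy_nopush(const char* s, char* d, size_t n) {
  memcpy(d, s, n);
}

// Published nopush entry, analogous to a StubRoutines::..._nopush field.
static copy_fn disjoint_nopush_entry = disjoint_copy_nopush;

// Plays the role of the conjoint stub: it branches to the compatible disjoint
// entry when it detects that the ranges do not actually overlap.
static void conjoint_copy(const char* s, char* d, size_t n) {
  if (d + n <= s || s + n <= d) {
    disjoint_nopush_entry(s, d, n);   // special case handled by the fallback stub
    return;
  }
  memmove(d, s, n);                   // overlapping ranges need the conjoint path
}

int main() {
  char buf[] = "abcdef";
  conjoint_copy(buf, buf + 1, 5);     // overlapping: stays on the conjoint path
  printf("%s\n", buf);                // prints "aabcde"
  return 0;
}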
// generate the common exit first so later stubs can rely on it if
// they want an UnsafeMemoryAccess exit non-local to the stub
@@ -2308,72 +2315,117 @@ class StubGenerator: public StubCodeGenerator {
// register the stub as the default exit with class UnsafeMemoryAccess
UnsafeMemoryAccess::set_common_exit_stub_pc(StubRoutines::_unsafecopy_common_exit);
- generate_copy_longs(StubId::stubgen_copy_byte_f_id, copy_f, c_rarg0, c_rarg1, t1);
- generate_copy_longs(StubId::stubgen_copy_byte_b_id, copy_b, c_rarg0, c_rarg1, t1);
+ // generate and publish riscv-specific bulk copy routines first
+ // so we can call them from other copy stubs
+ StubRoutines::riscv::_copy_byte_f = generate_copy_longs(StubId::stubgen_copy_byte_f_id, c_rarg0, c_rarg1, t1);
+ StubRoutines::riscv::_copy_byte_b = generate_copy_longs(StubId::stubgen_copy_byte_b_id, c_rarg0, c_rarg1, t1);
StubRoutines::riscv::_zero_blocks = generate_zero_blocks();
//*** jbyte
// Always need aligned and unaligned versions
- StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jbyte_disjoint_arraycopy_id, &entry);
- StubRoutines::_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_jbyte_arraycopy_id, entry, &entry_jbyte_arraycopy);
- StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jbyte_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jbyte_arraycopy_id, entry, nullptr);
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jbyte_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jbyte_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_jbyte_arraycopy_id, StubRoutines::_jbyte_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jbyte_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jbyte_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jbyte_arraycopy_id, StubRoutines::_arrayof_jbyte_disjoint_arraycopy_nopush, nullptr);
//*** jshort
// Always need aligned and unaligned versions
- StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jshort_disjoint_arraycopy_id, &entry);
- StubRoutines::_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_jshort_arraycopy_id, entry, &entry_jshort_arraycopy);
- StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jshort_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jshort_arraycopy_id, entry, nullptr);
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jshort_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jshort_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_jshort_arraycopy_id, StubRoutines::_jshort_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is used by generic/unsafe copy
+ StubRoutines::_jshort_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jshort_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jshort_arraycopy_id, StubRoutines::_arrayof_jshort_disjoint_arraycopy_nopush, nullptr);
//*** jint
// Aligned versions
- StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jint_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jint_arraycopy_id, entry, &entry_jint_arraycopy);
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jint_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jint_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jint_arraycopy_id, StubRoutines::_arrayof_jint_disjoint_arraycopy_nopush, nullptr);
// In 64 bit we need both aligned and unaligned versions of jint arraycopy.
// entry_jint_arraycopy always points to the unaligned version
- StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jint_disjoint_arraycopy_id, &entry);
- StubRoutines::_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_jint_arraycopy_id, entry, &entry_jint_arraycopy);
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_jint_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jint_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jint_arraycopy = generate_conjoint_copy(StubId::stubgen_jint_arraycopy_id, StubRoutines::_jint_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jint_arraycopy_nopush = nopush_entry;
//*** jlong
// It is always aligned
- StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jlong_disjoint_arraycopy_id, &entry);
- StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jlong_arraycopy_id, entry, &entry_jlong_arraycopy);
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_copy(StubId::stubgen_arrayof_jlong_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_copy(StubId::stubgen_arrayof_jlong_arraycopy_id, StubRoutines::_arrayof_jlong_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jlong_arraycopy_nopush = nopush_entry;
+ // disjoint normal/nopush and conjoint normal entries are not
+ // generated since the arrayof versions are the same
StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+ StubRoutines::_jlong_disjoint_arraycopy_nopush = StubRoutines::_arrayof_jlong_disjoint_arraycopy_nopush;
StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
//*** oops
StubRoutines::_arrayof_oop_disjoint_arraycopy
- = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_id, &entry);
+ = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint arrayof nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_nopush = nopush_entry;
StubRoutines::_arrayof_oop_arraycopy
- = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_id, entry, &entry_oop_arraycopy);
+ = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_id, StubRoutines::_arrayof_oop_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint arrayof nopush entry is needed by generic/unsafe copy
+ StubRoutines::_oop_arraycopy_nopush = nopush_entry;
// Aligned versions without pre-barriers
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
- = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_uninit_id, &entry);
- StubRoutines::_arrayof_oop_arraycopy_uninit
- = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_uninit_id, entry, nullptr);
+ = generate_disjoint_copy(StubId::stubgen_arrayof_oop_disjoint_arraycopy_uninit_id, &nopush_entry);
+ // disjoint arrayof+uninit nopush entry is needed by conjoint copy
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic/unsafe copy does not cater for uninit arrays.
+ StubRoutines::_arrayof_oop_arraycopy_uninit
+ = generate_conjoint_copy(StubId::stubgen_arrayof_oop_arraycopy_uninit_id, StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit_nopush, nullptr);
+
+ // for oop copies reuse arrayof entries for non-arrayof cases
StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+ StubRoutines::_oop_disjoint_arraycopy_nopush = StubRoutines::_arrayof_oop_disjoint_arraycopy_nopush;
StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+ StubRoutines::_oop_disjoint_arraycopy_uninit_nopush = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit_nopush;
StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
- StubRoutines::_checkcast_arraycopy = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_id, &entry_checkcast_arraycopy);
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_id, &nopush_entry);
+ // checkcast nopush entry is needed by generic copy
+ StubRoutines::_checkcast_arraycopy_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic copy does not cater for uninit arrays.
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_uninit_id, nullptr);
- StubRoutines::_unsafe_arraycopy = generate_unsafe_copy(entry_jbyte_arraycopy,
- entry_jshort_arraycopy,
- entry_jint_arraycopy,
- entry_jlong_arraycopy);
+ // unsafe arraycopy may fall back on conjoint stubs
+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy(StubRoutines::_jbyte_arraycopy_nopush,
+ StubRoutines::_jshort_arraycopy_nopush,
+ StubRoutines::_jint_arraycopy_nopush,
+ StubRoutines::_jlong_arraycopy_nopush);
- StubRoutines::_generic_arraycopy = generate_generic_copy(entry_jbyte_arraycopy,
- entry_jshort_arraycopy,
- entry_jint_arraycopy,
- entry_oop_arraycopy,
- entry_jlong_arraycopy,
- entry_checkcast_arraycopy);
+ // generic arraycopy may fall back on conjoint stubs
+ StubRoutines::_generic_arraycopy = generate_generic_copy(StubRoutines::_jbyte_arraycopy_nopush,
+ StubRoutines::_jshort_arraycopy_nopush,
+ StubRoutines::_jint_arraycopy_nopush,
+ StubRoutines::_oop_arraycopy_nopush,
+ StubRoutines::_jlong_arraycopy_nopush,
+ StubRoutines::_checkcast_arraycopy_nopush);
StubRoutines::_jbyte_fill = generate_fill(StubId::stubgen_jbyte_fill_id);
StubRoutines::_jshort_fill = generate_fill(StubId::stubgen_jshort_fill_id);
diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
index 567f3d75a62..3bab60f0bb6 100644
--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
@@ -26,7 +26,7 @@
#ifndef CPU_S390_ASSEMBLER_S390_INLINE_HPP
#define CPU_S390_ASSEMBLER_S390_INLINE_HPP
-#include "asm/assembler.inline.hpp"
+#include "asm/assembler.hpp"
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
index dea3317270e..3e176309c27 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
@@ -28,7 +28,6 @@
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
@@ -205,104 +204,71 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
BLOCK_COMMENT("} generate_c2_pre_barrier_stub");
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+
+ __ block_comment("generate_post_barrier_fast_path {");
+
+ assert(thread == Z_thread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
+
+ // Does store cross heap regions?
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_xgrk(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ } else {
+ __ z_lgr(tmp1, store_addr);
+ __ z_xgr(tmp1, new_val);
+ }
+ __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ branch_optimized(Assembler::bcondEqual, done);
+
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ z_ltgr(new_val, new_val);
+ __ z_bre(done);
+ } else {
+#ifdef ASSERT
+ __ z_ltgr(new_val, new_val);
+ __ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
+#endif
+ }
+
+ __ z_srag(tmp1, store_addr, CardTable::card_shift());
+
+ Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
+ __ z_alg(tmp1, card_table_addr); // tmp1 := card address
+
+ if (UseCondCardMark) {
+ __ z_cli(0, tmp1, G1CardTable::clean_card_val());
+ __ branch_optimized(Assembler::bcondNotEqual, done);
+ }
+
+ static_assert(G1CardTable::dirty_card_val() == 0, "must be to use z_mvi");
+ __ z_mvi(0, tmp1, G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
+
+ __ block_comment("} generate_post_barrier_fast_path");
+}
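// Illustration only, not part of the patch: the region-crossing filter used by
// the fast path above, assuming (hypothetically) 1 MiB heap regions, i.e.
// LogOfHRGrainBytes == 20. Two addresses lie in the same region exactly when
// xor-ing them leaves no bits set at or above bit 20.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned log_region_bytes = 20;                    // assumed region size: 1 MiB
  uint64_t store_addr   = 0x40012345ULL;
  uint64_t same_region  = store_addr + 0x800;              // still inside the same region
  uint64_t other_region = store_addr + (1ULL << log_region_bytes);
  assert(((store_addr ^ same_region)  >> log_region_bytes) == 0);  // filtered: no barrier work
  assert(((store_addr ^ other_region) >> log_region_bytes) != 0);  // crosses regions: card may be dirtied
  return 0;
}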
+
void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* stub) {
+ bool new_val_may_be_null) {
BLOCK_COMMENT("g1_write_barrier_post_c2 {");
-
- assert(thread == Z_thread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch);
-
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
-
- stub->initialize_registers(thread, tmp1, tmp2);
-
- BLOCK_COMMENT("generate_region_crossing_test {");
- if (VM_Version::has_DistinctOpnds()) {
- __ z_xgrk(tmp1, store_addr, new_val);
- } else {
- __ z_lgr(tmp1, store_addr);
- __ z_xgr(tmp1, new_val);
- }
- __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
- __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
- BLOCK_COMMENT("} generate_region_crossing_test");
-
- // crosses regions, storing null?
- if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) {
- __ z_ltgr(new_val, new_val);
- __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
- }
-
- BLOCK_COMMENT("generate_card_young_test {");
- CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
- // calculate address of card
- __ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base.
- __ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table.
- __ z_algr(tmp1, tmp2); // Explicit calculation needed for cli.
-
- // Filter young.
- __ z_cli(0, tmp1, G1CardTable::g1_young_card_val());
-
- BLOCK_COMMENT("} generate_card_young_test");
-
- // From here on, tmp1 holds the card address.
- __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
-
- __ bind(*stub->continuation());
-
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
+ __ bind(done);
BLOCK_COMMENT("} g1_write_barrier_post_c2");
}
-void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const {
-
- BLOCK_COMMENT("generate_c2_post_barrier_stub {");
-
- Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
-
- Register thread = stub->thread();
- Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
- Register tmp2 = stub->tmp2();
- Register Rcard_addr = tmp1;
-
- __ bind(*stub->entry());
-
- BLOCK_COMMENT("generate_card_clean_test {");
- __ z_sync(); // Required to support concurrent cleaning.
- __ z_cli(0, Rcard_addr, 0); // Reload after membar.
- __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
- BLOCK_COMMENT("} generate_card_clean_test");
-
- BLOCK_COMMENT("generate_dirty_card {");
- // Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- STATIC_ASSERT(CardTable::dirty_card_val() == 0);
- __ z_mvi(0, Rcard_addr, CardTable::dirty_card_val());
- BLOCK_COMMENT("} generate_dirty_card");
-
- generate_queue_test_and_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime,
- Z_thread, tmp1, tmp2);
-
- __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
-
- __ bind(runtime);
-
- generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
-
- __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
-
- BLOCK_COMMENT("} generate_c2_post_barrier_stub");
-}
-
#endif //COMPILER2
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -451,99 +417,9 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
Register Rtmp1, Register Rtmp2, Register Rtmp3) {
bool not_null = (decorators & IS_NOT_NULL) != 0;
- assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
-
- Label callRuntime, filtered;
-
- CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
-
- BLOCK_COMMENT("g1_write_barrier_post {");
-
- // Does store cross heap regions?
- // It does if the two addresses specify different grain addresses.
- if (VM_Version::has_DistinctOpnds()) {
- __ z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
- } else {
- __ z_lgr(Rtmp1, Rstore_addr);
- __ z_xgr(Rtmp1, Rnew_val);
- }
- __ z_srag(Rtmp1, Rtmp1, G1HeapRegion::LogOfHRGrainBytes);
- __ z_bre(filtered);
-
- // Crosses regions, storing null?
- if (not_null) {
-#ifdef ASSERT
- __ z_ltgr(Rnew_val, Rnew_val);
- __ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
-#endif
- } else {
- __ z_ltgr(Rnew_val, Rnew_val);
- __ z_bre(filtered);
- }
-
- Rnew_val = noreg; // end of lifetime
-
- // Storing region crossing non-null, is card already dirty?
- assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
- // Make sure not to use Z_R0 for any of these registers.
- Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
- Register Rbase = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
-
- // calculate address of card
- __ load_const_optimized(Rbase, (address)ct->card_table()->byte_map_base()); // Card table base.
- __ z_srlg(Rcard_addr, Rstore_addr, CardTable::card_shift()); // Index into card table.
- __ z_algr(Rcard_addr, Rbase); // Explicit calculation needed for cli.
- Rbase = noreg; // end of lifetime
-
- // Filter young.
- __ z_cli(0, Rcard_addr, G1CardTable::g1_young_card_val());
- __ z_bre(filtered);
-
- // Check the card value. If dirty, we're done.
- // This also avoids false sharing of the (already dirty) card.
- __ z_sync(); // Required to support concurrent cleaning.
- __ z_cli(0, Rcard_addr, G1CardTable::dirty_card_val()); // Reload after membar.
- __ z_bre(filtered);
-
- // Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- __ z_mvi(0, Rcard_addr, G1CardTable::dirty_card_val());
-
- Register Rcard_addr_x = Rcard_addr;
- Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
- if (Rcard_addr == Rqueue_index) {
- Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0.
- }
- __ lgr_if_needed(Rcard_addr_x, Rcard_addr);
-
- generate_queue_test_and_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- callRuntime,
- Z_thread, Rcard_addr_x, Rqueue_index);
- __ z_bru(filtered);
-
- __ bind(callRuntime);
-
- // TODO: do we need a frame? Introduced to be on the safe side.
- bool needs_frame = true;
- __ lgr_if_needed(Rcard_addr, Rcard_addr_x); // copy back asap. push_frame will destroy Z_R0_scratch!
-
- // VM call need frame to access(write) O register.
- if (needs_frame) {
- __ save_return_pc();
- __ push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
- }
-
- // Save the live input values.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, Z_thread);
-
- if (needs_frame) {
- __ pop_frame();
- __ restore_return_pc();
- }
-
- __ bind(filtered);
+ Label done;
+ generate_post_barrier_fast_path(masm, Rstore_addr, Rnew_val, Z_thread, Rtmp1, Rtmp2, done, !not_null);
+ __ bind(done);
BLOCK_COMMENT("} g1_write_barrier_post");
}
@@ -615,22 +491,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
}
-void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
- G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
- __ bind(*stub->entry());
- ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
- assert(stub->addr()->is_register(), "Precondition.");
- assert(stub->new_val()->is_register(), "Precondition.");
- Register new_val_reg = stub->new_val()->as_register();
- __ z_ltgr(new_val_reg, new_val_reg);
- __ branch_optimized(Assembler::bcondZero, *stub->continuation());
- __ z_lgr(Z_R1_scratch, stub->addr()->as_pointer_register());
- ce->emit_call_c(bs->post_barrier_c1_runtime_code_blob()->code_begin());
- __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
-}
-
#undef __
+void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ masm->bind(done);
+}
+
#define __ sasm->
static OopMap* save_volatile_registers(StubAssembler* sasm, Register return_pc = Z_R14) {
@@ -705,92 +578,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ z_bru(restart);
}
-void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
- // Z_R1_scratch: oop address, address of updated memory slot
-
- BarrierSet* bs = BarrierSet::barrier_set();
- __ set_info("g1_post_barrier_slow_id", false);
-
- Register addr_oop = Z_R1_scratch;
- Register addr_card = Z_R1_scratch;
- Register r1 = Z_R6; // Must be saved/restored.
- Register r2 = Z_R7; // Must be saved/restored.
- Register cardtable = r1; // Must be non-volatile, because it is used to save addr_card.
- CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
- CardTable* ct = ctbs->card_table();
- CardTable::CardValue* byte_map_base = ct->byte_map_base();
-
- // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
- __ z_stg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
-
- Label not_already_dirty, restart, refill, young_card;
-
- // Calculate address of card corresponding to the updated oop slot.
- AddressLiteral rs(byte_map_base);
- __ z_srlg(addr_card, addr_oop, CardTable::card_shift());
- addr_oop = noreg; // dead now
- __ load_const_optimized(cardtable, rs); // cardtable :=
- __ z_agr(addr_card, cardtable); // addr_card := addr_oop>>card_shift + cardtable
-
- __ z_cli(0, addr_card, (int)G1CardTable::g1_young_card_val());
- __ z_bre(young_card);
-
- __ z_sync(); // Required to support concurrent cleaning.
-
- __ z_cli(0, addr_card, (int)CardTable::dirty_card_val());
- __ z_brne(not_already_dirty);
-
- __ bind(young_card);
- // We didn't take the branch, so we're already dirty: restore
- // used registers and return.
- __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
- __ z_br(Z_R14);
-
- // Not dirty.
- __ bind(not_already_dirty);
-
- // First, dirty it: [addr_card] := 0
- __ z_mvi(0, addr_card, CardTable::dirty_card_val());
-
- Register idx = cardtable; // Must be non-volatile, because it is used to save addr_card.
- Register buf = r2;
- cardtable = noreg; // now dead
-
- // Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
- __ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
-
- ByteSize dirty_card_q_index_byte_offset = G1ThreadLocalData::dirty_card_queue_index_offset();
- ByteSize dirty_card_q_buf_byte_offset = G1ThreadLocalData::dirty_card_queue_buffer_offset();
-
- __ bind(restart);
-
- // Get the index into the update buffer. G1DirtyCardQueue::_index is
- // a size_t so z_ltg is appropriate here.
- __ z_ltg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
-
- // index == 0?
- __ z_brz(refill);
-
- __ z_lg(buf, Address(Z_thread, dirty_card_q_buf_byte_offset));
- __ add2reg(idx, -oopSize);
-
- __ z_stg(addr_card, 0, idx, buf); // [_buf + index] :=
- __ z_stg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
- // Restore killed registers and return.
- __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
- __ z_lg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
- __ z_br(Z_R14);
-
- __ bind(refill);
- save_volatile_registers(sasm);
- __ z_lgr(idx, addr_card); // Save addr_card, tmp3 must be non-volatile.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread),
- Z_thread);
- __ z_lgr(addr_card, idx);
- restore_volatile_registers(sasm); // Restore addr_card.
- __ z_bru(restart);
-}
-
#undef __
#endif // COMPILER1
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
index 0f0bdd8b83c..fdec751c43b 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -33,9 +33,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
-class G1PostBarrierStub;
class G1PreBarrierStubC2;
-class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -60,10 +58,16 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
public:
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
- void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+
+ void g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2);
+
#endif // COMPILER1
#ifdef COMPILER2
@@ -81,9 +85,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
Register thread,
Register tmp1,
Register tmp2,
- G1PostBarrierStubC2* c2_stub);
- void generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const;
+ bool new_val_may_be_null);
#endif // COMPILER2
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
diff --git a/src/hotspot/cpu/s390/gc/g1/g1_s390.ad b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
index 31f60c4aeff..7aed374fdae 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
+++ b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright 2024 IBM Corporation. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2) {
- if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
- g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub);
+ bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, new_val_may_be_null);
}
%} // source
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
index 2d663061aec..c6f5a4e119c 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
@@ -171,6 +171,7 @@ void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Re
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+ __ align(4, __ offset() + OFFSET_TO_PATCHABLE_DATA); // must align the patchable data below, which requires atomic updates
__ block_comment("nmethod_entry_barrier (nmethod_entry_barrier) {");
// Load jump addr:
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
index acc0d3b4988..3e0b2be4873 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
@@ -66,6 +66,14 @@ public:
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg) const;
#endif // COMPILER2
+
+ static const int OFFSET_TO_PATCHABLE_DATA_INSTRUCTION = 6 + 6 + 6; // iihf(6) + iilf(6) + lg(6)
+ static const int BARRIER_TOTAL_LENGTH = OFFSET_TO_PATCHABLE_DATA_INSTRUCTION + 6 + 6 + 2; // cfi(6) + larl(6) + bcr(2)
+
+ // The first 2 bytes of the cfi instruction hold its opcode and the next 4 bytes hold
+ // the value/data to be patched, so we skip the first 2 bytes and return the address
+ // of the value/data field.
+ static const int OFFSET_TO_PATCHABLE_DATA = 6 + 6 + 6 + 2; // iihf(6) + iilf(6) + lg(6) + CFI_OPCODE(2)
+
};
#ifdef COMPILER2
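// Illustration only, not part of the patch: a compile-time consistency check of
// the offsets declared above, using hypothetical local copies of the constants.
// The patchable data field starts 2 bytes into the cfi instruction (after its
// opcode), and the whole barrier sequence is 32 bytes long.
constexpr int kOffsetToPatchableDataInstruction = 6 + 6 + 6;                       // iihf(6) + iilf(6) + lg(6)
constexpr int kBarrierTotalLength = kOffsetToPatchableDataInstruction + 6 + 6 + 2; // + cfi(6) + larl(6) + bcr(2)
constexpr int kOffsetToPatchableData = kOffsetToPatchableDataInstruction + 2;      // skip the 2-byte cfi opcode
static_assert(kOffsetToPatchableData == 20, "data field starts 20 bytes into the barrier");
static_assert(kBarrierTotalLength == 32, "barrier sequence is 32 bytes long");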
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp
index 8f43f4ef723..f6bf137da1c 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetNMethod_s390.cpp
@@ -26,26 +26,32 @@
#include "code/codeBlob.hpp"
#include "code/nativeInst.hpp"
#include "code/nmethod.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "utilities/debug.hpp"
class NativeMethodBarrier: public NativeInstruction {
private:
- static const int PATCHABLE_INSTRUCTION_OFFSET = 3*6; // bytes
address get_barrier_start_address() const {
return NativeInstruction::addr_at(0);
}
address get_patchable_data_address() const {
- address inst_addr = get_barrier_start_address() + PATCHABLE_INSTRUCTION_OFFSET;
+ address start_address = get_barrier_start_address();
+#ifdef ASSERT
+ address inst_addr = start_address + BarrierSetAssembler::OFFSET_TO_PATCHABLE_DATA_INSTRUCTION;
- DEBUG_ONLY(Assembler::is_z_cfi(*((long*)inst_addr)));
- return inst_addr + 2;
+ unsigned long instr = 0;
+ Assembler::get_instruction(inst_addr, &instr);
+ assert(Assembler::is_z_cfi(instr), "sanity check");
+#endif // ASSERT
+
+ return start_address + BarrierSetAssembler::OFFSET_TO_PATCHABLE_DATA;
}
public:
- static const int BARRIER_TOTAL_LENGTH = PATCHABLE_INSTRUCTION_OFFSET + 2*6 + 2; // bytes
+ static const int BARRIER_TOTAL_LENGTH = BarrierSetAssembler::BARRIER_TOTAL_LENGTH;
int get_guard_value() const {
address data_addr = get_patchable_data_address();
@@ -77,23 +83,30 @@ class NativeMethodBarrier: public NativeInstruction {
#ifdef ASSERT
void verify() const {
+ unsigned long instr = 0;
int offset = 0; // bytes
const address start = get_barrier_start_address();
- MacroAssembler::is_load_const(/* address */ start + offset); // two instructions
+ assert(MacroAssembler::is_load_const(/* address */ start + offset), "sanity check"); // two instructions
offset += Assembler::instr_len(&start[offset]);
offset += Assembler::instr_len(&start[offset]);
- Assembler::is_z_lg(*((long*)(start + offset)));
+ Assembler::get_instruction(start + offset, &instr);
+ assert(Assembler::is_z_lg(instr), "sanity check");
offset += Assembler::instr_len(&start[offset]);
- Assembler::is_z_cfi(*((long*)(start + offset)));
+ // get_instruction simply assigns to instr, so whatever value it already holds
+ // does not matter; there is no need to zero it out first.
+ Assembler::get_instruction(start + offset, &instr);
+ assert(Assembler::is_z_cfi(instr), "sanity check");
offset += Assembler::instr_len(&start[offset]);
- Assembler::is_z_larl(*((long*)(start + offset)));
+ Assembler::get_instruction(start + offset, &instr);
+ assert(Assembler::is_z_larl(instr), "sanity check");
offset += Assembler::instr_len(&start[offset]);
- Assembler::is_z_bcr(*((long*)(start + offset)));
+ Assembler::get_instruction(start + offset, &instr);
+ assert(Assembler::is_z_bcr(instr), "sanity check");
offset += Assembler::instr_len(&start[offset]);
assert(offset == BARRIER_TOTAL_LENGTH, "check offset == barrier length constant");
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index d1b6897f287..3f1140c937b 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -1398,11 +1398,7 @@ void Assembler::addl(Address dst, Register src) {
void Assembler::eaddl(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x01);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x01, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::addl(Register dst, int32_t imm32) {
@@ -1432,11 +1428,7 @@ void Assembler::addl(Register dst, Register src) {
}
void Assembler::eaddl(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void)emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_arith(0x03, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x03, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::addr_nop_4() {
@@ -1657,17 +1649,18 @@ void Assembler::eandl(Register dst, Register src1, Address src2, bool no_flags)
emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x23, no_flags);
}
+void Assembler::eandl(Register dst, Address src1, Register src2, bool no_flags) {
+ InstructionMark im(this);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x21, no_flags, false /* is_map1 */, true /* is_commutative */);
+}
+
void Assembler::andl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x23, 0xC0, dst, src);
}
void Assembler::eandl(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_arith(0x23, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x23, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::andnl(Register dst, Register src1, Register src2) {
@@ -2519,7 +2512,7 @@ void Assembler::imull(Register dst, Register src) {
}
void Assembler::eimull(Register dst, Register src1, Register src2, bool no_flags) {
- emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAF, no_flags, true /* is_map1 */, true /* swap */);
+ emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0xAF, no_flags, true /* is_map1 */, true /* swap */, true /* is_commutative */);
}
void Assembler::imull(Register dst, Address src, int32_t value) {
@@ -4419,11 +4412,7 @@ void Assembler::enotl(Register dst, Register src) {
}
void Assembler::eorw(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_arith(0x0B, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_16bit, 0x0B, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::orl(Address dst, int32_t imm32) {
@@ -4467,11 +4456,7 @@ void Assembler::orl(Register dst, Register src) {
}
void Assembler::eorl(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_arith(0x0B, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x0B, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::orl(Address dst, Register src) {
@@ -4483,11 +4468,7 @@ void Assembler::orl(Address dst, Register src) {
void Assembler::eorl(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x09);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x09, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::orb(Address dst, int imm8) {
@@ -4517,11 +4498,7 @@ void Assembler::orb(Address dst, Register src) {
void Assembler::eorb(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x08);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x08, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
@@ -7323,11 +7300,7 @@ void Assembler::xorl(Register dst, Register src) {
}
void Assembler::exorl(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_arith(0x33, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x33, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::xorl(Address dst, Register src) {
@@ -7339,11 +7312,7 @@ void Assembler::xorl(Address dst, Register src) {
void Assembler::exorl(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x31);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_32bit, 0x31, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::xorb(Register dst, Address src) {
@@ -7367,11 +7336,7 @@ void Assembler::xorb(Address dst, Register src) {
void Assembler::exorb(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_8bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x30);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_8bit, 0x30, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::xorw(Register dst, Address src) {
@@ -12955,6 +12920,31 @@ void Assembler::eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimd
vex_prefix(adr, ndd_enc, xreg_enc, pre, opc, attributes, /* nds_is_ndd */ true, no_flags);
}
+void Assembler::emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+ int size, int opcode_byte, bool no_flags, bool is_map1, bool is_commutative) {
+ if (is_commutative && is_demotable(no_flags, dst->encoding(), src2->encoding())) {
+ // Opcode byte adjustment due to mismatch between NDD and equivalent demotable variant
+ opcode_byte += 2;
+ if (size == EVEX_64bit) {
+ emit_prefix_and_int8(get_prefixq(src1, dst, is_map1), opcode_byte);
+ } else {
+ // For 32-bit, 16-bit and 8-bit
+ if (size == EVEX_16bit) {
+ emit_int8(0x66);
+ }
+ prefix(src1, dst, false, is_map1);
+ emit_int8(opcode_byte);
+ }
+ } else {
+ bool vex_w = (size == EVEX_64bit) ? true : false;
+ InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
+ eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), pre, opc, &attributes, no_flags);
+ emit_int8(opcode_byte);
+ }
+ emit_operand(src2, src1, 0);
+}
+
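The new memory-form overload above chooses between the EVEX NDD encoding and a demoted legacy encoding. A standalone sketch of that decision follows; OperandSize, Encoding, select() and the simplified is_demotable() are illustrative names, and the real demotability check may impose additional conditions beyond the ones modelled here.

#include <cstdio>

enum OperandSize { EVEX_8bit, EVEX_16bit, EVEX_32bit, EVEX_64bit };

struct Encoding {
  bool legacy;               // true -> demoted legacy form, false -> EVEX NDD form
  int  opcode_byte;          // opcode actually emitted
  bool operand_size_prefix;  // 0x66 needed for 16-bit legacy forms
};

// Simplified: demotion needs live flags and a destination that aliases the
// register source, turning the NDD op into "dst op= [mem]".
static bool is_demotable(bool no_flags, int dst_enc, int src_enc) {
  return !no_flags && dst_enc == src_enc;
}

static Encoding select(bool is_commutative, bool no_flags, OperandSize size,
                       int dst_enc, int src2_enc, int ndd_opcode) {
  Encoding e;
  if (is_commutative && is_demotable(no_flags, dst_enc, src2_enc)) {
    e.legacy = true;
    // The legacy "reg op= mem" opcode sits two bytes above the "mem, reg"
    // opcode used for the NDD form (e.g. ADD 0x01 -> 0x03, AND 0x21 -> 0x23).
    e.opcode_byte = ndd_opcode + 2;
    e.operand_size_prefix = (size == EVEX_16bit);
  } else {
    e.legacy = false;
    e.opcode_byte = ndd_opcode;
    e.operand_size_prefix = false;
  }
  return e;
}

int main() {
  // eaddl(rax, [mem], rax): dst aliases src2, commutative -> demote to 0x03.
  Encoding a = select(true, false, EVEX_32bit, 0, 0, 0x01);
  printf("legacy=%d opcode=0x%02x\n", a.legacy, (unsigned)a.opcode_byte);
  // eaddl(rax, [mem], rcx): dst differs -> keep EVEX NDD with opcode 0x01.
  Encoding b = select(true, false, EVEX_32bit, 0, 1, 0x01);
  printf("legacy=%d opcode=0x%02x\n", b.legacy, (unsigned)b.opcode_byte);
  return 0;
}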
void Assembler::emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
int size, int opcode_byte, bool no_flags, bool is_map1) {
if (is_demotable(no_flags, dst->encoding(), src1->encoding())) {
@@ -13055,18 +13045,20 @@ void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8
}
void Assembler::emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
- int size, int opcode_byte, bool no_flags, bool is_map1, bool swap) {
+ int size, int opcode_byte, bool no_flags, bool is_map1, bool swap, bool is_commutative) {
int encode;
bool is_prefixq = (size == EVEX_64bit) ? true : false;
- if (is_demotable(no_flags, dst_enc, nds_enc)) {
+ bool first_operand_demotable = is_demotable(no_flags, dst_enc, nds_enc);
+ bool second_operand_demotable = is_commutative && is_demotable(no_flags, dst_enc, src_enc);
+ if (first_operand_demotable || second_operand_demotable) {
if (size == EVEX_16bit) {
emit_int8(0x66);
}
-
+ int src = first_operand_demotable ? src_enc : nds_enc;
if (swap) {
- encode = is_prefixq ? prefixq_and_encode(dst_enc, src_enc, is_map1) : prefix_and_encode(dst_enc, src_enc, is_map1);
+ encode = is_prefixq ? prefixq_and_encode(dst_enc, src, is_map1) : prefix_and_encode(dst_enc, src, is_map1);
} else {
- encode = is_prefixq ? prefixq_and_encode(src_enc, dst_enc, is_map1) : prefix_and_encode(src_enc, dst_enc, is_map1);
+ encode = is_prefixq ? prefixq_and_encode(src, dst_enc, is_map1) : prefix_and_encode(src, dst_enc, is_map1);
}
emit_opcode_prefix_and_encoding((unsigned char)opcode_byte, 0xC0, encode);
} else {
@@ -13114,6 +13106,26 @@ int Assembler::eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc,
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);
}
+void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+ int size, int op1, int op2, bool no_flags, bool is_commutative) {
+ bool demotable = is_demotable(no_flags, dst->encoding(), src1->encoding());
+ if (!demotable && is_commutative) {
+ if (is_demotable(no_flags, dst->encoding(), src2->encoding())) {
+ // swap src1 and src2
+ Register tmp = src1;
+ src1 = src2;
+ src2 = tmp;
+ }
+ }
+ bool vex_w = (size == EVEX_64bit) ? true : false;
+ bool use_prefixq = vex_w;
+ InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
+ // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
+ (void)emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), pre, opc, &attributes, no_flags, use_prefixq);
+ emit_arith(op1, op2, src1, src2);
+}
+
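A standalone sketch of why the commutative case in the register variant swaps its sources: if the destination matches src2 rather than src1, swapping turns the operation into "dst op= other" so the existing dst==src1 demotion path can apply. Op, normalize() and the simplified is_demotable() are illustrative stand-ins.

#include <algorithm>
#include <cassert>

struct Op { int dst, src1, src2; bool commutative; bool no_flags; };

// Simplified stand-in for the Assembler demotability helper.
static bool is_demotable(bool no_flags, int dst, int src) {
  return !no_flags && dst == src;
}

static void normalize(Op& op) {
  if (!is_demotable(op.no_flags, op.dst, op.src1) && op.commutative &&
      is_demotable(op.no_flags, op.dst, op.src2)) {
    std::swap(op.src1, op.src2);   // now dst == src1, demotion can kick in
  }
}

int main() {
  Op add = {/*dst*/ 3, /*src1*/ 5, /*src2*/ 3, true, false};   // eaddq(r3, r5, r3)
  normalize(add);
  assert(add.src1 == 3 && add.src2 == 5);   // rewritten as r3 += r5

  Op sub = {/*dst*/ 3, /*src1*/ 5, /*src2*/ 3, false, false};  // not commutative
  normalize(sub);
  assert(sub.src1 == 5 && sub.src2 == 3);   // left untouched
  return 0;
}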
void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
int size, int op1, int op2, bool no_flags) {
int dst_enc = dst->encoding();
@@ -13124,7 +13136,6 @@ void Assembler::emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds
} else {
bool vex_w = (size == EVEX_64bit) ? true : false;
InstructionAttr attributes(AVX_128bit, vex_w, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- //attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, size);
attributes.set_is_evex_instruction();
vex_prefix_and_encode(0, dst_enc, nds_enc, pre, opc, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ true, no_flags);
@@ -13769,7 +13780,7 @@ void Assembler::pdepq(Register dst, Register src1, Address src2) {
void Assembler::sarxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes, true);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
@@ -13777,7 +13788,7 @@ void Assembler::sarxl(Register dst, Register src1, Register src2) {
void Assembler::sarxl(Register dst, Address src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
@@ -13786,7 +13797,7 @@ void Assembler::sarxl(Register dst, Address src1, Register src2) {
void Assembler::sarxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes, true);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
@@ -13794,7 +13805,7 @@ void Assembler::sarxq(Register dst, Register src1, Register src2) {
void Assembler::sarxq(Register dst, Address src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
@@ -13803,7 +13814,7 @@ void Assembler::sarxq(Register dst, Address src1, Register src2) {
void Assembler::shlxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
@@ -13811,7 +13822,7 @@ void Assembler::shlxl(Register dst, Register src1, Register src2) {
void Assembler::shlxl(Register dst, Address src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
@@ -13820,7 +13831,7 @@ void Assembler::shlxl(Register dst, Address src1, Register src2) {
void Assembler::shlxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
@@ -13828,7 +13839,7 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
void Assembler::shlxq(Register dst, Address src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
@@ -13837,7 +13848,7 @@ void Assembler::shlxq(Register dst, Address src1, Register src2) {
void Assembler::shrxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
@@ -13845,7 +13856,7 @@ void Assembler::shrxl(Register dst, Register src1, Register src2) {
void Assembler::shrxl(Register dst, Address src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
@@ -13854,7 +13865,7 @@ void Assembler::shrxl(Register dst, Address src1, Register src2) {
void Assembler::shrxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
@@ -13862,7 +13873,7 @@ void Assembler::shrxq(Register dst, Register src1, Register src2) {
void Assembler::shrxq(Register dst, Address src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
@@ -14623,11 +14634,7 @@ void Assembler::addq(Address dst, Register src) {
void Assembler::eaddq(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x01);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x01, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::addq(Register dst, int32_t imm32) {
@@ -14656,11 +14663,7 @@ void Assembler::addq(Register dst, Register src) {
}
void Assembler::eaddq(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
- emit_arith(0x03, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x03, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::adcxq(Register dst, Register src) {
@@ -14753,11 +14756,7 @@ void Assembler::andq(Register dst, Register src) {
}
void Assembler::eandq(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
- emit_arith(0x23, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x23, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::andq(Address dst, Register src) {
@@ -14768,11 +14767,7 @@ void Assembler::andq(Address dst, Register src) {
void Assembler::eandq(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x21);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x21, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::andnq(Register dst, Register src1, Register src2) {
@@ -15118,7 +15113,7 @@ void Assembler::eimulq(Register dst, Register src, bool no_flags) {
}
void Assembler::eimulq(Register dst, Register src1, Register src2, bool no_flags) {
- emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0xAF, no_flags, true /* is_map1 */, true /* swap */);
+ emit_eevex_or_demote(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0xAF, no_flags, true /* is_map1 */, true /* swap */, true /* is_commutative */);
}
void Assembler::imulq(Register src) {
@@ -15580,11 +15575,7 @@ void Assembler::orq(Address dst, Register src) {
void Assembler::eorq(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x09);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x09, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void Assembler::orq(Register dst, int32_t imm32) {
@@ -15624,13 +15615,8 @@ void Assembler::orq(Register dst, Register src) {
}
void Assembler::eorq(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
- emit_arith(0x0B, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x0B, 0xC0, no_flags, true /* is_commutative */);
}
-
void Assembler::popcntq(Register dst, Address src) {
assert(VM_Version::supports_popcnt(), "must support");
InstructionMark im(this);
@@ -16372,11 +16358,7 @@ void Assembler::xorq(Register dst, Register src) {
}
void Assembler::exorq(Register dst, Register src1, Register src2, bool no_flags) {
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- // NDD shares its encoding bits with NDS bits for regular EVEX instruction.
- // Therefore, DST is passed as the second argument to minimize changes in the leaf level routine.
- (void) emit_eevex_prefix_or_demote_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags, true /* use_prefixq */);
- emit_arith(0x33, 0xC0, src1, src2);
+ emit_eevex_prefix_or_demote_arith_ndd(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x33, 0xC0, no_flags, true /* is_commutative */);
}
void Assembler::xorq(Register dst, Address src) {
@@ -16430,11 +16412,7 @@ void Assembler::esetzucc(Condition cc, Register dst) {
void Assembler::exorq(Register dst, Address src1, Register src2, bool no_flags) {
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
- eevex_prefix_ndd(src1, dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, &attributes, no_flags);
- emit_int8(0x31);
- emit_operand(src2, src1, 0);
+ emit_eevex_or_demote(dst, src1, src2, VEX_SIMD_NONE, VEX_OPCODE_0F_3C /* MAP4 */, EVEX_64bit, 0x31, no_flags, false /* is_map1 */, true /* is_commutative */);
}
void InstructionAttr::set_address_attributes(int tuple_type, int input_size_in_bits) {
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 45c24f8c832..99dade412b2 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -807,14 +807,20 @@ private:
int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc,
InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);
+ void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+ int size, int op1, int op2, bool no_flags = false, bool is_commutative = false);
+
void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
int size, int op1, int op2, bool no_flags);
void emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
int size, int opcode_byte, bool no_flags = false, bool is_map1 = false);
+ void emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
+ int size, int opcode_byte, bool no_flags = false, bool is_map1 = false, bool is_commutative = false);
+
void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
- int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false);
+ int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false, bool is_commutative = false);
void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc,
int size, int opcode_byte, bool no_flags, bool is_map1 = false);
@@ -1149,6 +1155,7 @@ private:
void eandl(Register dst, Register src, int32_t imm32, bool no_flags);
void andl(Register dst, Address src);
void eandl(Register dst, Register src1, Address src2, bool no_flags);
+ void eandl(Register dst, Address src1, Register src2, bool no_flags);
void andl(Register dst, Register src);
void eandl(Register dst, Register src1, Register src2, bool no_flags);
void andl(Address dst, Register src);
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index c1920b52837..31f27e140e0 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -89,19 +89,53 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
- __ push_call_clobbered_registers(false /* save_fpu */);
- if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
- assert_different_registers(c_rarg1, addr);
- __ mov(c_rarg1, count);
- __ mov(c_rarg0, addr);
- } else {
- assert_different_registers(c_rarg0, count);
- __ mov(c_rarg0, addr);
- __ mov(c_rarg1, count);
- }
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
- __ pop_call_clobbered_registers(false /* save_fpu */);
+ Label done;
+ __ testptr(count, count);
+ __ jcc(Assembler::zero, done);
+
+ // Calculate end address in "count".
+ Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
+ __ leaq(count, Address(addr, count, scale));
+
+ // Calculate start card address in "addr".
+ __ shrptr(addr, CardTable::card_shift());
+
+ Register thread = r15_thread;
+
+ __ movptr(tmp, Address(thread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
+ __ addptr(addr, tmp);
+
+ // Calculate address of card of last word in the array.
+ __ subptr(count, 1);
+ __ shrptr(count, CardTable::card_shift());
+ __ addptr(count, tmp);
+
+ Label loop;
+ // Iterate from start card to end card (inclusive).
+ __ bind(loop);
+
+ Label is_clean_card;
+ if (UseCondCardMark) {
+ __ cmpb(Address(addr, 0), G1CardTable::clean_card_val());
+ __ jcc(Assembler::equal, is_clean_card);
+ } else {
+ __ movb(Address(addr, 0), G1CardTable::dirty_card_val());
+ }
+
+ Label next_card;
+ __ bind(next_card);
+ __ addptr(addr, sizeof(CardTable::CardValue));
+ __ cmpptr(addr, count);
+ __ jcc(Assembler::belowEqual, loop);
+ __ jmp(done);
+
+ __ bind(is_clean_card);
+ // Card was clean. Dirty the card and continue with the next one.
+ __ movb(Address(addr, 0), G1CardTable::dirty_card_val());
+ __ jmp(next_card);
+
+ __ bind(done);
}
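A standalone sketch of the card-range arithmetic performed by the inline loop above, with illustrative card-size and card-value constants rather than the real G1 ones; dirty_cards_for_range() is an illustrative name. The inclusive walk over [first card, last card] mirrors the belowEqual loop condition in the stub.

#include <cstdint>
#include <cstdio>
#include <vector>

static const int     kCardShift = 9;      // 512-byte cards (illustrative)
static const uint8_t kCleanCard = 0xFF;
static const uint8_t kDirtyCard = 0x00;

static void dirty_cards_for_range(uint8_t* card_table_base, uintptr_t start,
                                  size_t count, size_t elem_size,
                                  bool use_cond_card_mark) {
  if (count == 0) return;                       // mirrors the early "done" jump
  uintptr_t end  = start + count * elem_size;   // one past the last element
  uint8_t*  card = card_table_base + (start     >> kCardShift);
  uint8_t*  last = card_table_base + ((end - 1) >> kCardShift);
  for (; card <= last; card++) {                // inclusive walk, as in the stub
    if (use_cond_card_mark && *card != kCleanCard) {
      continue;                                 // already marked, skip the store
    }
    *card = kDirtyCard;
  }
}

int main() {
  std::vector<uint8_t> table(16, kCleanCard);
  // Pretend a 100-element array of 8-byte oops starts at "address" 1000.
  dirty_cards_for_range(table.data(), 1000, 100, 8, /*use_cond_card_mark*/ true);
  for (size_t i = 0; i < table.size(); i++) {
    printf("card %zu: %s\n", i, table[i] == kDirtyCard ? "dirty" : "clean");
  }
  return 0;
}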
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -182,7 +216,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
// If expand_call is true then we expand the call_VM_leaf macro
// directly to skip generating the check by
// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
-
const Register thread = r15_thread;
Label done;
@@ -238,73 +271,46 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
- const Register tmp,
- const Register tmp2,
+ const Register tmp1,
Label& done,
bool new_val_may_be_null) {
- CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
+
+ assert_different_registers(store_addr, new_val, tmp1, noreg);
+
+ Register thread = r15_thread;
+
// Does store cross heap regions?
- __ movptr(tmp, store_addr); // tmp := store address
- __ xorptr(tmp, new_val); // tmp := store address ^ new value
- __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
+ __ movptr(tmp1, store_addr); // tmp1 := store address
+ __ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
+ __ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
__ jcc(Assembler::equal, done);
+
// Crosses regions, storing null?
if (new_val_may_be_null) {
- __ cmpptr(new_val, NULL_WORD); // new value == null?
+ __ cmpptr(new_val, NULL_WORD); // new value == null?
__ jcc(Assembler::equal, done);
}
- // Storing region crossing non-null, is card young?
- __ movptr(tmp, store_addr); // tmp := store address
- __ shrptr(tmp, CardTable::card_shift()); // tmp := card address relative to card table base
- // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
- // a valid address and therefore is not properly handled by the relocation code.
- __ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base()); // tmp2 := card table base address
- __ addptr(tmp, tmp2); // tmp := card address
- __ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val()); // *(card address) == young_card_val?
-}
-static void generate_post_barrier_slow_path(MacroAssembler* masm,
- const Register thread,
- const Register tmp,
- const Register tmp2,
- Label& done,
- Label& runtime) {
- __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // StoreLoad membar
- __ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) == dirty_card_val?
- __ jcc(Assembler::equal, done);
+ __ movptr(tmp1, store_addr); // tmp1 := store address
+ __ shrptr(tmp1, CardTable::card_shift()); // tmp1 := card address relative to card table base
+
+ Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
+ __ addptr(tmp1, card_table_addr); // tmp1 := card address
+ if (UseCondCardMark) {
+ __ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val?
+ __ jcc(Assembler::notEqual, done);
+ }
// Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- __ movb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
- generate_queue_insertion(masm,
- G1ThreadLocalData::dirty_card_queue_index_offset(),
- G1ThreadLocalData::dirty_card_queue_buffer_offset(),
- runtime,
- thread, tmp, tmp2);
- __ jmp(done);
+ // Dirty card.
+ __ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
}
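A standalone sketch of the filtering sequence in the rewritten fast path: the XOR-and-shift same-region test, the null filter, and the optional UseCondCardMark check, ending, as in the patch, with a plain dirty store instead of a StoreLoad fence plus dirty-card-queue insertion. post_barrier() and all constants are illustrative, and the per-thread card table base is modelled as a plain pointer.

#include <cstdint>
#include <cstdio>

static const int     kLogRegionBytes = 22;   // e.g. 4 MB heap regions (illustrative)
static const int     kCardShift      = 9;
static const uint8_t kCleanCard      = 0xFF;
static const uint8_t kDirtyCard      = 0x00;

static void post_barrier(uintptr_t store_addr, uintptr_t new_val,
                         uint8_t* card_table_base,
                         bool new_val_may_be_null, bool use_cond_card_mark) {
  // Same region? XOR clears the common high bits; shifting by the region log
  // leaves zero iff both addresses fall in the same region.
  if (((store_addr ^ new_val) >> kLogRegionBytes) == 0) return;
  // Storing null never creates a cross-region reference.
  if (new_val_may_be_null && new_val == 0) return;
  uint8_t* card = card_table_base + (store_addr >> kCardShift);
  if (use_cond_card_mark && *card != kCleanCard) return;  // already marked
  *card = kDirtyCard;   // plain store; the patch drops the fence and the queue here
}

int main() {
  static uint8_t table[1 << 13];
  for (unsigned i = 0; i < sizeof(table); i++) table[i] = kCleanCard;
  post_barrier(/*store*/ 0x2008, /*new_val*/ 0x400010, table, true, true);
  printf("card %d is now %s\n", 0x2008 >> kCardShift,
         table[0x2008 >> kCardShift] == kDirtyCard ? "dirty" : "clean");
  return 0;
}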
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
- Register tmp,
- Register tmp2) {
- const Register thread = r15_thread;
-
+ Register tmp) {
Label done;
- Label runtime;
-
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */);
- // If card is young, jump to done
- __ jcc(Assembler::equal, done);
- generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime);
-
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push_set(saved);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread);
- __ pop_set(saved);
-
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
__ bind(done);
}
@@ -367,34 +373,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp,
- Register tmp2,
- G1PostBarrierStubC2* stub) {
- const Register thread = r15_thread;
- stub->initialize_registers(thread, tmp, tmp2);
-
- bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
- generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null);
- // If card is not young, jump to stub (slow path)
- __ jcc(Assembler::notEqual, *stub->entry());
-
- __ bind(*stub->continuation());
-}
-
-void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const {
- Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
- Label runtime;
- Register thread = stub->thread();
- Register tmp = stub->tmp1(); // tmp holds the card address.
- Register tmp2 = stub->tmp2();
- assert(stub->tmp3() == noreg, "not needed in this platform");
-
- __ bind(*stub->entry());
- generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime);
-
- __ bind(runtime);
- generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
- __ jmp(*stub->continuation());
+ bool new_val_may_be_null) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
+ __ bind(done);
}
#endif // COMPILER2
@@ -441,8 +423,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
g1_write_barrier_post(masm /*masm*/,
tmp1 /* store_adr */,
new_val /* new_val */,
- tmp3 /* tmp */,
- tmp2 /* tmp2 */);
+ tmp3 /* tmp */);
}
}
}
@@ -476,21 +457,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
}
-void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
- G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
- __ bind(*stub->entry());
- assert(stub->addr()->is_register(), "Precondition.");
- assert(stub->new_val()->is_register(), "Precondition.");
- Register new_val_reg = stub->new_val()->as_register();
- __ cmpptr(new_val_reg, NULL_WORD);
- __ jcc(Assembler::equal, *stub->continuation());
- ce->store_parameter(stub->addr()->as_pointer_register(), 0);
- __ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
- __ jmp(*stub->continuation());
-}
-
#undef __
+void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2 /* unused on x86 */) {
+ Label done;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
+ masm->bind(done);
+}
+
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@@ -555,78 +534,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ epilogue();
}
-void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
- __ prologue("g1_post_barrier", false);
-
- CardTableBarrierSet* ct =
- barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set());
-
- Label done;
- Label enqueued;
- Label runtime;
-
- // At this point we know new_value is non-null and the new_value crosses regions.
- // Must check to see if card is already dirty
-
- const Register thread = r15_thread;
-
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- __ push_ppx(rax);
- __ push_ppx(rcx);
-
- const Register cardtable = rax;
- const Register card_addr = rcx;
-
- __ load_parameter(0, card_addr);
- __ shrptr(card_addr, CardTable::card_shift());
- // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
- // a valid address and therefore is not properly handled by the relocation code.
- __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
- __ addptr(card_addr, cardtable);
-
- __ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
- __ jcc(Assembler::equal, done);
-
- __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
- __ cmpb(Address(card_addr, 0), CardTable::dirty_card_val());
- __ jcc(Assembler::equal, done);
-
- // storing region crossing non-null, card is clean.
- // dirty card and log.
-
- __ movb(Address(card_addr, 0), CardTable::dirty_card_val());
-
- const Register tmp = rdx;
- __ push_ppx(rdx);
-
- __ movptr(tmp, queue_index);
- __ testptr(tmp, tmp);
- __ jcc(Assembler::zero, runtime);
- __ subptr(tmp, wordSize);
- __ movptr(queue_index, tmp);
- __ addptr(tmp, buffer);
- __ movptr(Address(tmp, 0), card_addr);
- __ jmp(enqueued);
-
- __ bind(runtime);
- __ push_call_clobbered_registers();
-
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
-
- __ pop_call_clobbered_registers();
-
- __ bind(enqueued);
- __ pop_ppx(rdx);
-
- __ bind(done);
- __ pop_ppx(rcx);
- __ pop_ppx(rax);
-
- __ epilogue();
-}
-
#undef __
#endif // COMPILER1
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
index 774e87b916c..4b2de41de69 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,10 +31,8 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
-class G1PostBarrierStub;
class G1BarrierStubC2;
class G1PreBarrierStubC2;
-class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -51,22 +49,28 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
- Register tmp,
- Register tmp2);
+ Register tmp);
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
public:
- void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
- void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
-
- void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
-
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1);
+#ifdef COMPILER1
+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
+
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+
+ void g1_write_barrier_post_c1(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2);
+#endif
+
#ifdef COMPILER2
void g1_write_barrier_pre_c2(MacroAssembler* masm,
Register obj,
@@ -79,10 +83,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
Register store_addr,
Register new_val,
Register tmp,
- Register tmp2,
- G1PostBarrierStubC2* c2_stub);
- void generate_c2_post_barrier_stub(MacroAssembler* masm,
- G1PostBarrierStubC2* stub) const;
+ bool new_val_may_be_null);
#endif // COMPILER2
};
diff --git a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
index 819cd97696c..94607cd6796 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -59,15 +59,14 @@ static void write_barrier_post(MacroAssembler* masm,
const MachNode* node,
Register store_addr,
Register new_val,
- Register tmp1,
- Register tmp2) {
- if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ Register tmp1) {
+ if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
- g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub);
+ bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, new_val_may_be_null);
}
%}
@@ -95,8 +94,7 @@ instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3,
write_barrier_post(masm, this,
$tmp1$$Register /* store_addr */,
$src$$Register /* new_val */,
- $tmp3$$Register /* tmp1 */,
- $tmp2$$Register /* tmp2 */);
+ $tmp3$$Register /* tmp1 */);
%}
ins_pipe(ialu_mem_reg);
%}
@@ -127,8 +125,7 @@ instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFl
write_barrier_post(masm, this,
$tmp1$$Register /* store_addr */,
$tmp2$$Register /* new_val */,
- $tmp3$$Register /* tmp1 */,
- $tmp2$$Register /* tmp2 */);
+ $tmp3$$Register /* tmp1 */);
%}
ins_pipe(ialu_mem_reg);
%}
@@ -158,8 +155,7 @@ instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rR
write_barrier_post(masm, this,
$tmp1$$Register /* store_addr */,
$src$$Register /* new_val */,
- $tmp3$$Register /* tmp1 */,
- $tmp2$$Register /* tmp2 */);
+ $tmp3$$Register /* tmp1 */);
%}
ins_pipe(ialu_mem_reg);
%}
@@ -187,8 +183,7 @@ instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
- $tmp2$$Register /* tmp1 */,
- $tmp3$$Register /* tmp2 */);
+ $tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@@ -214,8 +209,7 @@ instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
- $tmp2$$Register /* tmp1 */,
- $tmp3$$Register /* tmp2 */);
+ $tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@@ -246,8 +240,7 @@ instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rR
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
- $tmp2$$Register /* tmp1 */,
- $tmp3$$Register /* tmp2 */);
+ $tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@@ -279,8 +272,7 @@ instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rR
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
- $tmp2$$Register /* tmp1 */,
- $tmp3$$Register /* tmp2 */);
+ $tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@@ -303,8 +295,7 @@ instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
- $tmp2$$Register /* tmp1 */,
- $tmp3$$Register /* tmp2 */);
+ $tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@@ -328,8 +319,7 @@ instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
- $tmp2$$Register /* tmp1 */,
- $tmp3$$Register /* tmp2 */);
+ $tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
index 743457f87af..d53fafafdb4 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
@@ -76,50 +76,95 @@ static uint& get_profile_ctr(int shift) {
#endif // !PRODUCT
void StubGenerator::generate_arraycopy_stubs() {
- address entry;
- address entry_jbyte_arraycopy;
- address entry_jshort_arraycopy;
- address entry_jint_arraycopy;
- address entry_oop_arraycopy;
- address entry_jlong_arraycopy;
- address entry_checkcast_arraycopy;
+ // Some copy stubs publish a normal entry and then a second 'fallback'
+ // entry immediately following their stack push. This can be used
+ // as a post-push branch target for compatible stubs when they
+ // identify a special case that can be handled by the fallback
+ // stub, e.g. a disjoint copy stub may be used as a special-case
+ // fallback for its compatible conjoint copy stub.
+ //
+ // A nopush entry is always returned in the following local and
+ // then published by assigning to the appropriate entry field in
+ // class StubRoutines. The entry value is then passed to the
+ // generator for the compatible stub. That means the entry must be
+ // listed when saving to/restoring from the AOT cache, ensuring
+ // that the inter-stub jumps are noted at AOT-cache save and
+ // relocated at AOT-cache load.
+ address nopush_entry;
- StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(&entry);
- StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(entry, &entry_jbyte_arraycopy);
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(&nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jbyte_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(StubRoutines::_jbyte_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jbyte_arraycopy_nopush = nopush_entry;
- StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(&entry);
- StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(entry, &entry_jshort_arraycopy);
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(&nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jshort_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(StubRoutines::_jshort_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jshort_arraycopy_nopush = nopush_entry;
- StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(StubId::stubgen_jint_disjoint_arraycopy_id, &entry);
- StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(StubId::stubgen_jint_arraycopy_id, entry, &entry_jint_arraycopy);
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(StubId::stubgen_jint_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jint_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(StubId::stubgen_jint_arraycopy_id, StubRoutines::_jint_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jint_arraycopy_nopush = nopush_entry;
+
+ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(StubId::stubgen_jlong_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_jlong_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(StubId::stubgen_jlong_arraycopy_id, StubRoutines::_jlong_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_jlong_arraycopy_nopush = nopush_entry;
- StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(StubId::stubgen_jlong_disjoint_arraycopy_id, &entry);
- StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(StubId::stubgen_jlong_arraycopy_id, entry, &entry_jlong_arraycopy);
if (UseCompressedOops) {
- StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_id, &entry);
- StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(StubId::stubgen_oop_arraycopy_id, entry, &entry_oop_arraycopy);
- StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_uninit_id, &entry);
- StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(StubId::stubgen_oop_arraycopy_uninit_id, entry, nullptr);
+ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_oop_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(StubId::stubgen_oop_arraycopy_id, StubRoutines::_oop_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_oop_arraycopy_nopush = nopush_entry;
+ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_uninit_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_oop_disjoint_arraycopy_uninit_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic/unsafe copy does not cater for uninit arrays.
+ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(StubId::stubgen_oop_arraycopy_uninit_id, StubRoutines::_oop_disjoint_arraycopy_uninit_nopush, nullptr);
} else {
- StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_id, &entry);
- StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(StubId::stubgen_oop_arraycopy_id, entry, &entry_oop_arraycopy);
- StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_uninit_id, &entry);
- StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(StubId::stubgen_oop_arraycopy_uninit_id, entry, nullptr);
+ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_oop_disjoint_arraycopy_nopush = nopush_entry;
+ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(StubId::stubgen_oop_arraycopy_id, StubRoutines::_oop_disjoint_arraycopy_nopush, &nopush_entry);
+ // conjoint nopush entry is needed by generic/unsafe copy
+ StubRoutines::_oop_arraycopy_nopush = nopush_entry;
+ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(StubId::stubgen_oop_disjoint_arraycopy_uninit_id, &nopush_entry);
+ // disjoint nopush entry is needed by conjoint copy
+ StubRoutines::_oop_disjoint_arraycopy_uninit_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic/unsafe copy does not cater for uninit arrays.
+ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(StubId::stubgen_oop_arraycopy_uninit_id, StubRoutines::_oop_disjoint_arraycopy_uninit_nopush, nullptr);
}
- StubRoutines::_checkcast_arraycopy = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_id, &entry_checkcast_arraycopy);
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_id, &nopush_entry);
+ // checkcast nopush entry is needed by generic copy
+ StubRoutines::_checkcast_arraycopy_nopush = nopush_entry;
+ // note that we don't need a returned nopush entry because the
+ // generic copy does not cater for uninit arrays.
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy(StubId::stubgen_checkcast_arraycopy_uninit_id, nullptr);
- StubRoutines::_unsafe_arraycopy = generate_unsafe_copy(entry_jbyte_arraycopy,
- entry_jshort_arraycopy,
- entry_jint_arraycopy,
- entry_jlong_arraycopy);
- StubRoutines::_generic_arraycopy = generate_generic_copy(entry_jbyte_arraycopy,
- entry_jshort_arraycopy,
- entry_jint_arraycopy,
- entry_oop_arraycopy,
- entry_jlong_arraycopy,
- entry_checkcast_arraycopy);
+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy(StubRoutines::_jbyte_arraycopy_nopush,
+ StubRoutines::_jshort_arraycopy_nopush,
+ StubRoutines::_jint_arraycopy_nopush,
+ StubRoutines::_jlong_arraycopy_nopush);
+ StubRoutines::_generic_arraycopy = generate_generic_copy(StubRoutines::_jbyte_arraycopy_nopush,
+ StubRoutines::_jshort_arraycopy_nopush,
+ StubRoutines::_jint_arraycopy_nopush,
+ StubRoutines::_oop_arraycopy_nopush,
+ StubRoutines::_jlong_arraycopy_nopush,
+ StubRoutines::_checkcast_arraycopy_nopush);
StubRoutines::_jbyte_fill = generate_fill(StubId::stubgen_jbyte_fill_id);
StubRoutines::_jshort_fill = generate_fill(StubId::stubgen_jshort_fill_id);
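The rewiring above follows one pattern: each disjoint copy generator reports a secondary "nopush" entry (one that skips the frame push), the matching conjoint generator consumes it and publishes its own nopush entry, and the unsafe/generic copy stubs are finally handed those published StubRoutines fields instead of local variables. A minimal standalone sketch of that chaining, using plain function pointers; the names and signatures are illustrative only, not the real stub-generator API.

#include <cstdio>

// Illustrative stand-in for a generated stub entry point.
using StubEntry = void (*)(const void* src, void* dst, int count);

static void disjoint_copy(const void*, void*, int n) { std::printf("disjoint copy of %d\n", n); }
static void conjoint_copy(const void*, void*, int n) { std::printf("conjoint copy of %d\n", n); }

// A disjoint generator returns the public entry and hands a "nopush" entry
// back through the out-parameter, just as the real generators fill &nopush_entry.
static StubEntry generate_disjoint(StubEntry* nopush_out) {
  *nopush_out = disjoint_copy;          // needed by the conjoint stub
  return disjoint_copy;
}

// The conjoint generator takes the disjoint nopush entry (it branches to it
// for non-overlapping inputs) and may publish its own nopush entry.
static StubEntry generate_conjoint(StubEntry disjoint_nopush, StubEntry* nopush_out) {
  (void)disjoint_nopush;
  if (nopush_out != nullptr) {
    *nopush_out = conjoint_copy;        // needed by generic/unsafe copy
  }
  return conjoint_copy;
}

int main() {
  StubEntry nopush = nullptr;
  StubEntry disjoint        = generate_disjoint(&nopush);
  StubEntry disjoint_nopush = nopush;   // analogue of _oop_disjoint_arraycopy_nopush
  StubEntry conjoint        = generate_conjoint(disjoint_nopush, &nopush);
  StubEntry conjoint_nopush = nopush;   // analogue of _oop_arraycopy_nopush
  disjoint(nullptr, nullptr, 8);
  conjoint(nullptr, nullptr, 8);
  conjoint_nopush(nullptr, nullptr, 8); // what a generic-copy generator would be handed
  return 0;
}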
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 094ab370190..f0dfdb22745 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -139,7 +139,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
- Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
+ Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
@@ -338,6 +338,16 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);
+ //
+ // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
+ //
+ __ bind(std_cpuid29);
+ __ movl(rax, 0x29);
+ __ movl(rcx, 0);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
+ __ movl(Address(rsi, 0), rbx);
+
//
// cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
//
@@ -1016,16 +1026,6 @@ void VM_Version::get_processor_features() {
_features.clear_feature(CPU_AVX10_2);
}
- // Currently APX support is only enabled for targets supporting AVX512VL feature.
- bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
- if (UseAPX && !apx_supported) {
- warning("UseAPX is not supported on this CPU, setting it to false");
- FLAG_SET_DEFAULT(UseAPX, false);
- }
-
- if (!UseAPX) {
- _features.clear_feature(CPU_APX_F);
- }
if (UseAVX < 2) {
_features.clear_feature(CPU_AVX2);
@@ -1049,6 +1049,7 @@ void VM_Version::get_processor_features() {
_features.clear_feature(CPU_VZEROUPPER);
_features.clear_feature(CPU_AVX512BW);
_features.clear_feature(CPU_AVX512VL);
+ _features.clear_feature(CPU_APX_F);
_features.clear_feature(CPU_AVX512DQ);
_features.clear_feature(CPU_AVX512_VNNI);
_features.clear_feature(CPU_AVX512_VAES);
@@ -1068,6 +1069,17 @@ void VM_Version::get_processor_features() {
}
}
+ // Currently APX support is only enabled for targets supporting the AVX512VL feature.
+ bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
+ if (UseAPX && !apx_supported) {
+ warning("UseAPX is not supported on this CPU, setting it to false");
+ FLAG_SET_DEFAULT(UseAPX, false);
+ }
+
+ if (!UseAPX) {
+ _features.clear_feature(CPU_APX_F);
+ }
+
if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
@@ -2912,7 +2924,8 @@ VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
if (std_cpuid1_ecx.bits.popcnt != 0)
vm_features.set_feature(CPU_POPCNT);
if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
- xem_xcr0_eax.bits.apx_f != 0) {
+ xem_xcr0_eax.bits.apx_f != 0 &&
+ std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
vm_features.set_feature(CPU_APX_F);
}
if (std_cpuid1_ecx.bits.avx != 0 &&
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index 54b3a93d64b..cd8e957ca9a 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -306,6 +306,14 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
+ union StdCpuidEax29Ecx0 {
+ uint32_t value;
+ struct {
+ uint32_t apx_nci_ndd_nf : 1,
+ : 31;
+ } bits;
+ };
+
union StdCpuid24MainLeafEax {
uint32_t value;
struct {
@@ -591,6 +599,10 @@ protected:
StdCpuid24MainLeafEax std_cpuid24_eax;
StdCpuid24MainLeafEbx std_cpuid24_ebx;
+ // cpuid function 0x29: APX (Advanced Performance Extensions) leaf
+ // eax = 0x29, ecx = 0
+ StdCpuidEax29Ecx0 std_cpuid29_ebx;
+
// cpuid function 0xB (processor topology)
// ecx = 0
uint32_t tpl_cpuidB0_eax;
@@ -711,6 +723,7 @@ public:
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize std_cpuid24_offset() { return byte_offset_of(CpuidInfo, std_cpuid24_eax); }
+ static ByteSize std_cpuid29_offset() { return byte_offset_of(CpuidInfo, std_cpuid29_ebx); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize sefsl1_cpuid7_offset() { return byte_offset_of(CpuidInfo, sefsl1_cpuid7_eax); }
@@ -760,7 +773,9 @@ public:
_features.set_feature(CPU_SSE2);
_features.set_feature(CPU_VZEROUPPER);
}
- static void set_apx_cpuFeatures() { _features.set_feature(CPU_APX_F); }
+ static void set_apx_cpuFeatures() {
+ _features.set_feature(CPU_APX_F);
+ }
static void set_bmi_cpuFeatures() {
_features.set_feature(CPU_BMI1);
_features.set_feature(CPU_BMI2);
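For reference, the new leaf can also be probed outside the stub generator. A hedged sketch using GCC/Clang's <cpuid.h>: the bit layout (EBX bit 0 = APX NCI/NDD/NF support) is taken from the StdCpuidEax29Ecx0 union above and the fact that the stub stores rbx; whether leaf 0x29 exists at all is gated here on the maximum standard leaf.

#include <cpuid.h>
#include <cstdio>

// Returns true if CPUID leaf 0x29 (subleaf 0) reports the APX NCI/NDD/NF bit.
// The bit position mirrors the 1-bit field in StdCpuidEax29Ecx0; treat it as
// an assumption outside of that context.
static bool cpu_reports_apx_nci_ndd_nf() {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx)) {
    return false;                       // CPUID not available
  }
  if (eax < 0x29) {
    return false;                       // leaf 0x29 not implemented
  }
  if (!__get_cpuid_count(0x29, 0, &eax, &ebx, &ecx, &edx)) {
    return false;
  }
  return (ebx & 0x1u) != 0;             // low bit of EBX, as in the union above
}

int main() {
  std::printf("APX NCI/NDD/NF reported: %s\n",
              cpu_reports_apx_nci_ndd_nf() ? "yes" : "no");
  return 0;
}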
diff --git a/src/hotspot/cpu/zero/assembler_zero.inline.hpp b/src/hotspot/cpu/zero/assembler_zero.inline.hpp
index 0a3f4fc25fa..d78eb39c973 100644
--- a/src/hotspot/cpu/zero/assembler_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/assembler_zero.inline.hpp
@@ -26,7 +26,7 @@
#ifndef CPU_ZERO_ASSEMBLER_ZERO_INLINE_HPP
#define CPU_ZERO_ASSEMBLER_ZERO_INLINE_HPP
-#include "asm/assembler.inline.hpp"
+#include "asm/assembler.hpp"
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
#include "runtime/handles.inline.hpp"
diff --git a/src/hotspot/os/linux/compilerThreadTimeout_linux.hpp b/src/hotspot/os/linux/compilerThreadTimeout_linux.hpp
index 2dc6fa7b9c9..7c27080eb5e 100644
--- a/src/hotspot/os/linux/compilerThreadTimeout_linux.hpp
+++ b/src/hotspot/os/linux/compilerThreadTimeout_linux.hpp
@@ -46,6 +46,10 @@ class CompilerThreadTimeoutLinux : public CHeapObj {
bool init_timeout();
void arm();
void disarm();
+ void reset() {
+ disarm();
+ arm();
+ }
};
#endif //LINUX_COMPILER_THREAD_TIMEOUT_LINUX_HPP
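The new reset() is simply disarm-then-rearm, so a compile that keeps making progress keeps pushing its deadline forward. A standalone sketch of the same shape on top of a POSIX timer; the use of timer_create/timer_settime here is an assumption for illustration, not a claim about how the HotSpot class is implemented.

// Compile on Linux with: g++ -std=c++17 timeout.cpp -lrt
#include <csignal>
#include <cstdio>
#include <ctime>

class ScopedTimeout {
  timer_t _timer{};
  long _seconds;
 public:
  explicit ScopedTimeout(long seconds) : _seconds(seconds) {
    struct sigevent sev{};
    sev.sigev_notify = SIGEV_SIGNAL;    // deliver SIGALRM on expiry
    sev.sigev_signo  = SIGALRM;
    timer_create(CLOCK_MONOTONIC, &sev, &_timer);
  }
  ~ScopedTimeout() { timer_delete(_timer); }

  void arm() {
    struct itimerspec its{};
    its.it_value.tv_sec = _seconds;     // one-shot deadline
    timer_settime(_timer, 0, &its, nullptr);
  }
  void disarm() {
    struct itimerspec its{};            // all-zero it_value disarms the timer
    timer_settime(_timer, 0, &its, nullptr);
  }
  void reset() {                        // same shape as the patch: disarm, then re-arm
    disarm();
    arm();
  }
};

int main() {
  ScopedTimeout t(30);
  t.arm();
  t.reset();                            // e.g. called when a compile task makes progress
  t.disarm();
  std::printf("timeout armed, reset and disarmed\n");
  return 0;
}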
diff --git a/src/hotspot/os/linux/memMapPrinter_linux.cpp b/src/hotspot/os/linux/memMapPrinter_linux.cpp
index 91fd552eec5..228a823dcb0 100644
--- a/src/hotspot/os/linux/memMapPrinter_linux.cpp
+++ b/src/hotspot/os/linux/memMapPrinter_linux.cpp
@@ -106,6 +106,7 @@ public:
PRINTIF(info.swap > 0, "swap");
PRINTIF(info.ht, "huge");
PRINTIF(info.anonhugepages > 0, "thp");
+ PRINTIF(info.thpeligible, "thpel");
PRINTIF(info.hg, "thpad");
PRINTIF(info.nh, "nothp");
if (num_printed == 0) {
@@ -135,6 +136,7 @@ public:
st->print_cr(" com: mapping committed (swap space reserved)");
st->print_cr(" swap: mapping partly or completely swapped out");
st->print_cr(" thp: mapping uses THP");
+ st->print_cr(" thpel: mapping is THP-eligible");
st->print_cr(" thpad: mapping is THP-madvised");
st->print_cr(" nothp: mapping is forbidden to use THP");
st->print_cr(" huge: mapping uses hugetlb pages");
diff --git a/src/hotspot/os/linux/procMapsParser.cpp b/src/hotspot/os/linux/procMapsParser.cpp
index 71b828bcefb..47c5c6cc594 100644
--- a/src/hotspot/os/linux/procMapsParser.cpp
+++ b/src/hotspot/os/linux/procMapsParser.cpp
@@ -76,8 +76,16 @@ void ProcSmapsParser::scan_additional_line(ProcSmapsInfo& out) {
SCAN("Private_Hugetlb", out.private_hugetlb);
SCAN("Shared_Hugetlb", out.shared_hugetlb);
SCAN("Swap", out.swap);
- int i = 0;
#undef SCAN
+
+ // scan THPeligible into a bool
+ int thpel = 0;
+ if (::sscanf(_line, "THPeligible: %d", &thpel) == 1) {
+ assert(thpel == 1 || thpel == 0, "Unexpected value %d", thpel);
+ out.thpeligible = (thpel == 1);
+ return;
+ }
+
// scan some flags too
if (strncmp(_line, "VmFlags:", 8) == 0) {
#define SCAN(flag) { out.flag = (::strstr(_line + 8, " " #flag) != nullptr); }
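A self-contained sketch of the same parse step, run against a literal smaps fragment instead of /proc/<pid>/smaps. The field names match the kernel's output; the surrounding struct is simplified.

#include <cstdio>
#include <cstring>

struct SmapsInfo {
  size_t swap = 0;
  bool thpeligible = false;
};

// Parse one smaps line, mirroring the scanner above: numeric "Key: N kB"
// fields via sscanf, THPeligible as a 0/1 flag stored into a bool.
static void scan_line(const char* line, SmapsInfo& out) {
  size_t v = 0;
  if (std::sscanf(line, "Swap: %zu kB", &v) == 1) {
    out.swap = v * 1024;
    return;
  }
  int thpel = 0;
  if (std::sscanf(line, "THPeligible: %d", &thpel) == 1) {
    out.thpeligible = (thpel == 1);
    return;
  }
}

int main() {
  const char* fragment[] = {
    "Swap:                  0 kB",
    "THPeligible:           1",
  };
  SmapsInfo info;
  for (const char* line : fragment) {
    scan_line(line, info);
  }
  std::printf("swap=%zu thpeligible=%d\n", info.swap, (int)info.thpeligible);
  return 0;
}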
diff --git a/src/hotspot/os/linux/procMapsParser.hpp b/src/hotspot/os/linux/procMapsParser.hpp
index 0971c4fb084..06035333b2f 100644
--- a/src/hotspot/os/linux/procMapsParser.hpp
+++ b/src/hotspot/os/linux/procMapsParser.hpp
@@ -49,6 +49,7 @@ struct ProcSmapsInfo {
size_t shared_hugetlb;
size_t anonhugepages;
size_t swap;
+ bool thpeligible;
bool rd, wr, ex;
bool sh; // shared
bool nr; // no reserve
@@ -64,7 +65,7 @@ struct ProcSmapsInfo {
from = to = nullptr;
prot[0] = filename[0] = '\0';
kernelpagesize = rss = private_hugetlb = shared_hugetlb = anonhugepages = swap = 0;
- rd = wr = ex = sh = nr = hg = ht = nh = false;
+ thpeligible = rd = wr = ex = sh = nr = hg = ht = nh = false;
}
};
diff --git a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp b/src/hotspot/os_cpu/aix_ppc/atomicAccess_aix_ppc.hpp
similarity index 98%
rename from src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
rename to src/hotspot/os_cpu/aix_ppc/atomicAccess_aix_ppc.hpp
index d32f7c93ecf..3540e364bc6 100644
--- a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
+++ b/src/hotspot/os_cpu/aix_ppc/atomicAccess_aix_ppc.hpp
@@ -23,8 +23,8 @@
*
*/
-#ifndef OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
-#define OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
+#ifndef OS_CPU_AIX_PPC_ATOMICACCESS_AIX_PPC_HPP
+#define OS_CPU_AIX_PPC_ATOMICACCESS_AIX_PPC_HPP
#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
@@ -33,7 +33,7 @@
#include "orderAccess_aix_ppc.hpp"
#include "utilities/debug.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
//
// machine barrier instructions:
@@ -414,4 +414,4 @@ struct AtomicAccess::PlatformOrderedLoad {
}
};
-#endif // OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
+#endif // OS_CPU_AIX_PPC_ATOMICACCESS_AIX_PPC_HPP
diff --git a/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp b/src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp
similarity index 95%
rename from src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp
rename to src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp
index 1ecdd59f59e..3d2c632ace8 100644
--- a/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp
+++ b/src/hotspot/os_cpu/bsd_aarch64/atomicAccess_bsd_aarch64.hpp
@@ -24,12 +24,12 @@
*
*/
-#ifndef OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP
-#define OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP
+#ifndef OS_CPU_BSD_AARCH64_ATOMICACCESS_BSD_AARCH64_HPP
+#define OS_CPU_BSD_AARCH64_ATOMICACCESS_BSD_AARCH64_HPP
#include "utilities/debug.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
// Note that memory_order_conservative requires a full barrier after atomic stores.
// See https://patchwork.kernel.org/patch/3575821/
@@ -129,5 +129,4 @@ struct AtomicAccess::PlatformOrderedStore
void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
};
-
-#endif // OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP
+#endif // OS_CPU_BSD_AARCH64_ATOMICACCESS_BSD_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp
similarity index 97%
rename from src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
rename to src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp
index 8fbc319e766..975580fbd71 100644
--- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
+++ b/src/hotspot/os_cpu/bsd_x86/atomicAccess_bsd_x86.hpp
@@ -22,10 +22,10 @@
*
*/
-#ifndef OS_CPU_BSD_X86_ATOMIC_BSD_X86_HPP
-#define OS_CPU_BSD_X86_ATOMIC_BSD_X86_HPP
+#ifndef OS_CPU_BSD_X86_ATOMICACCESS_BSD_X86_HPP
+#define OS_CPU_BSD_X86_ATOMICACCESS_BSD_X86_HPP
-// Implementation of class atomic
+// Implementation of class AtomicAccess
template<size_t byte_size>
struct AtomicAccess::PlatformAdd {
@@ -230,4 +230,4 @@ struct AtomicAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
};
#endif // AMD64
-#endif // OS_CPU_BSD_X86_ATOMIC_BSD_X86_HPP
+#endif // OS_CPU_BSD_X86_ATOMICACCESS_BSD_X86_HPP
diff --git a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp b/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
similarity index 96%
rename from src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
rename to src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
index b5cedac867b..6a720dac54e 100644
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
@@ -23,13 +23,13 @@
*
*/
-#ifndef OS_CPU_BSD_ZERO_ATOMIC_BSD_ZERO_HPP
-#define OS_CPU_BSD_ZERO_ATOMIC_BSD_ZERO_HPP
+#ifndef OS_CPU_BSD_ZERO_ATOMICACCESS_BSD_ZERO_HPP
+#define OS_CPU_BSD_ZERO_ATOMICACCESS_BSD_ZERO_HPP
#include "orderAccess_bsd_zero.hpp"
#include "runtime/os.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
template<size_t byte_size>
struct AtomicAccess::PlatformAdd {
@@ -149,4 +149,4 @@ inline void AtomicAccess::PlatformStore<8>::operator()(T volatile* dest,
__atomic_store(dest, &store_value, __ATOMIC_RELAXED);
}
-#endif // OS_CPU_BSD_ZERO_ATOMIC_BSD_ZERO_HPP
+#endif // OS_CPU_BSD_ZERO_ATOMICACCESS_BSD_ZERO_HPP
diff --git a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp
index 3fefbdbe56c..facad184426 100644
--- a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp
+++ b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp
@@ -24,7 +24,6 @@
*/
#include "asm/assembler.inline.hpp"
-#include "atomic_bsd_zero.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
@@ -36,6 +35,7 @@
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
#include "runtime/arguments.hpp"
+#include "runtime/atomicAccess.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/java.hpp"
diff --git a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp
similarity index 97%
rename from src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
rename to src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp
index 4940cbdc246..6e5f53edfa3 100644
--- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/atomicAccess_linux_aarch64.hpp
@@ -23,13 +23,13 @@
*
*/
-#ifndef OS_CPU_LINUX_AARCH64_ATOMIC_LINUX_AARCH64_HPP
-#define OS_CPU_LINUX_AARCH64_ATOMIC_LINUX_AARCH64_HPP
+#ifndef OS_CPU_LINUX_AARCH64_ATOMICACCESS_LINUX_AARCH64_HPP
+#define OS_CPU_LINUX_AARCH64_ATOMICACCESS_LINUX_AARCH64_HPP
#include "atomic_aarch64.hpp"
#include "runtime/vm_version.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
// Note that memory_order_conservative requires a full barrier after atomic stores.
// See https://patchwork.kernel.org/patch/3575821/
@@ -217,4 +217,4 @@ struct AtomicAccess::PlatformOrderedStore
void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
};
-#endif // OS_CPU_LINUX_AARCH64_ATOMIC_LINUX_AARCH64_HPP
+#endif // OS_CPU_LINUX_AARCH64_ATOMICACCESS_LINUX_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
similarity index 97%
rename from src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
rename to src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
index db00c347dea..5b5f9da51a6 100644
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
@@ -22,14 +22,14 @@
*
*/
-#ifndef OS_CPU_LINUX_ARM_ATOMIC_LINUX_ARM_HPP
-#define OS_CPU_LINUX_ARM_ATOMIC_LINUX_ARM_HPP
+#ifndef OS_CPU_LINUX_ARM_ATOMICACCESS_LINUX_ARM_HPP
+#define OS_CPU_LINUX_ARM_ATOMICACCESS_LINUX_ARM_HPP
#include "memory/allStatic.hpp"
#include "runtime/os.hpp"
#include "runtime/vm_version.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
class ARMAtomicFuncs : AllStatic {
public:
@@ -178,4 +178,4 @@ inline T AtomicAccess::PlatformCmpxchg<8>::operator()(T volatile* dest,
return cmpxchg_using_helper(reorder_cmpxchg_long_func, dest, compare_value, exchange_value);
}
-#endif // OS_CPU_LINUX_ARM_ATOMIC_LINUX_ARM_HPP
+#endif // OS_CPU_LINUX_ARM_ATOMICACCESS_LINUX_ARM_HPP
diff --git a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp b/src/hotspot/os_cpu/linux_ppc/atomicAccess_linux_ppc.hpp
similarity index 98%
rename from src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
rename to src/hotspot/os_cpu/linux_ppc/atomicAccess_linux_ppc.hpp
index 9f1d90c26bd..f4eac1207bf 100644
--- a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
+++ b/src/hotspot/os_cpu/linux_ppc/atomicAccess_linux_ppc.hpp
@@ -23,8 +23,8 @@
*
*/
-#ifndef OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
-#define OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
+#ifndef OS_CPU_LINUX_PPC_ATOMICACCESS_LINUX_PPC_HPP
+#define OS_CPU_LINUX_PPC_ATOMICACCESS_LINUX_PPC_HPP
#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
@@ -33,7 +33,7 @@
#include "orderAccess_linux_ppc.hpp"
#include "utilities/debug.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
//
// machine barrier instructions:
@@ -392,4 +392,4 @@ struct AtomicAccess::PlatformOrderedLoad
}
};
-#endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
+#endif // OS_CPU_LINUX_PPC_ATOMICACCESS_LINUX_PPC_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp
similarity index 97%
rename from src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
rename to src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp
index f713465edeb..6d57ea55a83 100644
--- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
+++ b/src/hotspot/os_cpu/linux_riscv/atomicAccess_linux_riscv.hpp
@@ -23,12 +23,12 @@
*
*/
-#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
-#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+#ifndef OS_CPU_LINUX_RISCV_ATOMICACCESS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_ATOMICACCESS_LINUX_RISCV_HPP
#include "runtime/vm_version.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
// Note that memory_order_conservative requires a full barrier after atomic stores.
// See https://patchwork.kernel.org/patch/3575821/
@@ -226,4 +226,4 @@ struct AtomicAccess::PlatformOrderedStore
#undef FULL_COMPILER_ATOMIC_SUPPORT
-#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+#endif // OS_CPU_LINUX_RISCV_ATOMICACCESS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp b/src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp
similarity index 98%
rename from src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
rename to src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp
index ec620e3907a..5849d69ae2f 100644
--- a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
+++ b/src/hotspot/os_cpu/linux_s390/atomicAccess_linux_s390.hpp
@@ -23,8 +23,8 @@
*
*/
-#ifndef OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
-#define OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
+#ifndef OS_CPU_LINUX_S390_ATOMICACCESS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_ATOMICACCESS_LINUX_S390_HPP
#include "runtime/atomicAccess.hpp"
#include "runtime/os.hpp"
@@ -345,4 +345,4 @@ struct AtomicAccess::PlatformOrderedLoad
T operator()(const volatile T* p) const { T t = *p; OrderAccess::acquire(); return t; }
};
-#endif // OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
+#endif // OS_CPU_LINUX_S390_ATOMICACCESS_LINUX_S390_HPP
diff --git a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp
similarity index 97%
rename from src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
rename to src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp
index 561224f56be..c9af982525d 100644
--- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
+++ b/src/hotspot/os_cpu/linux_x86/atomicAccess_linux_x86.hpp
@@ -22,10 +22,10 @@
*
*/
-#ifndef OS_CPU_LINUX_X86_ATOMIC_LINUX_X86_HPP
-#define OS_CPU_LINUX_X86_ATOMIC_LINUX_X86_HPP
+#ifndef OS_CPU_LINUX_X86_ATOMICACCESS_LINUX_X86_HPP
+#define OS_CPU_LINUX_X86_ATOMICACCESS_LINUX_X86_HPP
-// Implementation of class atomic
+// Implementation of class AtomicAccess
template<size_t byte_size>
struct AtomicAccess::PlatformAdd {
@@ -230,4 +230,4 @@ struct AtomicAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
};
#endif // AMD64
-#endif // OS_CPU_LINUX_X86_ATOMIC_LINUX_X86_HPP
+#endif // OS_CPU_LINUX_X86_ATOMICACCESS_LINUX_X86_HPP
diff --git a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp b/src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp
similarity index 96%
rename from src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
rename to src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp
index 05d567d3e28..376ef7a9dc9 100644
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
+++ b/src/hotspot/os_cpu/linux_zero/atomicAccess_linux_zero.hpp
@@ -23,12 +23,12 @@
*
*/
-#ifndef OS_CPU_LINUX_ZERO_ATOMIC_LINUX_ZERO_HPP
-#define OS_CPU_LINUX_ZERO_ATOMIC_LINUX_ZERO_HPP
+#ifndef OS_CPU_LINUX_ZERO_ATOMICACCESS_LINUX_ZERO_HPP
+#define OS_CPU_LINUX_ZERO_ATOMICACCESS_LINUX_ZERO_HPP
#include "orderAccess_linux_zero.hpp"
-// Implementation of class atomic
+// Implementation of class AtomicAccess
template<size_t byte_size>
struct AtomicAccess::PlatformAdd {
@@ -149,4 +149,4 @@ inline void AtomicAccess::PlatformStore<8>::operator()(T volatile* dest,
__atomic_store(dest, &store_value, __ATOMIC_RELAXED);
}
-#endif // OS_CPU_LINUX_ZERO_ATOMIC_LINUX_ZERO_HPP
+#endif // OS_CPU_LINUX_ZERO_ATOMICACCESS_LINUX_ZERO_HPP
diff --git a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp
index c9ac461851a..0ea379fec50 100644
--- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp
+++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp
@@ -24,7 +24,6 @@
*/
#include "asm/assembler.inline.hpp"
-#include "atomic_linux_zero.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
@@ -36,6 +35,7 @@
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
#include "runtime/arguments.hpp"
+#include "runtime/atomicAccess.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/java.hpp"
diff --git a/src/hotspot/os_cpu/windows_aarch64/atomic_windows_aarch64.hpp b/src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp
similarity index 96%
rename from src/hotspot/os_cpu/windows_aarch64/atomic_windows_aarch64.hpp
rename to src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp
index 42c5b0e4a6c..62b6e3f87ec 100644
--- a/src/hotspot/os_cpu/windows_aarch64/atomic_windows_aarch64.hpp
+++ b/src/hotspot/os_cpu/windows_aarch64/atomicAccess_windows_aarch64.hpp
@@ -23,8 +23,8 @@
*
*/
-#ifndef OS_CPU_WINDOWS_AARCH64_ATOMIC_WINDOWS_AARCH64_HPP
-#define OS_CPU_WINDOWS_AARCH64_ATOMIC_WINDOWS_AARCH64_HPP
+#ifndef OS_CPU_WINDOWS_AARCH64_ATOMICACCESS_WINDOWS_AARCH64_HPP
+#define OS_CPU_WINDOWS_AARCH64_ATOMICACCESS_WINDOWS_AARCH64_HPP
#include <intrin.h>
#include "runtime/os.hpp"
@@ -109,4 +109,4 @@ DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange64, __int64)
#undef DEFINE_INTRINSIC_CMPXCHG
-#endif // OS_CPU_WINDOWS_AARCH64_ATOMIC_WINDOWS_AARCH64_HPP
+#endif // OS_CPU_WINDOWS_AARCH64_ATOMICACCESS_WINDOWS_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp
similarity index 97%
rename from src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
rename to src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp
index 4529da29092..a95da151688 100644
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/atomicAccess_windows_x86.hpp
@@ -22,8 +22,8 @@
*
*/
-#ifndef OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
-#define OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
+#ifndef OS_CPU_WINDOWS_X86_ATOMICACCESS_WINDOWS_X86_HPP
+#define OS_CPU_WINDOWS_X86_ATOMICACCESS_WINDOWS_X86_HPP
#include <intrin.h>
#include "runtime/os.hpp"
@@ -111,4 +111,4 @@ DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange64, __int64)
#undef DEFINE_INTRINSIC_CMPXCHG
-#endif // OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
+#endif // OS_CPU_WINDOWS_X86_ATOMICACCESS_WINDOWS_X86_HPP
diff --git a/src/hotspot/share/cds/aotClassLinker.cpp b/src/hotspot/share/cds/aotClassLinker.cpp
index 1f9a03de83f..0eb8f141c20 100644
--- a/src/hotspot/share/cds/aotClassLinker.cpp
+++ b/src/hotspot/share/cds/aotClassLinker.cpp
@@ -191,7 +191,7 @@ void AOTClassLinker::write_to_archive() {
assert_at_safepoint();
if (CDSConfig::is_dumping_aot_linked_classes()) {
- AOTLinkedClassTable* table = AOTLinkedClassTable::get(CDSConfig::is_dumping_static_archive());
+ AOTLinkedClassTable* table = AOTLinkedClassTable::get();
table->set_boot(write_classes(nullptr, true));
table->set_boot2(write_classes(nullptr, false));
table->set_platform(write_classes(SystemDictionary::java_platform_loader(), false));
@@ -212,16 +212,7 @@ Array* AOTClassLinker::write_classes(oop class_loader, bool is_j
continue;
}
- if (ik->in_aot_cache() && CDSConfig::is_dumping_dynamic_archive()) {
- if (CDSConfig::is_using_aot_linked_classes()) {
- // This class was recorded as AOT-linked for the base archive,
- // so there's no need to do so again for the dynamic archive.
- } else {
- list.append(ik);
- }
- } else {
- list.append(ArchiveBuilder::current()->get_buffered_addr(ik));
- }
+ list.append(ArchiveBuilder::current()->get_buffered_addr(ik));
}
if (list.length() == 0) {
diff --git a/src/hotspot/share/cds/aotLinkedClassBulkLoader.cpp b/src/hotspot/share/cds/aotLinkedClassBulkLoader.cpp
index 6e5816cd589..8795a29fd5c 100644
--- a/src/hotspot/share/cds/aotLinkedClassBulkLoader.cpp
+++ b/src/hotspot/share/cds/aotLinkedClassBulkLoader.cpp
@@ -46,8 +46,8 @@ bool AOTLinkedClassBulkLoader::_platform_completed = false;
bool AOTLinkedClassBulkLoader::_app_completed = false;
bool AOTLinkedClassBulkLoader::_all_completed = false;
-void AOTLinkedClassBulkLoader::serialize(SerializeClosure* soc, bool is_static_archive) {
- AOTLinkedClassTable::get(is_static_archive)->serialize(soc);
+void AOTLinkedClassBulkLoader::serialize(SerializeClosure* soc) {
+ AOTLinkedClassTable::get()->serialize(soc);
}
bool AOTLinkedClassBulkLoader::class_preloading_finished() {
@@ -117,27 +117,24 @@ void AOTLinkedClassBulkLoader::exit_on_exception(JavaThread* current) {
void AOTLinkedClassBulkLoader::load_classes_in_loader_impl(AOTLinkedClassCategory class_category, oop class_loader_oop, TRAPS) {
Handle h_loader(THREAD, class_loader_oop);
- load_table(AOTLinkedClassTable::for_static_archive(), class_category, h_loader, CHECK);
- load_table(AOTLinkedClassTable::for_dynamic_archive(), class_category, h_loader, CHECK);
+ AOTLinkedClassTable* table = AOTLinkedClassTable::get();
+ load_table(table, class_category, h_loader, CHECK);
// Initialize the InstanceKlasses of all archived heap objects that are reachable from the
// archived java class mirrors.
- //
- // Only the classes in the static archive can have archived mirrors.
- AOTLinkedClassTable* static_table = AOTLinkedClassTable::for_static_archive();
switch (class_category) {
case AOTLinkedClassCategory::BOOT1:
// Delayed until finish_loading_javabase_classes(), as the VM is not ready to
// execute some of the methods.
break;
case AOTLinkedClassCategory::BOOT2:
- init_required_classes_for_loader(h_loader, static_table->boot2(), CHECK);
+ init_required_classes_for_loader(h_loader, table->boot2(), CHECK);
break;
case AOTLinkedClassCategory::PLATFORM:
- init_required_classes_for_loader(h_loader, static_table->platform(), CHECK);
+ init_required_classes_for_loader(h_loader, table->platform(), CHECK);
break;
case AOTLinkedClassCategory::APP:
- init_required_classes_for_loader(h_loader, static_table->app(), CHECK);
+ init_required_classes_for_loader(h_loader, table->app(), CHECK);
break;
case AOTLinkedClassCategory::UNREGISTERED:
ShouldNotReachHere();
@@ -333,7 +330,7 @@ void AOTLinkedClassBulkLoader::load_hidden_class(ClassLoaderData* loader_data, I
}
void AOTLinkedClassBulkLoader::finish_loading_javabase_classes(TRAPS) {
- init_required_classes_for_loader(Handle(), AOTLinkedClassTable::for_static_archive()->boot(), CHECK);
+ init_required_classes_for_loader(Handle(), AOTLinkedClassTable::get()->boot(), CHECK);
}
// Some AOT-linked classes must be initialized early. This includes
@@ -427,8 +424,7 @@ void AOTLinkedClassBulkLoader::replay_training_at_init(Array* cl
void AOTLinkedClassBulkLoader::replay_training_at_init_for_preloaded_classes(TRAPS) {
if (CDSConfig::is_using_aot_linked_classes() && TrainingData::have_data()) {
- // Only static archive can have training data.
- AOTLinkedClassTable* table = AOTLinkedClassTable::for_static_archive();
+ AOTLinkedClassTable* table = AOTLinkedClassTable::get();
replay_training_at_init(table->boot(), CHECK);
replay_training_at_init(table->boot2(), CHECK);
replay_training_at_init(table->platform(), CHECK);
diff --git a/src/hotspot/share/cds/aotLinkedClassBulkLoader.hpp b/src/hotspot/share/cds/aotLinkedClassBulkLoader.hpp
index 0a8b0c4d537..95e64a7ddd4 100644
--- a/src/hotspot/share/cds/aotLinkedClassBulkLoader.hpp
+++ b/src/hotspot/share/cds/aotLinkedClassBulkLoader.hpp
@@ -57,7 +57,7 @@ class AOTLinkedClassBulkLoader : AllStatic {
static void init_required_classes_for_loader(Handle class_loader, Array<InstanceKlass*>* classes, TRAPS);
static void replay_training_at_init(Array<InstanceKlass*>* classes, TRAPS) NOT_CDS_RETURN;
public:
- static void serialize(SerializeClosure* soc, bool is_static_archive) NOT_CDS_RETURN;
+ static void serialize(SerializeClosure* soc) NOT_CDS_RETURN;
static void load_javabase_classes(JavaThread* current) NOT_CDS_RETURN;
static void load_non_javabase_classes(JavaThread* current) NOT_CDS_RETURN;
diff --git a/src/hotspot/share/cds/aotLinkedClassTable.cpp b/src/hotspot/share/cds/aotLinkedClassTable.cpp
index b602c599f54..79d78b05be1 100644
--- a/src/hotspot/share/cds/aotLinkedClassTable.cpp
+++ b/src/hotspot/share/cds/aotLinkedClassTable.cpp
@@ -27,8 +27,7 @@
#include "cds/serializeClosure.hpp"
#include "oops/array.hpp"
-AOTLinkedClassTable AOTLinkedClassTable::_for_static_archive;
-AOTLinkedClassTable AOTLinkedClassTable::_for_dynamic_archive;
+AOTLinkedClassTable AOTLinkedClassTable::_instance;
void AOTLinkedClassTable::serialize(SerializeClosure* soc) {
soc->do_ptr((void**)&_boot);
diff --git a/src/hotspot/share/cds/aotLinkedClassTable.hpp b/src/hotspot/share/cds/aotLinkedClassTable.hpp
index 2a199c15edd..0ec733d1df7 100644
--- a/src/hotspot/share/cds/aotLinkedClassTable.hpp
+++ b/src/hotspot/share/cds/aotLinkedClassTable.hpp
@@ -39,10 +39,7 @@ class SerializeClosure;
// in a production run.
//
class AOTLinkedClassTable {
- // The VM may load up to 2 CDS archives -- static and dynamic. Each
- // archive can have its own AOTLinkedClassTable.
- static AOTLinkedClassTable _for_static_archive;
- static AOTLinkedClassTable _for_dynamic_archive;
+ static AOTLinkedClassTable _instance;
Array<InstanceKlass*>* _boot; // only java.base classes
Array<InstanceKlass*>* _boot2; // boot classes in other modules
@@ -54,11 +51,8 @@ public:
_boot(nullptr), _boot2(nullptr),
_platform(nullptr), _app(nullptr) {}
- static AOTLinkedClassTable* for_static_archive() { return &_for_static_archive; }
- static AOTLinkedClassTable* for_dynamic_archive() { return &_for_dynamic_archive; }
-
- static AOTLinkedClassTable* get(bool is_static_archive) {
- return is_static_archive ? for_static_archive() : for_dynamic_archive();
+ static AOTLinkedClassTable* get() {
+ return &_instance;
}
Array<InstanceKlass*>* boot() const { return _boot; }
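With the dynamic archive no longer carrying its own AOT-linked classes, the per-archive lookup collapses into a plain process-wide singleton. A compact sketch of the resulting shape; types and payload are simplified stand-ins, not the real table.

#include <cstdio>

// Simplified model of the single-table shape; the real entries are
// Array<InstanceKlass*>* lists for the boot/boot2/platform/app loaders.
class LinkedClassTable {
  static LinkedClassTable _instance;    // one table for the whole process
  int _boot = 0, _boot2 = 0, _platform = 0, _app = 0;  // stand-in payload
 public:
  // Previously: get(bool is_static_archive) picked between two statics.
  static LinkedClassTable* get() { return &_instance; }
  void set_counts(int b, int b2, int p, int a) { _boot = b; _boot2 = b2; _platform = p; _app = a; }
  int total() const { return _boot + _boot2 + _platform + _app; }
};

LinkedClassTable LinkedClassTable::_instance;

int main() {
  LinkedClassTable::get()->set_counts(100, 20, 30, 50);
  std::printf("aot-linked classes: %d\n", LinkedClassTable::get()->total());
  return 0;
}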
diff --git a/src/hotspot/share/cds/aotMetaspace.cpp b/src/hotspot/share/cds/aotMetaspace.cpp
index b3f859fc4a8..f866461e7d4 100644
--- a/src/hotspot/share/cds/aotMetaspace.cpp
+++ b/src/hotspot/share/cds/aotMetaspace.cpp
@@ -501,7 +501,7 @@ void AOTMetaspace::serialize(SerializeClosure* soc) {
StringTable::serialize_shared_table_header(soc);
HeapShared::serialize_tables(soc);
SystemDictionaryShared::serialize_dictionary_headers(soc);
- AOTLinkedClassBulkLoader::serialize(soc, true);
+ AOTLinkedClassBulkLoader::serialize(soc);
FinalImageRecipes::serialize(soc);
TrainingData::serialize(soc);
InstanceMirrorKlass::serialize_offsets(soc);
@@ -720,6 +720,7 @@ void VM_PopulateDumpSharedSpace::doit() {
_map_info->set_cloned_vtables(CppVtables::vtables_serialized_base());
_map_info->header()->set_class_location_config(cl_config);
+ HeapShared::delete_tables_with_raw_oops();
CDSConfig::set_is_at_aot_safepoint(false);
}
@@ -1076,11 +1077,6 @@ bool AOTMetaspace::write_static_archive(ArchiveBuilder* builder, FileMapInfo* ma
return false;
}
builder->write_archive(map_info, heap_info);
-
- if (AllowArchivingWithJavaAgent) {
- aot_log_warning(aot)("This %s was created with AllowArchivingWithJavaAgent. It should be used "
- "for testing purposes only and should not be used in a production environment", CDSConfig::type_of_archive_being_loaded());
- }
return true;
}
@@ -2001,7 +1997,7 @@ void AOTMetaspace::initialize_shared_spaces() {
if (dynamic_mapinfo != nullptr) {
intptr_t* buffer = (intptr_t*)dynamic_mapinfo->serialized_data();
ReadClosure rc(&buffer, (intptr_t)SharedBaseAddress);
- ArchiveBuilder::serialize_dynamic_archivable_items(&rc);
+ DynamicArchive::serialize(&rc);
DynamicArchive::setup_array_klasses();
}
diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp
index 77f51443bb2..41a1d3d3c6d 100644
--- a/src/hotspot/share/cds/archiveBuilder.cpp
+++ b/src/hotspot/share/cds/archiveBuilder.cpp
@@ -24,7 +24,6 @@
#include "cds/aotArtifactFinder.hpp"
#include "cds/aotClassLinker.hpp"
-#include "cds/aotLinkedClassBulkLoader.hpp"
#include "cds/aotLogging.hpp"
#include "cds/aotMapLogger.hpp"
#include "cds/aotMetaspace.hpp"
@@ -1015,13 +1014,6 @@ void ArchiveBuilder::make_training_data_shareable() {
_src_obj_table.iterate_all(clean_td);
}
-void ArchiveBuilder::serialize_dynamic_archivable_items(SerializeClosure* soc) {
- SymbolTable::serialize_shared_table_header(soc, false);
- SystemDictionaryShared::serialize_dictionary_headers(soc, false);
- DynamicArchive::serialize_array_klasses(soc);
- AOTLinkedClassBulkLoader::serialize(soc, false);
-}
-
uintx ArchiveBuilder::buffer_to_offset(address p) const {
address requested_p = to_requested(p);
assert(requested_p >= _requested_static_archive_bottom, "must be");
diff --git a/src/hotspot/share/cds/archiveBuilder.hpp b/src/hotspot/share/cds/archiveBuilder.hpp
index 170e61beba8..815a6f07273 100644
--- a/src/hotspot/share/cds/archiveBuilder.hpp
+++ b/src/hotspot/share/cds/archiveBuilder.hpp
@@ -382,7 +382,6 @@ public:
bool gather_klass_and_symbol(MetaspaceClosure::Ref* ref, bool read_only);
bool gather_one_source_obj(MetaspaceClosure::Ref* ref, bool read_only);
void remember_embedded_pointer_in_enclosing_obj(MetaspaceClosure::Ref* ref);
- static void serialize_dynamic_archivable_items(SerializeClosure* soc);
DumpRegion* pz_region() { return &_pz_region; }
DumpRegion* rw_region() { return &_rw_region; }
diff --git a/src/hotspot/share/cds/archiveHeapWriter.cpp b/src/hotspot/share/cds/archiveHeapWriter.cpp
index c7750c70f1b..d1a8772874a 100644
--- a/src/hotspot/share/cds/archiveHeapWriter.cpp
+++ b/src/hotspot/share/cds/archiveHeapWriter.cpp
@@ -95,6 +95,11 @@ void ArchiveHeapWriter::init() {
}
}
+void ArchiveHeapWriter::delete_tables_with_raw_oops() {
+ delete _source_objs;
+ _source_objs = nullptr;
+}
+
void ArchiveHeapWriter::add_source_obj(oop src_obj) {
_source_objs->append(src_obj);
}
@@ -145,7 +150,7 @@ oop ArchiveHeapWriter::requested_obj_from_buffer_offset(size_t offset) {
oop ArchiveHeapWriter::source_obj_to_requested_obj(oop src_obj) {
assert(CDSConfig::is_dumping_heap(), "dump-time only");
- HeapShared::CachedOopInfo* p = HeapShared::archived_object_cache()->get(src_obj);
+ HeapShared::CachedOopInfo* p = HeapShared::get_cached_oop_info(src_obj);
if (p != nullptr) {
return requested_obj_from_buffer_offset(p->buffer_offset());
} else {
@@ -154,9 +159,9 @@ oop ArchiveHeapWriter::source_obj_to_requested_obj(oop src_obj) {
}
oop ArchiveHeapWriter::buffered_addr_to_source_obj(address buffered_addr) {
- oop* p = _buffer_offset_to_source_obj_table->get(buffered_address_to_offset(buffered_addr));
- if (p != nullptr) {
- return *p;
+ OopHandle* oh = _buffer_offset_to_source_obj_table->get(buffered_address_to_offset(buffered_addr));
+ if (oh != nullptr) {
+ return oh->resolve();
} else {
return nullptr;
}
@@ -356,12 +361,13 @@ void ArchiveHeapWriter::copy_source_objs_to_buffer(GrowableArrayCHeaplength(); i++) {
int src_obj_index = _source_objs_order->at(i)._index;
oop src_obj = _source_objs->at(src_obj_index);
- HeapShared::CachedOopInfo* info = HeapShared::archived_object_cache()->get(src_obj);
+ HeapShared::CachedOopInfo* info = HeapShared::get_cached_oop_info(src_obj);
assert(info != nullptr, "must be");
size_t buffer_offset = copy_one_source_obj_to_buffer(src_obj);
info->set_buffer_offset(buffer_offset);
- _buffer_offset_to_source_obj_table->put_when_absent(buffer_offset, src_obj);
+ OopHandle handle(Universe::vm_global(), src_obj);
+ _buffer_offset_to_source_obj_table->put_when_absent(buffer_offset, handle);
_buffer_offset_to_source_obj_table->maybe_grow();
if (java_lang_Module::is_instance(src_obj)) {
@@ -696,7 +702,7 @@ void ArchiveHeapWriter::relocate_embedded_oops(GrowableArrayCHeaplength(); i++) {
int src_obj_index = _source_objs_order->at(i)._index;
oop src_obj = _source_objs->at(src_obj_index);
- HeapShared::CachedOopInfo* info = HeapShared::archived_object_cache()->get(src_obj);
+ HeapShared::CachedOopInfo* info = HeapShared::get_cached_oop_info(src_obj);
assert(info != nullptr, "must be");
oop requested_obj = requested_obj_from_buffer_offset(info->buffer_offset());
update_header_for_requested_obj(requested_obj, src_obj, src_obj->klass());
@@ -758,7 +764,7 @@ void ArchiveHeapWriter::compute_ptrmap(ArchiveHeapInfo* heap_info) {
NativePointerInfo info = _native_pointers->at(i);
oop src_obj = info._src_obj;
int field_offset = info._field_offset;
- HeapShared::CachedOopInfo* p = HeapShared::archived_object_cache()->get(src_obj);
+ HeapShared::CachedOopInfo* p = HeapShared::get_cached_oop_info(src_obj);
// requested_field_addr = the address of this field in the requested space
oop requested_obj = requested_obj_from_buffer_offset(p->buffer_offset());
Metadata** requested_field_addr = (Metadata**)(cast_from_oop(requested_obj) + field_offset);
diff --git a/src/hotspot/share/cds/archiveHeapWriter.hpp b/src/hotspot/share/cds/archiveHeapWriter.hpp
index 18e647912f1..80e72c12e7e 100644
--- a/src/hotspot/share/cds/archiveHeapWriter.hpp
+++ b/src/hotspot/share/cds/archiveHeapWriter.hpp
@@ -152,7 +152,7 @@ private:
};
static GrowableArrayCHeap* _source_objs_order;
- typedef ResizeableHashTable BufferOffsetToSourceObjectTable;
static BufferOffsetToSourceObjectTable* _buffer_offset_to_source_obj_table;
@@ -227,6 +227,7 @@ private:
public:
static void init() NOT_CDS_JAVA_HEAP_RETURN;
+ static void delete_tables_with_raw_oops();
static void add_source_obj(oop src_obj);
static bool is_too_large_to_archive(size_t size);
static bool is_too_large_to_archive(oop obj);
diff --git a/src/hotspot/share/cds/cdsConfig.cpp b/src/hotspot/share/cds/cdsConfig.cpp
index 0505ae20a78..5bb46deb9bc 100644
--- a/src/hotspot/share/cds/cdsConfig.cpp
+++ b/src/hotspot/share/cds/cdsConfig.cpp
@@ -470,10 +470,6 @@ void CDSConfig::check_aot_flags() {
assert(strcmp(AOTMode, "create") == 0, "checked by AOTModeConstraintFunc");
check_aotmode_create();
}
-
- // This is an old flag used by CDS regression testing only. It doesn't apply
- // to the AOT workflow.
- FLAG_SET_ERGO(AllowArchivingWithJavaAgent, false);
}
void CDSConfig::check_aotmode_off() {
@@ -716,13 +712,6 @@ bool CDSConfig::check_vm_args_consistency(bool patch_mod_javabase, bool mode_fla
}
}
- if (is_dumping_classic_static_archive() && AOTClassLinking) {
- if (JvmtiAgentList::disable_agent_list()) {
- FLAG_SET_ERGO(AllowArchivingWithJavaAgent, false);
- log_warning(cds)("Disabled all JVMTI agents with -Xshare:dump -XX:+AOTClassLinking");
- }
- }
-
return true;
}
@@ -756,6 +745,13 @@ void CDSConfig::setup_compiler_args() {
void CDSConfig::prepare_for_dumping() {
assert(CDSConfig::is_dumping_archive(), "sanity");
+ if (is_dumping_dynamic_archive() && AOTClassLinking) {
+ if (FLAG_IS_CMDLINE(AOTClassLinking)) {
+ log_warning(cds)("AOTClassLinking is not supported for dynamic CDS archive");
+ }
+ FLAG_SET_ERGO(AOTClassLinking, false);
+ }
+
if (is_dumping_dynamic_archive() && !is_using_archive()) {
assert(!is_dumping_static_archive(), "cannot be dumping both static and dynamic archives");
@@ -1014,11 +1010,10 @@ void CDSConfig::stop_using_full_module_graph(const char* reason) {
}
bool CDSConfig::is_dumping_aot_linked_classes() {
- if (is_dumping_preimage_static_archive()) {
- return false;
- } else if (is_dumping_dynamic_archive()) {
- return is_using_full_module_graph() && AOTClassLinking;
- } else if (is_dumping_static_archive()) {
+ if (is_dumping_classic_static_archive() || is_dumping_final_static_archive()) {
+ // FMG is required to guarantee that all cached boot/platform/app classes
+ // are visible in the production run, so they can be unconditionally
+ // loaded during VM bootstrap.
return is_dumping_full_module_graph() && AOTClassLinking;
} else {
return false;
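The new check in prepare_for_dumping() warns only when the user explicitly asked for AOTClassLinking on the command line, and silently drops an ergonomic default otherwise. A standalone sketch of that "warn only if set on the command line" pattern; the flag bookkeeping below is a simplification, not HotSpot's real flags framework.

#include <cstdio>

enum class FlagOrigin { DEFAULT, ERGONOMIC, COMMAND_LINE };

struct BoolFlag {
  bool value;
  FlagOrigin origin;
  bool is_cmdline() const { return origin == FlagOrigin::COMMAND_LINE; }
  void set_ergo(bool v) { value = v; origin = FlagOrigin::ERGONOMIC; }
};

// Mirrors the prepare_for_dumping() logic: dynamic archives cannot use
// AOT class linking, so turn it off, but only complain if the user set it.
static void prepare_for_dynamic_dump(BoolFlag& aot_class_linking) {
  if (aot_class_linking.value) {
    if (aot_class_linking.is_cmdline()) {
      std::fprintf(stderr, "warning: AOTClassLinking is not supported for dynamic CDS archive\n");
    }
    aot_class_linking.set_ergo(false);
  }
}

int main() {
  BoolFlag from_cmdline{true, FlagOrigin::COMMAND_LINE};
  BoolFlag from_default{true, FlagOrigin::DEFAULT};
  prepare_for_dynamic_dump(from_cmdline);   // warns, then clears
  prepare_for_dynamic_dump(from_default);   // clears silently
  std::printf("cmdline flag now %d, default flag now %d\n",
              (int)from_cmdline.value, (int)from_default.value);
  return 0;
}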
diff --git a/src/hotspot/share/cds/cdsHeapVerifier.cpp b/src/hotspot/share/cds/cdsHeapVerifier.cpp
index a9f46c21ad3..9429a0d8264 100644
--- a/src/hotspot/share/cds/cdsHeapVerifier.cpp
+++ b/src/hotspot/share/cds/cdsHeapVerifier.cpp
@@ -36,6 +36,7 @@
#include "oops/fieldStreams.inline.hpp"
#include "oops/klass.inline.hpp"
#include "oops/oop.inline.hpp"
+#include "oops/oopHandle.inline.hpp"
#include "runtime/fieldDescriptor.inline.hpp"
#if INCLUDE_CDS_JAVA_HEAP
@@ -273,7 +274,8 @@ void CDSHeapVerifier::add_static_obj_field(InstanceKlass* ik, oop field, Symbol*
// This function is called once for every archived heap object. Warn if this object is referenced by
// a static field of a class that's not aot-initialized.
-inline bool CDSHeapVerifier::do_entry(oop& orig_obj, HeapShared::CachedOopInfo& value) {
+inline bool CDSHeapVerifier::do_entry(OopHandle& orig_obj_handle, HeapShared::CachedOopInfo& value) {
+ oop orig_obj = orig_obj_handle.resolve();
_archived_objs++;
if (java_lang_String::is_instance(orig_obj) && HeapShared::is_dumped_interned_string(orig_obj)) {
@@ -323,7 +325,7 @@ public:
// Call this function (from gdb, etc) if you want to know why an object is archived.
void CDSHeapVerifier::trace_to_root(outputStream* st, oop orig_obj) {
- HeapShared::CachedOopInfo* info = HeapShared::archived_object_cache()->get(orig_obj);
+ HeapShared::CachedOopInfo* info = HeapShared::get_cached_oop_info(orig_obj);
if (info != nullptr) {
trace_to_root(st, orig_obj, nullptr, info);
} else {
@@ -357,7 +359,7 @@ const char* static_field_name(oop mirror, oop field) {
int CDSHeapVerifier::trace_to_root(outputStream* st, oop orig_obj, oop orig_field, HeapShared::CachedOopInfo* info) {
int level = 0;
if (info->orig_referrer() != nullptr) {
- HeapShared::CachedOopInfo* ref = HeapShared::archived_object_cache()->get(info->orig_referrer());
+ HeapShared::CachedOopInfo* ref = HeapShared::get_cached_oop_info(info->orig_referrer());
assert(ref != nullptr, "sanity");
level = trace_to_root(st, info->orig_referrer(), orig_obj, ref) + 1;
} else if (java_lang_String::is_instance(orig_obj)) {
diff --git a/src/hotspot/share/cds/cdsHeapVerifier.hpp b/src/hotspot/share/cds/cdsHeapVerifier.hpp
index 811751e8ca2..1cc03975c5c 100644
--- a/src/hotspot/share/cds/cdsHeapVerifier.hpp
+++ b/src/hotspot/share/cds/cdsHeapVerifier.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
#include "cds/heapShared.hpp"
#include "memory/iterator.hpp"
+#include "oops/oopHandle.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/hashTable.hpp"
@@ -80,7 +81,7 @@ public:
virtual void do_klass(Klass* k);
// For HashTable::iterate()
- inline bool do_entry(oop& orig_obj, HeapShared::CachedOopInfo& value);
+ inline bool do_entry(OopHandle& orig_obj, HeapShared::CachedOopInfo& value);
static void verify();
diff --git a/src/hotspot/share/cds/cds_globals.hpp b/src/hotspot/share/cds/cds_globals.hpp
index f4094aec1ac..3e3062097f9 100644
--- a/src/hotspot/share/cds/cds_globals.hpp
+++ b/src/hotspot/share/cds/cds_globals.hpp
@@ -63,10 +63,6 @@
"Average number of symbols per bucket in shared table") \
range(2, 246) \
\
- product(bool, AllowArchivingWithJavaAgent, false, DIAGNOSTIC, \
- "Allow Java agent to be run with CDS dumping (not applicable" \
- " to AOT") \
- \
develop(ccstr, ArchiveHeapTestClass, nullptr, \
"For JVM internal testing only. The static field named " \
"\"archivedObjects\" of the specified class is stored in the " \
diff --git a/src/hotspot/share/cds/dynamicArchive.cpp b/src/hotspot/share/cds/dynamicArchive.cpp
index 58b354b9240..dd24f1e0c51 100644
--- a/src/hotspot/share/cds/dynamicArchive.cpp
+++ b/src/hotspot/share/cds/dynamicArchive.cpp
@@ -160,11 +160,10 @@ public:
SystemDictionaryShared::write_to_archive(false);
cl_config = AOTClassLocationConfig::dumptime()->write_to_archive();
DynamicArchive::dump_array_klasses();
- AOTClassLinker::write_to_archive();
serialized_data = ro_region()->top();
WriteClosure wc(ro_region());
- ArchiveBuilder::serialize_dynamic_archivable_items(&wc);
+ DynamicArchive::serialize(&wc);
}
if (CDSConfig::is_dumping_lambdas_in_legacy_mode()) {
@@ -396,11 +395,6 @@ public:
VMOp_Type type() const { return VMOp_PopulateDumpSharedSpace; }
void doit() {
ResourceMark rm;
- if (AllowArchivingWithJavaAgent) {
- aot_log_warning(aot)("This %s was created with AllowArchivingWithJavaAgent. It should be used "
- "for testing purposes only and should not be used in a production environment",
- CDSConfig::type_of_archive_being_loaded());
- }
AOTClassLocationConfig::dumptime_check_nonempty_dirs();
_builder.doit();
}
@@ -414,6 +408,12 @@ public:
GrowableArray<ObjArrayKlass*>* DynamicArchive::_array_klasses = nullptr;
Array<ObjArrayKlass*>* DynamicArchive::_dynamic_archive_array_klasses = nullptr;
+void DynamicArchive::serialize(SerializeClosure* soc) {
+ SymbolTable::serialize_shared_table_header(soc, false);
+ SystemDictionaryShared::serialize_dictionary_headers(soc, false);
+ soc->do_ptr(&_dynamic_archive_array_klasses);
+}
+
void DynamicArchive::append_array_klass(ObjArrayKlass* ak) {
if (_array_klasses == nullptr) {
_array_klasses = new (mtClassShared) GrowableArray<ObjArrayKlass*>(50, mtClassShared);
@@ -456,10 +456,6 @@ void DynamicArchive::setup_array_klasses() {
}
}
-void DynamicArchive::serialize_array_klasses(SerializeClosure* soc) {
- soc->do_ptr(&_dynamic_archive_array_klasses);
-}
-
void DynamicArchive::make_array_klasses_shareable() {
if (_array_klasses != nullptr) {
int num_array_klasses = _array_klasses->length();
diff --git a/src/hotspot/share/cds/dynamicArchive.hpp b/src/hotspot/share/cds/dynamicArchive.hpp
index 19086053d76..c42c4b7dfde 100644
--- a/src/hotspot/share/cds/dynamicArchive.hpp
+++ b/src/hotspot/share/cds/dynamicArchive.hpp
@@ -71,7 +71,7 @@ public:
static void dump_array_klasses();
static void setup_array_klasses();
static void append_array_klass(ObjArrayKlass* oak);
- static void serialize_array_klasses(SerializeClosure* soc);
+ static void serialize(SerializeClosure* soc);
static void make_array_klasses_shareable();
static void post_dump();
static int num_array_klasses();
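DynamicArchive::serialize() now owns everything the dynamic archive round-trips through a SerializeClosure: the symbol table header, the shared dictionary headers, and the array-klass list pointer. The contract is that dump time and run time call the same function, so both sides visit the same pointers in the same order. A toy write/read closure pair showing that contract; this is not the real SerializeClosure API.

#include <cassert>
#include <cstdio>
#include <vector>

// Minimal stand-in for a SerializeClosure: one visitor either records
// pointers into a buffer (write) or restores them from it (read).
struct SerializeClosure {
  virtual void do_ptr(void** p) = 0;
  virtual ~SerializeClosure() = default;
};

struct WriteClosure : SerializeClosure {
  std::vector<void*> buffer;
  void do_ptr(void** p) override { buffer.push_back(*p); }
};

struct ReadClosure : SerializeClosure {
  const std::vector<void*>& buffer;
  size_t pos = 0;
  explicit ReadClosure(const std::vector<void*>& b) : buffer(b) {}
  void do_ptr(void** p) override { *p = buffer[pos++]; }
};

// Both sides agree on the order because they share one serialize() function.
static int g_symbols = 7;
static int g_array_klasses = 9;
static int* s_symbol_table = &g_symbols;
static int* s_array_klass_list = &g_array_klasses;

static void serialize(SerializeClosure* soc) {
  soc->do_ptr((void**)&s_symbol_table);
  soc->do_ptr((void**)&s_array_klass_list);
}

int main() {
  WriteClosure wc;
  serialize(&wc);                       // dump time: capture the pointers

  s_symbol_table = nullptr;             // pretend this is a fresh run
  s_array_klass_list = nullptr;
  ReadClosure rc(wc.buffer);
  serialize(&rc);                       // run time: restore in the same order

  assert(s_symbol_table == &g_symbols && s_array_klass_list == &g_array_klasses);
  std::printf("restored: %d %d\n", *s_symbol_table, *s_array_klass_list);
  return 0;
}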
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index 409052eae6a..8c2175622e9 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -259,7 +259,6 @@ void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment,
_has_platform_or_app_classes = AOTClassLocationConfig::dumptime()->has_platform_or_app_classes();
_requested_base_address = (char*)SharedBaseAddress;
_mapped_base_address = (char*)SharedBaseAddress;
- _allow_archiving_with_java_agent = AllowArchivingWithJavaAgent;
}
void FileMapHeader::copy_base_archive_name(const char* archive) {
@@ -316,7 +315,6 @@ void FileMapHeader::print(outputStream* st) {
st->print_cr("- _heap_ptrmap_start_pos: %zu", _heap_ptrmap_start_pos);
st->print_cr("- _rw_ptrmap_start_pos: %zu", _rw_ptrmap_start_pos);
st->print_cr("- _ro_ptrmap_start_pos: %zu", _ro_ptrmap_start_pos);
- st->print_cr("- allow_archiving_with_java_agent:%d", _allow_archiving_with_java_agent);
st->print_cr("- use_optimized_module_handling: %d", _use_optimized_module_handling);
st->print_cr("- has_full_module_graph %d", _has_full_module_graph);
st->print_cr("- has_aot_linked_classes %d", _has_aot_linked_classes);
@@ -2051,21 +2049,6 @@ bool FileMapHeader::validate() {
_has_platform_or_app_classes = false;
}
- // Java agents are allowed during run time. Therefore, the following condition is not
- // checked: (!_allow_archiving_with_java_agent && AllowArchivingWithJavaAgent)
- // Note: _allow_archiving_with_java_agent is set in the shared archive during dump time
- // while AllowArchivingWithJavaAgent is set during the current run.
- if (_allow_archiving_with_java_agent && !AllowArchivingWithJavaAgent) {
- AOTMetaspace::report_loading_error("The setting of the AllowArchivingWithJavaAgent is different "
- "from the setting in the %s.", file_type);
- return false;
- }
-
- if (_allow_archiving_with_java_agent) {
- aot_log_warning(aot)("This %s was created with AllowArchivingWithJavaAgent. It should be used "
- "for testing purposes only and should not be used in a production environment", file_type);
- }
-
aot_log_info(aot)("The %s was created with UseCompressedOops = %d, UseCompressedClassPointers = %d, UseCompactObjectHeaders = %d",
file_type, compressed_oops(), compressed_class_pointers(), compact_headers());
if (compressed_oops() != UseCompressedOops || compressed_class_pointers() != UseCompressedClassPointers) {
diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp
index b40e793a0fd..a58271eefc7 100644
--- a/src/hotspot/share/cds/filemap.hpp
+++ b/src/hotspot/share/cds/filemap.hpp
@@ -135,7 +135,6 @@ private:
char* _requested_base_address; // Archive relocation is not necessary if we map with this base address.
char* _mapped_base_address; // Actual base address where archive is mapped.
- bool _allow_archiving_with_java_agent; // setting of the AllowArchivingWithJavaAgent option
bool _use_optimized_module_handling;// No module-relation VM options were specified, so we can skip
// some expensive operations.
bool _has_aot_linked_classes; // Was the CDS archive created with -XX:+AOTClassLinking
diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp
index 6b7cffdf321..92f55ce5b33 100644
--- a/src/hotspot/share/cds/heapShared.cpp
+++ b/src/hotspot/share/cds/heapShared.cpp
@@ -58,6 +58,7 @@
#include "oops/fieldStreams.inline.hpp"
#include "oops/objArrayOop.inline.hpp"
#include "oops/oop.inline.hpp"
+#include "oops/oopHandle.inline.hpp"
#include "oops/typeArrayOop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "runtime/arguments.hpp"
@@ -159,12 +160,35 @@ bool HeapShared::is_subgraph_root_class(InstanceKlass* ik) {
is_subgraph_root_class_of(fmg_archive_subgraph_entry_fields, ik);
}
+oop HeapShared::CachedOopInfo::orig_referrer() const {
+ return _orig_referrer.resolve();
+}
+
unsigned HeapShared::oop_hash(oop const& p) {
+ assert(SafepointSynchronize::is_at_safepoint() ||
+ JavaThread::current()->is_in_no_safepoint_scope(), "sanity");
// Do not call p->identity_hash() as that will update the
// object header.
return primitive_hash(cast_from_oop(p));
}
+unsigned int HeapShared::oop_handle_hash_raw(const OopHandle& oh) {
+ return oop_hash(oh.resolve());
+}
+
+unsigned int HeapShared::oop_handle_hash(const OopHandle& oh) {
+ oop o = oh.resolve();
+ if (o == nullptr) {
+ return 0;
+ } else {
+ return o->identity_hash();
+ }
+}
+
+bool HeapShared::oop_handle_equals(const OopHandle& a, const OopHandle& b) {
+ return a.resolve() == b.resolve();
+}
+
static void reset_states(oop obj, TRAPS) {
Handle h_obj(THREAD, obj);
InstanceKlass* klass = InstanceKlass::cast(obj->klass());
@@ -216,7 +240,8 @@ HeapShared::ArchivedObjectCache* HeapShared::_archived_object_cache = nullptr;
bool HeapShared::has_been_archived(oop obj) {
assert(CDSConfig::is_dumping_heap(), "dump-time only");
- return archived_object_cache()->get(obj) != nullptr;
+ OopHandle oh(&obj);
+ return archived_object_cache()->get(oh) != nullptr;
}
int HeapShared::append_root(oop obj) {
@@ -303,7 +328,9 @@ bool HeapShared::archive_object(oop obj, oop referrer, KlassSubGraphInfo* subgra
count_allocation(obj->size());
ArchiveHeapWriter::add_source_obj(obj);
CachedOopInfo info = make_cached_oop_info(obj, referrer);
- archived_object_cache()->put_when_absent(obj, info);
+
+ OopHandle oh(Universe::vm_global(), obj);
+ archived_object_cache()->put_when_absent(oh, info);
archived_object_cache()->maybe_grow();
mark_native_pointers(obj);
@@ -636,14 +663,16 @@ void HeapShared::mark_native_pointers(oop orig_obj) {
}
void HeapShared::get_pointer_info(oop src_obj, bool& has_oop_pointers, bool& has_native_pointers) {
- CachedOopInfo* info = archived_object_cache()->get(src_obj);
+ OopHandle oh(&src_obj);
+ CachedOopInfo* info = archived_object_cache()->get(oh);
assert(info != nullptr, "must be");
has_oop_pointers = info->has_oop_pointers();
has_native_pointers = info->has_native_pointers();
}
void HeapShared::set_has_native_pointers(oop src_obj) {
- CachedOopInfo* info = archived_object_cache()->get(src_obj);
+ OopHandle oh(&src_obj);
+ CachedOopInfo* info = archived_object_cache()->get(oh);
assert(info != nullptr, "must be");
info->set_has_native_pointers();
}
@@ -1453,7 +1482,7 @@ public:
HeapShared::CachedOopInfo HeapShared::make_cached_oop_info(oop obj, oop referrer) {
PointsToOopsChecker points_to_oops_checker;
obj->oop_iterate(&points_to_oops_checker);
- return CachedOopInfo(referrer, points_to_oops_checker.result());
+ return CachedOopInfo(OopHandle(Universe::vm_global(), referrer), points_to_oops_checker.result());
}
void HeapShared::init_box_classes(TRAPS) {
@@ -2096,6 +2125,18 @@ bool HeapShared::is_dumped_interned_string(oop o) {
return _dumped_interned_strings->get(o) != nullptr;
}
+// These tables should be used only within the CDS safepoint, so
+// delete them before we exit the safepoint. Otherwise the tables will
+// contain bad oops after a GC.
+void HeapShared::delete_tables_with_raw_oops() {
+ assert(_seen_objects_table == nullptr, "should have been deleted");
+
+ delete _dumped_interned_strings;
+ _dumped_interned_strings = nullptr;
+
+ ArchiveHeapWriter::delete_tables_with_raw_oops();
+}
+
void HeapShared::debug_trace() {
ResourceMark rm;
oop referrer = _object_being_archived.referrer();
diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp
index 110cdef8796..c9a810a6c0b 100644
--- a/src/hotspot/share/cds/heapShared.hpp
+++ b/src/hotspot/share/cds/heapShared.hpp
@@ -167,6 +167,9 @@ private:
public:
static void debug_trace();
static unsigned oop_hash(oop const& p);
+ static unsigned oop_handle_hash(OopHandle const& oh);
+ static unsigned oop_handle_hash_raw(OopHandle const& oh);
+ static bool oop_handle_equals(const OopHandle& a, const OopHandle& b);
static unsigned string_oop_hash(oop const& string) {
return java_lang_String::hash_code(string);
}
@@ -175,7 +178,7 @@ public:
class CachedOopInfo {
// Used by CDSHeapVerifier.
- oop _orig_referrer;
+ OopHandle _orig_referrer;
// The location of this object inside ArchiveHeapWriter::_buffer
size_t _buffer_offset;
@@ -186,12 +189,12 @@ public:
// One or more fields in this object are pointing to MetaspaceObj
bool _has_native_pointers;
public:
- CachedOopInfo(oop orig_referrer, bool has_oop_pointers)
+ CachedOopInfo(OopHandle orig_referrer, bool has_oop_pointers)
: _orig_referrer(orig_referrer),
_buffer_offset(0),
_has_oop_pointers(has_oop_pointers),
_has_native_pointers(false) {}
- oop orig_referrer() const { return _orig_referrer; }
+ oop orig_referrer() const;
void set_buffer_offset(size_t offset) { _buffer_offset = offset; }
size_t buffer_offset() const { return _buffer_offset; }
bool has_oop_pointers() const { return _has_oop_pointers; }
@@ -202,10 +205,11 @@ public:
private:
static const int INITIAL_TABLE_SIZE = 15889; // prime number
static const int MAX_TABLE_SIZE = 1000000;
- typedef ResizeableHashTable<oop, CachedOopInfo,
-     HeapShared::oop_hash> ArchivedObjectCache;
+ typedef ResizeableHashTable<OopHandle, CachedOopInfo,
+     HeapShared::oop_handle_hash_raw,
+     HeapShared::oop_handle_equals> ArchivedObjectCache;
static ArchivedObjectCache* _archived_object_cache;
class DumpTimeKlassSubGraphInfoTable
@@ -378,6 +382,11 @@ private:
return _archived_object_cache;
}
+ static CachedOopInfo* get_cached_oop_info(oop orig_obj) {
+ OopHandle oh(&orig_obj);
+ return _archived_object_cache->get(oh);
+ }
+
static int archive_exception_instance(oop exception);
static bool archive_reachable_objects_from(int level,
@@ -435,6 +444,7 @@ private:
CDS_JAVA_HEAP_ONLY(return (idx == AOTMetaspace::hp);)
NOT_CDS_JAVA_HEAP_RETURN_(false);
}
+ static void delete_tables_with_raw_oops() NOT_CDS_JAVA_HEAP_RETURN;
static void resolve_classes(JavaThread* current) NOT_CDS_JAVA_HEAP_RETURN;
static void initialize_from_archived_subgraph(JavaThread* current, Klass* k) NOT_CDS_JAVA_HEAP_RETURN;
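Since orig_referrer() is now only declared above, its definition presumably moves out of line; a minimal sketch of what that definition would look like, assuming OopHandle::resolve() as already used by oop_handle_equals() in the heapShared.cpp hunk (an assumption, not quoted from the patch):

    oop HeapShared::CachedOopInfo::orig_referrer() const {
      return _orig_referrer.resolve();  // resolve the stored OopHandle back to an oop
    }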
diff --git a/src/hotspot/share/ci/ciClassList.hpp b/src/hotspot/share/ci/ciClassList.hpp
index 618a052765e..bce1e52e80b 100644
--- a/src/hotspot/share/ci/ciClassList.hpp
+++ b/src/hotspot/share/ci/ciClassList.hpp
@@ -80,6 +80,7 @@ friend class ciObjectFactory; \
// Any more access must be given explicitly.
#define CI_PACKAGE_ACCESS_TO \
friend class ciObjectFactory; \
+friend class VMStructs; \
friend class ciCallSite; \
friend class ciConstantPoolCache; \
friend class ciField; \
diff --git a/src/hotspot/share/ci/ciKlass.hpp b/src/hotspot/share/ci/ciKlass.hpp
index 37091471a2a..8d03b910de5 100644
--- a/src/hotspot/share/ci/ciKlass.hpp
+++ b/src/hotspot/share/ci/ciKlass.hpp
@@ -107,7 +107,7 @@ public:
bool is_in_encoding_range() {
Klass* k = get_Klass();
bool is_in_encoding_range = CompressedKlassPointers::is_encodable(k);
- assert(is_in_encoding_range || k->is_interface() || k->is_abstract(), "sanity");
+ assert(is_in_encoding_range, "sanity");
return is_in_encoding_range;
}
diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp
index 852d23cbc2e..11633c8cb11 100644
--- a/src/hotspot/share/classfile/classFileParser.cpp
+++ b/src/hotspot/share/classfile/classFileParser.cpp
@@ -5929,15 +5929,6 @@ bool ClassFileParser::is_java_lang_ref_Reference_subclass() const {
return _super_klass->reference_type() != REF_NONE;
}
-// Returns true if the future Klass will need to be addressable with a narrow Klass ID.
-bool ClassFileParser::klass_needs_narrow_id() const {
- // Classes that are never instantiated need no narrow Klass Id, since the
- // only point of having a narrow id is to put it into an object header. Keeping
- // never instantiated classes out of class space lessens the class space pressure.
- // For more details, see JDK-8338526.
- return !is_interface() && !is_abstract();
-}
-
// ----------------------------------------------------------------------------
// debugging
diff --git a/src/hotspot/share/classfile/classFileParser.hpp b/src/hotspot/share/classfile/classFileParser.hpp
index 52e966f6260..5d4236132f1 100644
--- a/src/hotspot/share/classfile/classFileParser.hpp
+++ b/src/hotspot/share/classfile/classFileParser.hpp
@@ -515,11 +515,6 @@ class ClassFileParser {
bool is_hidden() const { return _is_hidden; }
bool is_interface() const { return _access_flags.is_interface(); }
- bool is_abstract() const { return _access_flags.is_abstract(); }
-
- // Returns true if the Klass to be generated will need to be addressable
- // with a narrow Klass ID.
- bool klass_needs_narrow_id() const;
ClassLoaderData* loader_data() const { return _loader_data; }
const Symbol* class_name() const { return _class_name; }
diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp
index 1f2eb6d25cc..3c7f6f8130e 100644
--- a/src/hotspot/share/classfile/classLoader.cpp
+++ b/src/hotspot/share/classfile/classLoader.cpp
@@ -1306,24 +1306,6 @@ void ClassLoader::record_result_for_builtin_loader(s2 classpath_index, InstanceK
AOTClassLocationConfig::dumptime_update_max_used_index(classpath_index);
result->set_shared_classpath_index(classpath_index);
-
-#if INCLUDE_CDS_JAVA_HEAP
- if (CDSConfig::is_dumping_heap() && AllowArchivingWithJavaAgent && result->defined_by_boot_loader() &&
- classpath_index < 0 && redefined) {
- // When dumping the heap (which happens only during static dump), classes for the built-in
- // loaders are always loaded from known locations (jimage, classpath or modulepath),
- // so classpath_index should always be >= 0.
- // The only exception is when a java agent is used during dump time (for testing
- // purposes only). If a class is transformed by the agent, the AOTClassLocation of
- // this class may point to an unknown location. This may break heap object archiving,
- // which requires all the boot classes to be from known locations. This is an
- // uncommon scenario (even in test cases). Let's simply disable heap object archiving.
- ResourceMark rm;
- log_warning(aot)("heap objects cannot be written because class %s maybe modified by ClassFileLoadHook.",
- result->external_name());
- CDSConfig::disable_heap_dumping();
- }
-#endif // INCLUDE_CDS_JAVA_HEAP
}
void ClassLoader::record_hidden_class(InstanceKlass* ik) {
diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp
index da093936ce5..60a63892518 100644
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@@ -79,7 +79,7 @@
#include "runtime/javaCalls.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.inline.hpp"
-#include "runtime/reflectionUtils.hpp"
+#include "runtime/reflection.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointVerifiers.hpp"
#include "runtime/threadSMR.hpp"
@@ -3741,20 +3741,17 @@ oop java_lang_reflect_RecordComponent::create(InstanceKlass* holder, RecordCompo
return element();
}
-int reflect_ConstantPool::_oop_offset;
-
-#define CONSTANTPOOL_FIELDS_DO(macro) \
- macro(_oop_offset, k, "constantPoolOop", object_signature, false)
+int reflect_ConstantPool::_vmholder_offset;
void reflect_ConstantPool::compute_offsets() {
InstanceKlass* k = vmClasses::reflect_ConstantPool_klass();
- // The field is called ConstantPool* in the sun.reflect.ConstantPool class.
- CONSTANTPOOL_FIELDS_DO(FIELD_COMPUTE_OFFSET);
+ // The field is injected and called Object vmholder in the jdk.internal.reflect.ConstantPool class.
+ CONSTANTPOOL_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);
}
#if INCLUDE_CDS
void reflect_ConstantPool::serialize_offsets(SerializeClosure* f) {
- CONSTANTPOOL_FIELDS_DO(FIELD_SERIALIZE_OFFSET);
+ CONSTANTPOOL_INJECTED_FIELDS(INJECTED_FIELD_SERIALIZE_OFFSET);
}
#endif
@@ -3907,13 +3904,15 @@ Handle reflect_ConstantPool::create(TRAPS) {
void reflect_ConstantPool::set_cp(oop reflect, ConstantPool* value) {
+ assert(_vmholder_offset != 0, "Uninitialized vmholder");
oop mirror = value->pool_holder()->java_mirror();
// Save the mirror to get back the constant pool.
- reflect->obj_field_put(_oop_offset, mirror);
+ reflect->obj_field_put(_vmholder_offset, mirror);
}
ConstantPool* reflect_ConstantPool::get_cp(oop reflect) {
- oop mirror = reflect->obj_field(_oop_offset);
+ assert(_vmholder_offset != 0, "Uninitialized vmholder");
+ oop mirror = reflect->obj_field(_vmholder_offset);
InstanceKlass* ik = java_lang_Class::as_InstanceKlass(mirror);
// Get the constant pool back from the klass. Since class redefinition
@@ -5554,5 +5553,4 @@ int InjectedField::compute_offset() {
void javaClasses_init() {
JavaClasses::compute_offsets();
JavaClasses::check_offsets();
- FilteredFieldsMap::initialize(); // must be done after computing offsets.
}
diff --git a/src/hotspot/share/classfile/javaClasses.hpp b/src/hotspot/share/classfile/javaClasses.hpp
index 6f82ca10fd6..b137f1a8035 100644
--- a/src/hotspot/share/classfile/javaClasses.hpp
+++ b/src/hotspot/share/classfile/javaClasses.hpp
@@ -936,12 +936,16 @@ class java_lang_Module {
friend class JavaClasses;
};
+#define CONSTANTPOOL_INJECTED_FIELDS(macro) \
+ macro(reflect_ConstantPool, vmholder, object_signature, false)
+
// Interface to jdk.internal.reflect.ConstantPool objects
class reflect_ConstantPool {
private:
// Note that to reduce dependencies on the JDK we compute these
- // offsets at run-time.
- static int _oop_offset;
+ // offsets at run-time. This field is the oop offset for the
+ // actual constant pool, previously called constantPoolOop.
+ static int _vmholder_offset;
static void compute_offsets();
@@ -953,7 +957,6 @@ class reflect_ConstantPool {
// Accessors
static void set_cp(oop reflect, ConstantPool* value);
- static int oop_offset() { CHECK_INIT(_oop_offset); }
static ConstantPool* get_cp(oop reflect);
diff --git a/src/hotspot/share/classfile/javaClassesImpl.hpp b/src/hotspot/share/classfile/javaClassesImpl.hpp
index b450a4e3cc4..5f88f708523 100644
--- a/src/hotspot/share/classfile/javaClassesImpl.hpp
+++ b/src/hotspot/share/classfile/javaClassesImpl.hpp
@@ -42,7 +42,8 @@
THREAD_INJECTED_FIELDS(macro) \
VTHREAD_INJECTED_FIELDS(macro) \
INTERNALERROR_INJECTED_FIELDS(macro) \
- STACKCHUNK_INJECTED_FIELDS(macro)
+ STACKCHUNK_INJECTED_FIELDS(macro) \
+ CONSTANTPOOL_INJECTED_FIELDS(macro)
#define INJECTED_FIELD_COMPUTE_OFFSET(klass, name, signature, may_be_java) \
klass::_##name##_offset = JavaClasses::compute_injected_offset(InjectedFieldID::klass##_##name##_enum);
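To make the injected-field plumbing concrete: expanding CONSTANTPOOL_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET) with the two macros shown above yields, whitespace aside:

    reflect_ConstantPool::_vmholder_offset =
        JavaClasses::compute_injected_offset(InjectedFieldID::reflect_ConstantPool_vmholder_enum);

which is exactly the assignment reflect_ConstantPool::compute_offsets() now relies on instead of the removed CONSTANTPOOL_FIELDS_DO lookup.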
diff --git a/src/hotspot/share/classfile/systemDictionaryShared.cpp b/src/hotspot/share/classfile/systemDictionaryShared.cpp
index eda823704ca..04c2d7ffb84 100644
--- a/src/hotspot/share/classfile/systemDictionaryShared.cpp
+++ b/src/hotspot/share/classfile/systemDictionaryShared.cpp
@@ -89,7 +89,7 @@ DEBUG_ONLY(bool SystemDictionaryShared::_class_loading_may_happen = true;)
#ifdef ASSERT
static void check_klass_after_loading(const Klass* k) {
#ifdef _LP64
- if (k != nullptr && UseCompressedClassPointers && k->needs_narrow_id()) {
+ if (k != nullptr && UseCompressedClassPointers) {
CompressedKlassPointers::check_encodable(k);
}
#endif
@@ -867,11 +867,6 @@ bool SystemDictionaryShared::should_be_excluded(Klass* k) {
} else {
InstanceKlass* ik = InstanceKlass::cast(k);
- if (CDSConfig::is_dumping_dynamic_archive() && ik->in_aot_cache()) {
- // ik is already part of the static archive, so it will never be considered as excluded.
- return false;
- }
-
if (!SafepointSynchronize::is_at_safepoint()) {
if (!ik->is_linked()) {
// should_be_excluded_impl() below doesn't link unlinked classes. We come
diff --git a/src/hotspot/share/code/aotCodeCache.cpp b/src/hotspot/share/code/aotCodeCache.cpp
index a24bae03137..04776f4c16c 100644
--- a/src/hotspot/share/code/aotCodeCache.cpp
+++ b/src/hotspot/share/code/aotCodeCache.cpp
@@ -1365,7 +1365,6 @@ void AOTCodeAddressTable::init_extrs() {
#endif // COMPILER2
#if INCLUDE_G1GC
- SET_ADDRESS(_extrs, G1BarrierSetRuntime::write_ref_field_post_entry);
SET_ADDRESS(_extrs, G1BarrierSetRuntime::write_ref_field_pre_entry);
#endif
#if INCLUDE_SHENANDOAHGC
diff --git a/src/hotspot/share/compiler/compilationPolicy.cpp b/src/hotspot/share/compiler/compilationPolicy.cpp
index 36b597b6e37..c91d299510d 100644
--- a/src/hotspot/share/compiler/compilationPolicy.cpp
+++ b/src/hotspot/share/compiler/compilationPolicy.cpp
@@ -1350,17 +1350,24 @@ CompLevel CompilationPolicy::standard_transition(const methodHandle& method, Com
return next_level;
}
+template<typename Predicate>
+static inline bool apply_predicate(const methodHandle& method, CompLevel cur_level, int i, int b, bool delay_profiling, double delay_profiling_scale) {
+ if (delay_profiling) {
+ return Predicate::apply_scaled(method, cur_level, i, b, delay_profiling_scale);
+ } else {
+ return Predicate::apply(method, cur_level, i, b);
+ }
+}
+
template<typename Predicate>
CompLevel CompilationPolicy::transition_from_none(const methodHandle& method, CompLevel cur_level, bool delay_profiling, bool disable_feedback) {
precond(cur_level == CompLevel_none);
CompLevel next_level = cur_level;
int i = method->invocation_count();
int b = method->backedge_count();
- double scale = delay_profiling ? Tier0ProfileDelayFactor : 1.0;
// If we were at full profile level, would we switch to full opt?
if (transition_from_full_profile(method, CompLevel_full_profile) == CompLevel_full_optimization) {
next_level = CompLevel_full_optimization;
- } else if (!CompilationModeFlag::disable_intermediate() && Predicate::apply_scaled(method, cur_level, i, b, scale)) {
+ } else if (!CompilationModeFlag::disable_intermediate() && apply_predicate<Predicate>(method, cur_level, i, b, delay_profiling, Tier0ProfileDelayFactor)) {
// C1-generated fully profiled code is about 30% slower than the limited profile
// code that has only invocation and backedge counters. The observation is that
// if C2 queue is large enough we can spend too much time in the fully profiled code
@@ -1402,13 +1409,12 @@ CompLevel CompilationPolicy::transition_from_limited_profile(const methodHandle&
CompLevel next_level = cur_level;
int i = method->invocation_count();
int b = method->backedge_count();
- double scale = delay_profiling ? Tier2ProfileDelayFactor : 1.0;
MethodData* mdo = method->method_data();
if (mdo != nullptr) {
if (mdo->would_profile()) {
if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
- Predicate::apply_scaled(method, cur_level, i, b, scale))) {
+ apply_predicate<Predicate>(method, cur_level, i, b, delay_profiling, Tier2ProfileDelayFactor))) {
next_level = CompLevel_full_profile;
}
} else {
@@ -1418,7 +1424,7 @@ CompLevel CompilationPolicy::transition_from_limited_profile(const methodHandle&
// If there is no MDO we need to profile
if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
- Predicate::apply_scaled(method, cur_level, i, b, scale))) {
+ apply_predicate<Predicate>(method, cur_level, i, b, delay_profiling, Tier2ProfileDelayFactor))) {
next_level = CompLevel_full_profile;
}
}
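The template above only requires its Predicate argument to provide static apply() and apply_scaled() functions. A self-contained toy illustration of that dispatch, using a stand-in predicate and a simplified argument list (not the real CompilationPolicy predicates or signatures):

    #include <iostream>

    struct ExamplePredicate {
      static bool apply(int i, int b)                      { return i + b >= 100; }
      static bool apply_scaled(int i, int b, double scale) { return i + b >= 100 * scale; }
    };

    template <typename Predicate>
    static bool apply_predicate(int i, int b, bool delay_profiling, double scale) {
      // Same shape as the hunk above: use the scaled threshold only when profiling is delayed.
      return delay_profiling ? Predicate::apply_scaled(i, b, scale)
                             : Predicate::apply(i, b);
    }

    int main() {
      std::cout << apply_predicate<ExamplePredicate>(60, 50, true, 2.0) << '\n';   // 0: 110 < 200
      std::cout << apply_predicate<ExamplePredicate>(60, 50, false, 2.0) << '\n';  // 1: 110 >= 100
      return 0;
    }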
diff --git a/src/hotspot/share/compiler/compileBroker.cpp b/src/hotspot/share/compiler/compileBroker.cpp
index d1e3154bbd9..226a6d3ad5c 100644
--- a/src/hotspot/share/compiler/compileBroker.cpp
+++ b/src/hotspot/share/compiler/compileBroker.cpp
@@ -224,12 +224,15 @@ CompileTaskWrapper::CompileTaskWrapper(CompileTask* task) {
CompileTaskWrapper::~CompileTaskWrapper() {
CompilerThread* thread = CompilerThread::current();
+
+ // First, disarm the timeout. Disarming still relies on the underlying task, which is cleared below.
+ thread->timeout()->disarm();
+
CompileTask* task = thread->task();
CompileLog* log = thread->log();
if (log != nullptr && !task->is_unloaded()) task->log_task_done(log);
thread->set_task(nullptr);
thread->set_env(nullptr);
- thread->timeout()->disarm();
if (task->is_blocking()) {
bool free_task = false;
{
@@ -2346,6 +2349,7 @@ void CompileBroker::invoke_compiler_on_method(CompileTask* task) {
while (repeat_compilation_count > 0) {
ResourceMark rm(thread);
task->print_ul("NO CODE INSTALLED");
+ thread->timeout()->reset();
comp->compile_method(&ci_env, target, osr_bci, false, directive);
repeat_compilation_count--;
}
diff --git a/src/hotspot/share/compiler/compilerThread.hpp b/src/hotspot/share/compiler/compilerThread.hpp
index e4641780a12..e5b14560872 100644
--- a/src/hotspot/share/compiler/compilerThread.hpp
+++ b/src/hotspot/share/compiler/compilerThread.hpp
@@ -51,6 +51,7 @@ class CompilerThreadTimeoutGeneric : public CHeapObj<mtCompiler> {
CompilerThreadTimeoutGeneric() {};
void arm() {};
void disarm() {};
+ void reset() {};
bool init_timeout() { return true; };
};
#endif // !LINUX
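Taken together, the compileBroker.cpp and compilerThread.hpp hunks above imply the following per-task timeout lifecycle; the arm() point is an assumption (it is not touched by this patch), only the reset() and disarm() placements come from the diff:

    // thread->timeout()->arm();      // assumed: when the compile task is picked up
    comp->compile_method(&ci_env, target, osr_bci, false, directive);  // normal attempt
    // RepeatCompilation loop: restart the per-attempt timeout before each retry.
    thread->timeout()->reset();
    comp->compile_method(&ci_env, target, osr_bci, false, directive);
    // ~CompileTaskWrapper(): disarm() now runs before set_task(nullptr), since
    // disarming still relies on the underlying task.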
diff --git a/src/hotspot/share/gc/epsilon/epsilonHeap.hpp b/src/hotspot/share/gc/epsilon/epsilonHeap.hpp
index f8aa9d7dbf1..e23e24a5afc 100644
--- a/src/hotspot/share/gc/epsilon/epsilonHeap.hpp
+++ b/src/hotspot/share/gc/epsilon/epsilonHeap.hpp
@@ -29,7 +29,6 @@
#include "gc/epsilon/epsilonBarrierSet.hpp"
#include "gc/epsilon/epsilonMonitoringSupport.hpp"
#include "gc/shared/collectedHeap.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "gc/shared/space.hpp"
#include "memory/virtualspace.hpp"
#include "services/memoryManager.hpp"
diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp
index 425be474602..51c8a53b54a 100644
--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp
+++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp
@@ -23,12 +23,15 @@
*/
#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
+#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#ifdef ASSERT
@@ -42,11 +45,6 @@ void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
bs->gen_pre_barrier_stub(ce, this);
}
-void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
- G1BarrierSetAssembler* bs = (G1BarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
- bs->gen_post_barrier_stub(ce, this);
-}
-
void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
LIR_Opr pre_val, CodeEmitInfo* info) {
LIRGenerator* gen = access.gen();
@@ -114,6 +112,87 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
__ branch_destination(slow->continuation());
}
+class LIR_OpG1PostBarrier : public LIR_Op {
+ friend class LIR_OpVisitState;
+
+private:
+ LIR_Opr _addr;
+ LIR_Opr _new_val;
+ LIR_Opr _thread;
+ LIR_Opr _tmp1;
+ LIR_Opr _tmp2;
+
+public:
+ LIR_OpG1PostBarrier(LIR_Opr addr,
+ LIR_Opr new_val,
+ LIR_Opr thread,
+ LIR_Opr tmp1,
+ LIR_Opr tmp2)
+ : LIR_Op(lir_none, lir_none, nullptr),
+ _addr(addr),
+ _new_val(new_val),
+ _thread(thread),
+ _tmp1(tmp1),
+ _tmp2(tmp2)
+ {}
+
+ virtual void visit(LIR_OpVisitState* state) {
+ state->do_input(_addr);
+ state->do_input(_new_val);
+ state->do_input(_thread);
+
+ // Use temps to enforce different registers.
+ state->do_temp(_addr);
+ state->do_temp(_new_val);
+ state->do_temp(_thread);
+ state->do_temp(_tmp1);
+ state->do_temp(_tmp2);
+
+ if (_info != nullptr) {
+ state->do_info(_info);
+ }
+ }
+
+ virtual void emit_code(LIR_Assembler* ce) {
+ if (_info != nullptr) {
+ ce->add_debug_info_for_null_check_here(_info);
+ }
+
+ Register addr = _addr->as_pointer_register();
+ Register new_val = _new_val->as_pointer_register();
+ Register thread = _thread->as_pointer_register();
+ Register tmp1 = _tmp1->as_pointer_register();
+ Register tmp2 = _tmp2->as_pointer_register();
+
+ // This may happen for a store of x.a = x - we do not need a post barrier for those
+ // as the cross-region test will always exit early anyway.
+ // The post barrier implementations can therefore assume that addr and new_val
+ // are different.
+ if (addr == new_val) {
+ ce->masm()->block_comment(err_msg("same addr/new_val due to self-referential store with imprecise card mark %s", addr->name()));
+ return;
+ }
+
+ G1BarrierSetAssembler* bs_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ bs_asm->g1_write_barrier_post_c1(ce->masm(), addr, new_val, thread, tmp1, tmp2);
+ }
+
+ virtual void print_instr(outputStream* out) const {
+ _addr->print(out); out->print(" ");
+ _new_val->print(out); out->print(" ");
+ _thread->print(out); out->print(" ");
+ _tmp1->print(out); out->print(" ");
+ _tmp2->print(out); out->print(" ");
+ out->cr();
+ }
+
+#ifndef PRODUCT
+ virtual const char* name() const {
+ return "lir_g1_post_barrier";
+ }
+#endif // PRODUCT
+};
+
void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val) {
LIRGenerator* gen = access.gen();
DecoratorSet decorators = access.decorators();
@@ -150,29 +229,11 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_v
}
assert(addr->is_register(), "must be a register at this point");
- LIR_Opr xor_res = gen->new_pointer_register();
- LIR_Opr xor_shift_res = gen->new_pointer_register();
- if (two_operand_lir_form) {
- __ move(addr, xor_res);
- __ logical_xor(xor_res, new_val, xor_res);
- __ move(xor_res, xor_shift_res);
- __ unsigned_shift_right(xor_shift_res,
- LIR_OprFact::intConst(checked_cast(G1HeapRegion::LogOfHRGrainBytes)),
- xor_shift_res,
- LIR_Opr::illegalOpr());
- } else {
- __ logical_xor(addr, new_val, xor_res);
- __ unsigned_shift_right(xor_res,
- LIR_OprFact::intConst(checked_cast(G1HeapRegion::LogOfHRGrainBytes)),
- xor_shift_res,
- LIR_Opr::illegalOpr());
- }
-
- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
-
- CodeStub* slow = new G1PostBarrierStub(addr, new_val);
- __ branch(lir_cond_notEqual, slow);
- __ branch_destination(slow->continuation());
+ __ append(new LIR_OpG1PostBarrier(addr,
+ new_val,
+ gen->getThreadPointer() /* thread */,
+ gen->new_pointer_register() /* tmp1 */,
+ gen->new_pointer_register() /* tmp2 */));
}
void G1BarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) {
@@ -207,20 +268,9 @@ class C1G1PreBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
}
};
-class C1G1PostBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
- virtual OopMapSet* generate_code(StubAssembler* sasm) {
- G1BarrierSetAssembler* bs = (G1BarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
- bs->generate_c1_post_barrier_runtime_stub(sasm);
- return nullptr;
- }
-};
-
bool G1BarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob) {
C1G1PreBarrierCodeGenClosure pre_code_gen_cl;
- C1G1PostBarrierCodeGenClosure post_code_gen_cl;
_pre_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, StubId::NO_STUBID, "g1_pre_barrier_slow",
false, &pre_code_gen_cl);
- _post_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, StubId::NO_STUBID, "g1_post_barrier_slow",
- false, &post_code_gen_cl);
- return _pre_barrier_c1_runtime_code_blob != nullptr && _post_barrier_c1_runtime_code_blob != nullptr;
+ return _pre_barrier_c1_runtime_code_blob != nullptr;
}
diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp
index 4baaf8ac58c..89f5676a2d2 100644
--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp
+++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp
@@ -91,40 +91,11 @@ class G1PreBarrierStub: public CodeStub {
#endif // PRODUCT
};
-class G1PostBarrierStub: public CodeStub {
- friend class G1BarrierSetC1;
- private:
- LIR_Opr _addr;
- LIR_Opr _new_val;
-
- public:
- // addr (the address of the object head) and new_val must be registers.
- G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) {
- FrameMap* f = Compilation::current()->frame_map();
- f->update_reserved_argument_area_size(2 * BytesPerWord);
- }
-
- LIR_Opr addr() const { return _addr; }
- LIR_Opr new_val() const { return _new_val; }
-
- virtual void emit_code(LIR_Assembler* e);
- virtual void visit(LIR_OpVisitState* visitor) {
- // don't pass in the code emit info since it's processed in the fast path
- visitor->do_slow_case();
- visitor->do_input(_addr);
- visitor->do_input(_new_val);
- }
-#ifndef PRODUCT
- virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
-#endif // PRODUCT
-};
-
class CodeBlob;
class G1BarrierSetC1 : public ModRefBarrierSetC1 {
protected:
CodeBlob* _pre_barrier_c1_runtime_code_blob;
- CodeBlob* _post_barrier_c1_runtime_code_blob;
virtual void pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
LIR_Opr pre_val, CodeEmitInfo* info);
@@ -134,11 +105,9 @@ class G1BarrierSetC1 : public ModRefBarrierSetC1 {
public:
G1BarrierSetC1()
- : _pre_barrier_c1_runtime_code_blob(nullptr),
- _post_barrier_c1_runtime_code_blob(nullptr) {}
+ : _pre_barrier_c1_runtime_code_blob(nullptr) {}
CodeBlob* pre_barrier_c1_runtime_code_blob() { return _pre_barrier_c1_runtime_code_blob; }
- CodeBlob* post_barrier_c1_runtime_code_blob() { return _post_barrier_c1_runtime_code_blob; }
virtual bool generate_c1_runtime_stubs(BufferBlob* buffer_blob);
};
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
index bca2255479b..61402301eb1 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
@@ -298,7 +298,13 @@ uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const {
nodes += 6;
}
if ((barrier_data & G1C2BarrierPost) != 0) {
- nodes += 60;
+ // Approximate the number of nodes needed; an if costs 4 nodes (Cmp, Bool,
+ // If, If projection); any other (assembly) instruction is approximated with
+ // a cost of 1.
+ nodes += 4 // base cost for the card write: getting the base offset, address calculation and the card write
+ + 6 // same region check: Uncompress (new_val) oop, xor, shr, (cmp), jmp
+ + 4 // new_val is null check
+ + (UseCondCardMark ? 4 : 0); // card not clean check.
}
return nodes;
}
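Concretely, the itemized estimate above sums to 4 + 6 + 4 = 14 nodes for the post barrier, or 18 when UseCondCardMark adds the card-not-clean check, replacing the previous flat estimate of 60 nodes.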
@@ -386,8 +392,9 @@ public:
}
bool needs_liveness_data(const MachNode* mach) const {
- return G1PreBarrierStubC2::needs_barrier(mach) ||
- G1PostBarrierStubC2::needs_barrier(mach);
+ // Liveness data is only required to compute registers that must be preserved
+ // across the runtime call in the pre-barrier stub.
+ return G1BarrierStubC2::needs_pre_barrier(mach);
}
bool needs_livein_data() const {
@@ -401,10 +408,22 @@ static G1BarrierSetC2State* barrier_set_state() {
G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}
+bool G1BarrierStubC2::needs_pre_barrier(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPre) != 0;
+}
+
+bool G1BarrierStubC2::needs_post_barrier(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPost) != 0;
+}
+
+bool G1BarrierStubC2::post_new_val_may_be_null(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPostNotNull) == 0;
+}
+
G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) {
- return (node->barrier_data() & G1C2BarrierPre) != 0;
+ return needs_pre_barrier(node);
}
G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) {
@@ -448,48 +467,6 @@ void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) {
bs->generate_c2_pre_barrier_stub(&masm, this);
}
-G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
-
-bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) {
- return (node->barrier_data() & G1C2BarrierPost) != 0;
-}
-
-G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) {
- G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node);
- if (!Compile::current()->output()->in_scratch_emit_size()) {
- barrier_set_state()->stubs()->append(stub);
- }
- return stub;
-}
-
-void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) {
- _thread = thread;
- _tmp1 = tmp1;
- _tmp2 = tmp2;
- _tmp3 = tmp3;
-}
-
-Register G1PostBarrierStubC2::thread() const {
- return _thread;
-}
-
-Register G1PostBarrierStubC2::tmp1() const {
- return _tmp1;
-}
-
-Register G1PostBarrierStubC2::tmp2() const {
- return _tmp2;
-}
-
-Register G1PostBarrierStubC2::tmp3() const {
- return _tmp3;
-}
-
-void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) {
- G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
- bs->generate_c2_post_barrier_stub(&masm, this);
-}
-
void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const {
return new (comp_arena) G1BarrierSetC2State(comp_arena);
}
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
index 5f85714d889..601d0f1138e 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
@@ -37,6 +37,10 @@ const int G1C2BarrierPostNotNull = 4;
class G1BarrierStubC2 : public BarrierStubC2 {
public:
+ static bool needs_pre_barrier(const MachNode* node);
+ static bool needs_post_barrier(const MachNode* node);
+ static bool post_new_val_may_be_null(const MachNode* node);
+
G1BarrierStubC2(const MachNode* node);
virtual void emit_code(MacroAssembler& masm) = 0;
};
@@ -64,27 +68,6 @@ public:
virtual void emit_code(MacroAssembler& masm);
};
-class G1PostBarrierStubC2 : public G1BarrierStubC2 {
-private:
- Register _thread;
- Register _tmp1;
- Register _tmp2;
- Register _tmp3;
-
-protected:
- G1PostBarrierStubC2(const MachNode* node);
-
-public:
- static bool needs_barrier(const MachNode* node);
- static G1PostBarrierStubC2* create(const MachNode* node);
- void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg);
- Register thread() const;
- Register tmp1() const;
- Register tmp2() const;
- Register tmp3() const;
- virtual void emit_code(MacroAssembler& masm);
-};
-
class G1BarrierSetC2: public CardTableBarrierSetC2 {
private:
void analyze_dominating_barriers() const;
diff --git a/src/hotspot/share/gc/g1/g1Allocator.cpp b/src/hotspot/share/gc/g1/g1Allocator.cpp
index 7f2916ae895..713bafd4782 100644
--- a/src/hotspot/share/gc/g1/g1Allocator.cpp
+++ b/src/hotspot/share/gc/g1/g1Allocator.cpp
@@ -262,9 +262,6 @@ HeapWord* G1Allocator::survivor_attempt_allocation(uint node_index,
}
}
}
- if (result != nullptr) {
- _g1h->dirty_young_block(result, *actual_word_size);
- }
return result;
}
diff --git a/src/hotspot/share/gc/g1/g1Analytics.cpp b/src/hotspot/share/gc/g1/g1Analytics.cpp
index 8fe0b25ceb7..6e7f46ca1d1 100644
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp
@@ -37,12 +37,10 @@
// They were chosen by running GCOld and SPECjbb on debris with different
// numbers of GC threads and choosing them based on the results
-static double cost_per_logged_card_ms_defaults[] = {
- 0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
-};
+static double cost_per_pending_card_ms_default = 0.01;
// all the same
-static double young_card_scan_to_merge_ratio_defaults[] = {
+static double young_card_merge_to_scan_ratio_defaults[] = {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
};
@@ -78,8 +76,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_concurrent_gc_cpu_time_ms(),
_concurrent_refine_rate_ms_seq(TruncatedSeqLength),
_dirtied_cards_rate_ms_seq(TruncatedSeqLength),
- _dirtied_cards_in_thread_buffers_seq(TruncatedSeqLength),
- _card_scan_to_merge_ratio_seq(TruncatedSeqLength),
+ _card_merge_to_scan_ratio_seq(TruncatedSeqLength),
_cost_per_card_scan_ms_seq(TruncatedSeqLength),
_cost_per_card_merge_ms_seq(TruncatedSeqLength),
_cost_per_code_root_ms_seq(TruncatedSeqLength),
@@ -87,6 +84,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_pending_cards_seq(TruncatedSeqLength),
_card_rs_length_seq(TruncatedSeqLength),
_code_root_rs_length_seq(TruncatedSeqLength),
+ _merge_refinement_table_ms_seq(TruncatedSeqLength),
_constant_other_time_ms_seq(TruncatedSeqLength),
_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
_non_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
@@ -100,17 +98,17 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
uint index = MIN2(ParallelGCThreads - 1, 7u);
- // Start with inverse of maximum STW cost.
- _concurrent_refine_rate_ms_seq.add(1/cost_per_logged_card_ms_defaults[0]);
- // Some applications have very low rates for logging cards.
+ _concurrent_refine_rate_ms_seq.add(1 / cost_per_pending_card_ms_default);
+ // Some applications have very low rates for dirtying cards.
_dirtied_cards_rate_ms_seq.add(0.0);
- _card_scan_to_merge_ratio_seq.set_initial(young_card_scan_to_merge_ratio_defaults[index]);
+ _card_merge_to_scan_ratio_seq.set_initial(young_card_merge_to_scan_ratio_defaults[index]);
_cost_per_card_scan_ms_seq.set_initial(young_only_cost_per_card_scan_ms_defaults[index]);
_card_rs_length_seq.set_initial(0);
_code_root_rs_length_seq.set_initial(0);
_cost_per_byte_copied_ms_seq.set_initial(cost_per_byte_ms_defaults[index]);
+ _merge_refinement_table_ms_seq.add(0);
_constant_other_time_ms_seq.add(constant_other_time_ms_defaults[index]);
_young_other_cost_per_region_ms_seq.add(young_other_cost_per_region_ms_defaults[index]);
_non_young_other_cost_per_region_ms_seq.add(non_young_other_cost_per_region_ms_defaults[index]);
@@ -196,10 +194,6 @@ void G1Analytics::report_dirtied_cards_rate_ms(double cards_per_ms) {
_dirtied_cards_rate_ms_seq.add(cards_per_ms);
}
-void G1Analytics::report_dirtied_cards_in_thread_buffers(size_t cards) {
- _dirtied_cards_in_thread_buffers_seq.add(double(cards));
-}
-
void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_only_phase) {
_cost_per_card_scan_ms_seq.add(cost_per_card_ms, for_young_only_phase);
}
@@ -212,8 +206,8 @@ void G1Analytics::report_cost_per_code_root_scan_ms(double cost_per_code_root_ms
_cost_per_code_root_ms_seq.add(cost_per_code_root_ms, for_young_only_phase);
}
-void G1Analytics::report_card_scan_to_merge_ratio(double merge_to_scan_ratio, bool for_young_only_phase) {
- _card_scan_to_merge_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
+void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_only_phase) {
+ _card_merge_to_scan_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
}
void G1Analytics::report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase) {
@@ -228,6 +222,10 @@ void G1Analytics::report_non_young_other_cost_per_region_ms(double other_cost_pe
_non_young_other_cost_per_region_ms_seq.add(other_cost_per_region_ms);
}
+void G1Analytics::report_merge_refinement_table_time_ms(double merge_refinement_table_time_ms) {
+ _merge_refinement_table_ms_seq.add(merge_refinement_table_time_ms);
+}
+
void G1Analytics::report_constant_other_time_ms(double constant_other_time_ms) {
_constant_other_time_ms_seq.add(constant_other_time_ms);
}
@@ -260,12 +258,8 @@ double G1Analytics::predict_dirtied_cards_rate_ms() const {
return predict_zero_bounded(&_dirtied_cards_rate_ms_seq);
}
-size_t G1Analytics::predict_dirtied_cards_in_thread_buffers() const {
- return predict_size(&_dirtied_cards_in_thread_buffers_seq);
-}
-
size_t G1Analytics::predict_scan_card_num(size_t card_rs_length, bool for_young_only_phase) const {
- return card_rs_length * predict_in_unit_interval(&_card_scan_to_merge_ratio_seq, for_young_only_phase);
+ return card_rs_length * predict_in_unit_interval(&_card_merge_to_scan_ratio_seq, for_young_only_phase);
}
double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_only_phase) const {
@@ -284,6 +278,10 @@ double G1Analytics::predict_object_copy_time_ms(size_t bytes_to_copy, bool for_y
return bytes_to_copy * predict_zero_bounded(&_cost_per_byte_copied_ms_seq, for_young_only_phase);
}
+double G1Analytics::predict_merge_refinement_table_time_ms() const {
+ return predict_zero_bounded(&_merge_refinement_table_ms_seq);
+}
+
double G1Analytics::predict_constant_other_time_ms() const {
return predict_zero_bounded(&_constant_other_time_ms_seq);
}
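As an illustrative example (numbers invented for clarity): with a predicted merge-to-scan ratio of 0.6, predict_scan_card_num(10000, ...) above would estimate roughly 10000 * 0.6 = 6000 cards to scan for a card-based remembered set of length 10000.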
diff --git a/src/hotspot/share/gc/g1/g1Analytics.hpp b/src/hotspot/share/gc/g1/g1Analytics.hpp
index e5e2dd74101..1f609815632 100644
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -56,14 +56,13 @@ class G1Analytics: public CHeapObj<mtGC> {
TruncatedSeq _concurrent_refine_rate_ms_seq;
TruncatedSeq _dirtied_cards_rate_ms_seq;
- TruncatedSeq _dirtied_cards_in_thread_buffers_seq;
- // The ratio between the number of scanned cards and actually merged cards, for
- // young-only and mixed gcs.
- G1PhaseDependentSeq _card_scan_to_merge_ratio_seq;
+ // The ratio of the number of merged cards to actually scanned cards for
+ // card-based remembered sets, for young-only and mixed gcs.
+ G1PhaseDependentSeq _card_merge_to_scan_ratio_seq;
// The cost to scan a card during young-only and mixed gcs in ms.
G1PhaseDependentSeq _cost_per_card_scan_ms_seq;
- // The cost to merge a card during young-only and mixed gcs in ms.
+ // The cost to merge a card from the remembered sets for non-young regions in ms.
G1PhaseDependentSeq _cost_per_card_merge_ms_seq;
// The cost to scan entries in the code root remembered set in ms.
G1PhaseDependentSeq _cost_per_code_root_ms_seq;
@@ -74,6 +73,8 @@ class G1Analytics: public CHeapObj {
G1PhaseDependentSeq _card_rs_length_seq;
G1PhaseDependentSeq _code_root_rs_length_seq;
+ // Prediction for merging the refinement table to the card table during GC.
+ TruncatedSeq _merge_refinement_table_ms_seq;
TruncatedSeq _constant_other_time_ms_seq;
TruncatedSeq _young_other_cost_per_region_ms_seq;
TruncatedSeq _non_young_other_cost_per_region_ms_seq;
@@ -149,14 +150,14 @@ public:
void report_alloc_rate_ms(double alloc_rate);
void report_concurrent_refine_rate_ms(double cards_per_ms);
void report_dirtied_cards_rate_ms(double cards_per_ms);
- void report_dirtied_cards_in_thread_buffers(size_t num_cards);
void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_only_phase);
void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_only_phase);
void report_cost_per_code_root_scan_ms(double cost_per_code_root_ms, bool for_young_only_phase);
- void report_card_scan_to_merge_ratio(double cards_per_entry_ratio, bool for_young_only_phase);
+ void report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_only_phase);
void report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase);
void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
void report_non_young_other_cost_per_region_ms(double other_cost_per_region_ms);
+ void report_merge_refinement_table_time_ms(double pending_card_merge_time_ms);
void report_constant_other_time_ms(double constant_other_time_ms);
void report_pending_cards(double pending_cards, bool for_young_only_phase);
void report_card_rs_length(double card_rs_length, bool for_young_only_phase);
@@ -167,7 +168,6 @@ public:
double predict_concurrent_refine_rate_ms() const;
double predict_dirtied_cards_rate_ms() const;
- size_t predict_dirtied_cards_in_thread_buffers() const;
// Predict how many of the given remembered set of length card_rs_length will add to
// the number of total cards scanned.
@@ -180,6 +180,7 @@ public:
double predict_object_copy_time_ms(size_t bytes_to_copy, bool for_young_only_phase) const;
+ double predict_merge_refinement_table_time_ms() const;
double predict_constant_other_time_ms() const;
double predict_young_other_time_ms(size_t young_num) const;
diff --git a/src/hotspot/share/gc/g1/g1Arguments.cpp b/src/hotspot/share/gc/g1/g1Arguments.cpp
index ee91c327337..5cbafd2ae94 100644
--- a/src/hotspot/share/gc/g1/g1Arguments.cpp
+++ b/src/hotspot/share/gc/g1/g1Arguments.cpp
@@ -68,6 +68,12 @@ void G1Arguments::initialize_alignments() {
if (FLAG_IS_DEFAULT(G1EagerReclaimRemSetThreshold)) {
FLAG_SET_ERGO(G1EagerReclaimRemSetThreshold, G1RemSetArrayOfCardsEntries);
}
+ // G1 prefers to use conditional card marking to avoid overwriting cards that
+ // have already been found to contain a to-collection set reference. This reduces
+ // refinement effort.
+ if (FLAG_IS_DEFAULT(UseCondCardMark)) {
+ FLAG_SET_ERGO(UseCondCardMark, true);
+ }
}
size_t G1Arguments::conservative_max_heap_alignment() {
@@ -241,9 +247,8 @@ void G1Arguments::initialize() {
// Verify that the maximum parallelism isn't too high to eventually overflow
// the refcount in G1CardSetContainer.
- uint max_parallel_refinement_threads = G1ConcRefinementThreads + G1DirtyCardQueueSet::num_par_ids();
uint const divisor = 3; // Safe divisor; we increment by 2 for each claim, but there is a small initial value.
- if (max_parallel_refinement_threads > UINT_MAX / divisor) {
+ if (G1ConcRefinementThreads > UINT_MAX / divisor) {
vm_exit_during_initialization("Too large parallelism for remembered sets.");
}
diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.cpp b/src/hotspot/share/gc/g1/g1BarrierSet.cpp
index c56434340cd..ab7d6febf4c 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp
@@ -32,12 +32,14 @@
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/satbMarkQueue.hpp"
#include "logging/log.hpp"
+#include "memory/iterator.hpp"
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/orderAccess.hpp"
+#include "runtime/threads.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "gc/g1/c1/g1BarrierSetC1.hpp"
@@ -49,18 +51,38 @@
class G1BarrierSetC1;
class G1BarrierSetC2;
-G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
+G1BarrierSet::G1BarrierSet(G1CardTable* card_table,
+ G1CardTable* refinement_table) :
CardTableBarrierSet(make_barrier_set_assembler(),
make_barrier_set_c1(),
make_barrier_set_c2(),
card_table,
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)),
_satb_mark_queue_buffer_allocator("SATB Buffer Allocator", G1SATBBufferSize),
- _dirty_card_queue_buffer_allocator("DC Buffer Allocator", G1UpdateBufferSize),
_satb_mark_queue_set(&_satb_mark_queue_buffer_allocator),
- _dirty_card_queue_set(&_dirty_card_queue_buffer_allocator)
+ _refinement_table(refinement_table)
{}
+G1BarrierSet::~G1BarrierSet() {
+ delete _refinement_table;
+}
+
+void G1BarrierSet::swap_global_card_table() {
+ G1CardTable* temp = static_cast<G1CardTable*>(_card_table);
+ _card_table = _refinement_table;
+ _refinement_table = temp;
+}
+
+void G1BarrierSet::update_card_table_base(Thread* thread) {
+#ifdef ASSERT
+ {
+ ResourceMark rm;
+ assert(thread->is_Java_thread(), "may only update card table base of JavaThreads, not %s", thread->name());
+ }
+#endif
+ G1ThreadLocalData::set_byte_map_base(thread, _card_table->byte_map_base());
+}
+
template <class T> void
G1BarrierSet::write_ref_array_pre_work(T* dst, size_t count) {
G1SATBMarkQueueSet& queue_set = G1BarrierSet::satb_mark_queue_set();
@@ -89,28 +111,14 @@ void G1BarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_u
}
}
-void G1BarrierSet::write_ref_field_post_slow(volatile CardValue* byte) {
- // In the slow path, we know a card is not young
- assert(*byte != G1CardTable::g1_young_card_val(), "slow path invoked without filtering");
- OrderAccess::storeload();
- if (*byte != G1CardTable::dirty_card_val()) {
- *byte = G1CardTable::dirty_card_val();
- Thread* thr = Thread::current();
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thr);
- G1BarrierSet::dirty_card_queue_set().enqueue(queue, byte);
- }
-}
-
void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
if (mr.is_empty()) {
return;
}
- volatile CardValue* byte = _card_table->byte_for(mr.start());
- CardValue* last_byte = _card_table->byte_for(mr.last());
- // skip young gen cards
- if (*byte == G1CardTable::g1_young_card_val()) {
- // MemRegion should not span multiple regions for the young gen.
+ // Skip writes to young gen.
+ if (G1CollectedHeap::heap()->heap_region_containing(mr.start())->is_young()) {
+ // MemRegion should not span multiple regions for arrays in young gen.
DEBUG_ONLY(G1HeapRegion* containing_hr = G1CollectedHeap::heap()->heap_region_containing(mr.start());)
assert(containing_hr->is_young(), "it should be young");
assert(containing_hr->is_in(mr.start()), "it should contain start");
@@ -118,16 +126,25 @@ void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
return;
}
- OrderAccess::storeload();
- // Enqueue if necessary.
- G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
+ // We need to make sure that we get the start/end byte information for the area
+ // to mark from the same card table to avoid getting confused in the mark loop
+ // further below - we might execute while the global card table is being switched.
+ //
+ // It does not matter which card table we write to: at worst we may write to the
+ // new card table (after the switching), which means that we will catch the
+ // marks next time.
+ // If we write to the old card table (which after the switch is the refinement
+ // table), the upcoming handshake will do the memory synchronization.
+ CardTable* card_table = AtomicAccess::load(&_card_table);
+
+ volatile CardValue* byte = card_table->byte_for(mr.start());
+ CardValue* last_byte = card_table->byte_for(mr.last());
+
+ // Dirty cards only if necessary.
for (; byte <= last_byte; byte++) {
CardValue bv = *byte;
- assert(bv != G1CardTable::g1_young_card_val(), "Invalid card");
- if (bv != G1CardTable::dirty_card_val()) {
+ if (bv == G1CardTable::clean_card_val()) {
*byte = G1CardTable::dirty_card_val();
- qset.enqueue(queue, byte);
}
}
}
@@ -148,14 +165,15 @@ void G1BarrierSet::on_thread_attach(Thread* thread) {
assert(!satbq.is_active(), "SATB queue should not be active");
assert(satbq.buffer() == nullptr, "SATB queue should not have a buffer");
assert(satbq.index() == 0, "SATB queue index should be zero");
- G1DirtyCardQueue& dirtyq = G1ThreadLocalData::dirty_card_queue(thread);
- assert(dirtyq.buffer() == nullptr, "Dirty Card queue should not have a buffer");
- assert(dirtyq.index() == 0, "Dirty Card queue index should be zero");
-
// If we are creating the thread during a marking cycle, we should
// set the active field of the SATB queue to true. That involves
// copying the global is_active value to this thread's queue.
satbq.set_active(_satb_mark_queue_set.is_active());
+
+ if (thread->is_Java_thread()) {
+ assert(Threads_lock->is_locked(), "must be, synchronization with refinement.");
+ update_card_table_base(thread);
+ }
}
void G1BarrierSet::on_thread_detach(Thread* thread) {
@@ -165,14 +183,13 @@ void G1BarrierSet::on_thread_detach(Thread* thread) {
SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(thread);
G1BarrierSet::satb_mark_queue_set().flush_queue(queue);
}
- {
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
- G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
- qset.flush_queue(queue);
- qset.record_detached_refinement_stats(queue.refinement_stats());
- }
{
G1RegionPinCache& cache = G1ThreadLocalData::pin_count_cache(thread);
cache.flush();
}
}
+
+void G1BarrierSet::print_on(outputStream* st) const {
+ _card_table->print_on(st, "Card");
+ _refinement_table->print_on(st, "Refinement");
+}
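The swap_global_card_table()/update_card_table_base() pair defined above carries the double-table scheme described in the g1BarrierSet.hpp comment further below. A self-contained miniature of that idea, using stand-in types only (none of this is HotSpot code, and the handshake step is merely sketched in a comment):

    #include <array>
    #include <atomic>

    constexpr int kCards = 8;
    using CardTableStandIn = std::array<unsigned char, kCards>;  // 1 = clean, 0 = dirty

    struct BarrierSetStandIn {
      std::atomic<CardTableStandIn*> card_table{nullptr};  // mutators dirty cards here
      CardTableStandIn* refinement_table = nullptr;        // refinement scans and cleans here

      void mutator_post_barrier(int card) {                // simplified: dirty only if clean
        CardTableStandIn* ct = card_table.load();
        if ((*ct)[card] == 1) (*ct)[card] = 0;
      }

      void swap_tables() {                                 // analogous to swap_global_card_table()
        // Hand the dirty table to refinement and give mutators a clean one. In the
        // patch, a handshake is then expected to call update_card_table_base() on
        // each JavaThread so its thread-local byte-map base follows; omitted here.
        refinement_table = card_table.exchange(refinement_table);
      }
    };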
diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.hpp b/src/hotspot/share/gc/g1/g1BarrierSet.hpp
index 2b1074fcd7a..40e87c373b7 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSet.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,32 +25,65 @@
#ifndef SHARE_GC_G1_G1BARRIERSET_HPP
#define SHARE_GC_G1_G1BARRIERSET_HPP
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/shared/bufferNode.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
class G1CardTable;
+class Thread;
-// This barrier is specialized to use a logging barrier to support
-// snapshot-at-the-beginning marking.
-
+// This barrier set is specialized to manage two card tables:
+// * one the mutator is currently working on ("card table")
+// * one the refinement threads or GC during pause are working on ("refinement table")
+//
+// The card table acts like a regular card table where the mutator dirties cards
+// containing potentially interesting references.
+//
+// When the amount of dirty cards on the card table exceeds a threshold, G1 swaps
+// the card tables and has the refinement threads reduce them by "refining"
+// them.
+// I.e. refinement looks at all dirty cards on the refinement table, and updates
+// the remembered sets accordingly, clearing the cards on the refinement table.
+//
+// Meanwhile the mutator continues dirtying the now empty card table.
+//
+// This separation of data the mutator and refinement threads are working on
+// removes the need for any fine-grained (per mutator write) synchronization between
+// them, keeping the write barrier simple.
+//
+// The refinement threads mark cards in the current collection set specially on the
+// card table - this is fine wrt synchronization with the mutator, because at
+// most the mutator will overwrite it again if there is a race, as G1 will scan the
+// entire card either way during the GC pause.
+//
+// During garbage collection, if the refinement table is known to be non-empty, G1
+// merges it back into the card table (cleaning it in the process); the card table
+// is then scanned for dirty cards.
+//
class G1BarrierSet: public CardTableBarrierSet {
friend class VMStructs;
private:
BufferNode::Allocator _satb_mark_queue_buffer_allocator;
- BufferNode::Allocator _dirty_card_queue_buffer_allocator;
G1SATBMarkQueueSet _satb_mark_queue_set;
- G1DirtyCardQueueSet _dirty_card_queue_set;
+
+ G1CardTable* _refinement_table;
+
+ public:
+ G1BarrierSet(G1CardTable* card_table, G1CardTable* refinement_table);
+ virtual ~G1BarrierSet();
static G1BarrierSet* g1_barrier_set() {
return barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
}
- public:
- G1BarrierSet(G1CardTable* table);
- ~G1BarrierSet() { }
+ G1CardTable* refinement_table() const { return _refinement_table; }
+
+ // Swap the global card table references, without synchronization.
+ void swap_global_card_table();
+
+ // Update the given thread's card table (byte map) base to the current card table's.
+ void update_card_table_base(Thread* thread);
virtual bool card_mark_must_follow_store() const {
return true;
@@ -74,9 +107,8 @@ class G1BarrierSet: public CardTableBarrierSet {
inline void write_region(MemRegion mr);
void write_region(JavaThread* thread, MemRegion mr);
- template
+ template
void write_ref_field_post(T* field);
- void write_ref_field_post_slow(volatile CardValue* byte);
virtual void on_thread_create(Thread* thread);
virtual void on_thread_destroy(Thread* thread);
@@ -87,9 +119,7 @@ class G1BarrierSet: public CardTableBarrierSet {
return g1_barrier_set()->_satb_mark_queue_set;
}
- static G1DirtyCardQueueSet& dirty_card_queue_set() {
- return g1_barrier_set()->_dirty_card_queue_set;
- }
+ virtual void print_on(outputStream* st) const;
// Callbacks for runtime accesses.
template
diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp b/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
index 9678da190af..0888fc58937 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
@@ -75,9 +75,8 @@ inline void G1BarrierSet::write_region(MemRegion mr) {
template
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);
- if (*byte != G1CardTable::g1_young_card_val()) {
- // Take a slow path for cards in old
- write_ref_field_post_slow(byte);
+ if (*byte == G1CardTable::clean_card_val()) {
+ *byte = G1CardTable::dirty_card_val();
}
}
@@ -127,7 +126,7 @@ inline void G1BarrierSet::AccessBarrier<decorators, BarrierSetT>::
oop_store_not_in_heap(T* addr, oop new_value) {
// Apply SATB barriers for all non-heap references, to allow
// concurrent scanning of such references.
- G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
+ G1BarrierSet *bs = g1_barrier_set();
bs->write_ref_field_pre(addr);
Raw::oop_store(addr, new_value);
}
diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
index 205829bba1a..24ade277afe 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
@@ -29,17 +29,17 @@
#include "utilities/macros.hpp"
void G1BarrierSetRuntime::write_ref_array_pre_oop_entry(oop* dst, size_t length) {
- G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
+ G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->write_ref_array_pre(dst, length, false);
}
void G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry(narrowOop* dst, size_t length) {
- G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
+ G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->write_ref_array_pre(dst, length, false);
}
void G1BarrierSetRuntime::write_ref_array_post_entry(HeapWord* dst, size_t length) {
- G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
+ G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->G1BarrierSet::write_ref_array(dst, length);
}
@@ -53,14 +53,6 @@ JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_pre_entry(oopDesc* orig, Jav
G1BarrierSet::satb_mark_queue_set().enqueue_known_active(queue, orig);
JRT_END
-// G1 post write barrier slowpath
-JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTable::CardValue* card_addr,
- JavaThread* thread))
- assert(thread == JavaThread::current(), "pre-condition");
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
- G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr);
-JRT_END
-
JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size))
HeapAccess<>::clone(src, dst, size);
JRT_END
diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
index 27287a0624b..ba7bc4d90f4 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,6 @@ public:
// C2 slow-path runtime calls.
static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread);
- static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread);
static address clone_addr();
};
diff --git a/src/hotspot/share/gc/g1/g1CardTable.cpp b/src/hotspot/share/gc/g1/g1CardTable.cpp
index 303b8cda91f..6df178d49c5 100644
--- a/src/hotspot/share/gc/g1/g1CardTable.cpp
+++ b/src/hotspot/share/gc/g1/g1CardTable.cpp
@@ -28,18 +28,37 @@
#include "logging/log.hpp"
#include "runtime/os.hpp"
-void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
- CardValue *const first = byte_for(mr.start());
- CardValue *const last = byte_after(mr.last());
+void G1CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
+ if (mr.is_empty()) {
+ return;
+ }
+ CardValue* start = byte_for(mr.start());
+ CardValue* end = byte_for(mr.last());
- memset_with_concurrent_readers(first, g1_young_gen, pointer_delta(last, first, sizeof(CardValue)));
-}
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ G1HeapRegion* r = g1h->heap_region_containing(mr.start());
-#ifndef PRODUCT
-void G1CardTable::verify_g1_young_region(MemRegion mr) {
- verify_region(mr, g1_young_gen, true);
+ assert(r == g1h->heap_region_containing(mr.last()), "MemRegion crosses region");
+
+ bool failures = false;
+ for (CardValue* curr = start; curr <= end; ++curr) {
+ CardValue curr_val = *curr;
+ bool failed = (val_equals) ? (curr_val != val) : (curr_val == val);
+ if (failed) {
+ if (!failures) {
+ log_error(gc, verify)("== CT verification failed: [" PTR_FORMAT "," PTR_FORMAT "] r: %d (%s) %sexpecting value: %d",
+ p2i(start), p2i(end), r->hrm_index(), r->get_short_type_str(),
+ (val_equals) ? "" : "not ", val);
+ failures = true;
+ }
+ log_error(gc, verify)("== card " PTR_FORMAT " [" PTR_FORMAT "," PTR_FORMAT "], val: %d",
+ p2i(curr), p2i(addr_for(curr)),
+ p2i((HeapWord*) (((size_t) addr_for(curr)) + _card_size)),
+ (int) curr_val);
+ }
+ }
+ guarantee(!failures, "there should not have been any failures");
}
-#endif
void G1CardTableChangedListener::on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
// Default value for a clean card on the card table is -1. So we cannot take advantage of the zero_filled parameter.
@@ -74,6 +93,5 @@ void G1CardTable::initialize(G1RegionToSpaceMapper* mapper) {
}
bool G1CardTable::is_in_young(const void* p) const {
- volatile CardValue* card = byte_for(p);
- return *card == G1CardTable::g1_young_card_val();
+ return G1CollectedHeap::heap()->heap_region_containing(p)->is_young();
}
diff --git a/src/hotspot/share/gc/g1/g1CardTable.hpp b/src/hotspot/share/gc/g1/g1CardTable.hpp
index 16133029a11..060e5459778 100644
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -52,8 +52,6 @@ class G1CardTable : public CardTable {
public:
enum G1CardValues {
- g1_young_gen = CT_MR_BS_last_reserved << 1,
-
// During evacuation we use the card table to consolidate the cards we need to
// scan for roots onto the card table from the various sources. Further it is
// used to record already completely scanned cards to avoid re-scanning them
@@ -63,18 +61,43 @@ public:
// The merge at the start of each evacuation round simply sets cards to dirty
// that are clean; scanned cards are set to 0x1.
//
- // This means that the LSB determines what to do with the card during evacuation
- // given the following possible values:
+ // This means that the LSB determines whether the card is clean or non-clean
+ // (LSB is 1 -> clean, LSB is 0 -> non-clean) given the following possible values:
//
- // 11111111 - clean, do not scan
- // 00000001 - already scanned, do not scan
+ // xxxxxxx1 - clean, already scanned, do not scan again (during GC only).
+ // 00000100 - dirty, needs to be scanned, dirty from remembered set (during GC only)
+ // 00000010 - dirty, needs to be scanned, contains reference to collection set.
// 00000000 - dirty, needs to be scanned.
//
- g1_card_already_scanned = 0x1
+ // g1_to_cset_card and g1_from_remset_card are both used for optimization and
+ // for more accurate prediction of the card generation rate.
+ //
+ // g1_to_cset_card allows separating the dirty card generation rate of the mutator
+ // (which just dirties cards) from cards that will be scanned during the next
+ // garbage collection anyway.
+ // Further it allows the optimization of not refining them, assuming that their
+ // references to the young gen do not change, and of not adding these cards to any
+ // other remembered set.
+ // This color is sticky during mutator time: refinement threads encountering
+ // this card on the refinement table will just copy it over to the regular card
+ // table without re-refining it. This saves refinement effort spent on that card
+ // because references already found to be interesting usually stay interesting.
+ //
+ // g1_from_remset_card allows separating cards generated by the mutator from
+ // cards in the remembered set, again to make mutator dirty card generation
+ // prediction more accurate.
+ //
+ // More accurate prediction allows better (less wasteful) refinement control.
+ g1_dirty_card = dirty_card,
+ g1_card_already_scanned = 0x1,
+ g1_to_cset_card = 0x2,
+ g1_from_remset_card = 0x4
};
static const size_t WordAllClean = SIZE_MAX;
static const size_t WordAllDirty = 0;
+ static const size_t WordAllFromRemset = (SIZE_MAX / 255) * g1_from_remset_card;
STATIC_ASSERT(BitsPerByte == 8);
static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned;
@@ -83,27 +106,27 @@ public:
_listener.set_card_table(this);
}
- static CardValue g1_young_card_val() { return g1_young_gen; }
static CardValue g1_scanned_card_val() { return g1_card_already_scanned; }
- void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
- void g1_mark_as_young(const MemRegion& mr);
+ void verify_region(MemRegion mr, CardValue val, bool val_equals) override;
size_t index_for_cardvalue(CardValue const* p) const {
return pointer_delta(p, _byte_map, sizeof(CardValue));
}
- // Mark the given card as Dirty if it is Clean. Returns whether the card was
+ // Mark the given card as From Remset if it is Clean. Returns whether the card was
// Clean before this operation. This result may be inaccurate as it does not
// perform the dirtying atomically.
- inline bool mark_clean_as_dirty(CardValue* card);
+ inline bool mark_clean_as_from_remset(CardValue* card);
- // Change Clean cards in a (large) area on the card table as Dirty, preserving
- // already scanned cards. Assumes that most cards in that area are Clean.
- inline void mark_range_dirty(size_t start_card_index, size_t num_cards);
+ // Mark Clean cards in a (large) area of the card table as From_Remset, preserving
+ // cards already marked otherwise. Assumes that most cards in that area are Clean.
+ // Not atomic.
+ inline size_t mark_clean_range_as_from_remset(size_t start_card_index, size_t num_cards);
- // Change the given range of dirty cards to "which". All of these cards must be Dirty.
- inline void change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which);
+ // Change the given range of dirty cards to "which". All of these cards must be non-clean.
+ // Returns the number of pending cards found.
+ inline size_t change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which);
inline uint region_idx_for(CardValue* p);
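
For illustration only, not part of the patch: a self-contained sketch of the card encoding described in the enum comment above, where only the least significant bit decides whether a card still needs scanning. All names and values below are local to this example.

    #include <cstdint>
    #include <cstdio>

    typedef uint8_t CardValue;

    const CardValue g1_dirty_card           = 0x0; // dirty, needs to be scanned
    const CardValue g1_card_already_scanned = 0x1; // LSB set: clean / already scanned
    const CardValue g1_to_cset_card         = 0x2; // dirty, references the collection set
    const CardValue g1_from_remset_card     = 0x4; // dirty, originated from a remembered set

    // A card needs scanning iff its least significant bit is clear.
    static bool needs_scan(CardValue v) {
      return (v & g1_card_already_scanned) == 0;
    }

    int main() {
      const CardValue samples[] = { 0xff, g1_card_already_scanned, g1_dirty_card,
                                    g1_to_cset_card, g1_from_remset_card };
      for (CardValue v : samples) {
        printf("card 0x%02x -> %s\n", v, needs_scan(v) ? "scan" : "skip");
      }
      return 0;
    }
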
diff --git a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp
index 03bce7d50d7..370dc22ded0 100644
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,25 +28,39 @@
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1HeapRegion.hpp"
+#include "utilities/population_count.hpp"
inline uint G1CardTable::region_idx_for(CardValue* p) {
size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue));
return (uint)(card_idx >> G1HeapRegion::LogCardsPerRegion);
}
-inline bool G1CardTable::mark_clean_as_dirty(CardValue* card) {
+inline bool G1CardTable::mark_clean_as_from_remset(CardValue* card) {
CardValue value = *card;
if (value == clean_card_val()) {
- *card = dirty_card_val();
+ *card = g1_from_remset_card;
return true;
}
return false;
}
-inline void G1CardTable::mark_range_dirty(size_t start_card_index, size_t num_cards) {
+// Returns bits from a where mask is 0, and bits from b where mask is 1.
+//
+// Example:
+// a = 0xAAAAAAAA
+// b = 0xBBBBBBBB
+// mask = 0xFF00FF00
+// result = 0xBBAABBAA
+inline size_t blend(size_t a, size_t b, size_t mask) {
+ return (a & ~mask) | (b & mask);
+}
+
+inline size_t G1CardTable::mark_clean_range_as_from_remset(size_t start_card_index, size_t num_cards) {
assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
+ size_t result = 0;
+
size_t const num_chunks = num_cards / sizeof(size_t);
size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@@ -54,31 +68,33 @@ inline void G1CardTable::mark_range_dirty(size_t start_card_index, size_t num_ca
while (cur_word < end_word_map) {
size_t value = *cur_word;
if (value == WordAllClean) {
- *cur_word = WordAllDirty;
- } else if (value == WordAllDirty) {
- // do nothing.
+ *cur_word = WordAllFromRemset;
+ result += sizeof(size_t);
+ } else if ((value & WordAlreadyScanned) == 0) {
+ // Do nothing if there is no "Clean" card in it.
} else {
- // There is a mix of cards in there. Tread slowly.
- CardValue* cur = (CardValue*)cur_word;
- for (size_t i = 0; i < sizeof(size_t); i++) {
- CardValue value = *cur;
- if (value == clean_card_val()) {
- *cur = dirty_card_val();
- }
- cur++;
- }
+ // There is a mix of cards in there. Tread "slowly".
+ size_t clean_card_mask = (value & WordAlreadyScanned) * 0xff; // All "Clean" cards have 0xff, all other places 0x00 now.
+ result += population_count(clean_card_mask) / BitsPerByte;
+ *cur_word = blend(value, WordAllFromRemset, clean_card_mask);
}
cur_word++;
}
+ return result;
}
-inline void G1CardTable::change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which) {
+inline size_t G1CardTable::change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which) {
+ size_t result = 0;
for (CardValue* i_card = start_card; i_card < end_card; ++i_card) {
CardValue value = *i_card;
- assert(value == dirty_card_val(),
+ assert((value & g1_card_already_scanned) == 0,
"Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start_card), p2i(end_card));
+ if (value == g1_dirty_card) {
+ result++;
+ }
*i_card = which;
}
+ return result;
}
#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */
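
A minimal standalone sketch, not part of the patch, of the byte-mask trick used by mark_clean_range_as_from_remset above: the per-byte LSB mask multiplied by 0xff turns every Clean card into a full-byte mask, so blend() rewrites only the Clean bytes with one word-sized store. The constants and sample word are assumptions chosen for the example.

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    // Same contract as the blend() helper above: bits from a where mask is 0,
    // bits from b where mask is 1.
    static uint64_t blend(uint64_t a, uint64_t b, uint64_t mask) {
      return (a & ~mask) | (b & mask);
    }

    int main() {
      const uint64_t WordAlreadyScanned = UINT64_C(0x0101010101010101); // LSB of each card byte
      const uint64_t WordAllFromRemset  = UINT64_C(0x0404040404040404); // from-remset value in each byte

      // Eight cards in one word: a mix of clean (0xff), dirty (0x00) and to-cset (0x02) cards.
      uint64_t word = UINT64_C(0xff00ff02ff0000ff);

      // Each clean card (LSB set) expands to an all-ones byte; every other byte becomes 0x00.
      uint64_t clean_card_mask = (word & WordAlreadyScanned) * 0xff;

      // Only the clean bytes are rewritten to the from-remset value; the rest keep their color.
      uint64_t result = blend(word, WordAllFromRemset, clean_card_mask);

      printf("before: 0x%016" PRIx64 "\nafter:  0x%016" PRIx64 "\n", word, result);
      return 0;
    }
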
diff --git a/src/hotspot/share/gc/g1/g1CardTableClaimTable.cpp b/src/hotspot/share/gc/g1/g1CardTableClaimTable.cpp
new file mode 100644
index 00000000000..e0cadbdd907
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1CardTableClaimTable.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "gc/g1/g1CardTableClaimTable.inline.hpp"
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1HeapRegion.inline.hpp"
+#include "gc/shared/workerThread.hpp"
+#include "memory/allocation.hpp"
+#include "utilities/checkedCast.hpp"
+#include "utilities/powerOfTwo.hpp"
+
+G1CardTableClaimTable::G1CardTableClaimTable(uint chunks_per_region) :
+ _max_reserved_regions(0),
+ _card_claims(nullptr),
+ _cards_per_chunk(checked_cast<uint>(G1HeapRegion::CardsPerRegion / chunks_per_region))
+{
+ guarantee(chunks_per_region > 0, "%u chunks per region", chunks_per_region);
+}
+
+G1CardTableClaimTable::~G1CardTableClaimTable() {
+ FREE_C_HEAP_ARRAY(uint, _card_claims);
+}
+
+void G1CardTableClaimTable::initialize(uint max_reserved_regions) {
+ assert(_card_claims == nullptr, "Must not be initialized twice");
+ _card_claims = NEW_C_HEAP_ARRAY(uint, max_reserved_regions, mtGC);
+ _max_reserved_regions = max_reserved_regions;
+ reset_all_to_unclaimed();
+}
+
+void G1CardTableClaimTable::reset_all_to_unclaimed() {
+ for (uint i = 0; i < _max_reserved_regions; i++) {
+ _card_claims[i] = 0;
+ }
+}
+
+void G1CardTableClaimTable::reset_all_to_claimed() {
+ for (uint i = 0; i < _max_reserved_regions; i++) {
+ _card_claims[i] = (uint)G1HeapRegion::CardsPerRegion;
+ }
+}
+
+void G1CardTableClaimTable::heap_region_iterate_from_worker_offset(G1HeapRegionClosure* cl, uint worker_id, uint max_workers) {
+ // Every worker will actually look at all regions, skipping over regions that
+ // are completed.
+ const size_t n_regions = _max_reserved_regions;
+ const uint start_index = (uint)(worker_id * n_regions / max_workers);
+
+ for (uint count = 0; count < n_regions; count++) {
+ const uint index = (start_index + count) % n_regions;
+ assert(index < n_regions, "sanity");
+ // Skip over fully processed regions
+ if (!has_unclaimed_cards(index)) {
+ continue;
+ }
+ G1HeapRegion* r = G1CollectedHeap::heap()->region_at(index);
+ bool res = cl->do_heap_region(r);
+ if (res) {
+ return;
+ }
+ }
+}
+
+G1CardTableChunkClaimer::G1CardTableChunkClaimer(G1CardTableClaimTable* scan_state, uint region_idx) :
+ _claim_values(scan_state),
+ _region_idx(region_idx),
+ _cur_claim(0) {
+ guarantee(size() <= G1HeapRegion::CardsPerRegion, "Should not claim more space than possible.");
+}
+
+G1ChunkScanner::G1ChunkScanner(CardValue* const start_card, CardValue* const end_card) :
+ _start_card(start_card),
+ _end_card(end_card) {
+ assert(is_word_aligned(start_card), "precondition");
+ assert(is_word_aligned(end_card), "precondition");
+}
diff --git a/src/hotspot/share/gc/g1/g1CardTableClaimTable.hpp b/src/hotspot/share/gc/g1/g1CardTableClaimTable.hpp
new file mode 100644
index 00000000000..4f524b83f97
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1CardTableClaimTable.hpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
+#define SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
+
+#include "gc/g1/g1CardTable.hpp"
+#include "memory/allocation.hpp"
+
+class G1HeapRegionClosure;
+
+// Helper class representing claim values for the cards in the card table corresponding
+// to a region.
+// I.e. for every region this class stores an atomic counter that represents the
+// number of cards from 0 to the number of cards per region already claimed for
+// this region.
+// If the claimed value is >= the number of cards of a region, the region can be
+// considered fully claimed.
+//
+// Claiming works on full region (all cards in region) or a range of contiguous cards
+// (chunk). Chunk size is given at construction time.
+class G1CardTableClaimTable : public CHeapObj<mtGC> {
+ uint _max_reserved_regions;
+
+ // Card table iteration claim values for every heap region, from 0 (completely unclaimed)
+ // to (>=) G1HeapRegion::CardsPerRegion (completely claimed).
+ uint volatile* _card_claims;
+
+ uint _cards_per_chunk; // For conversion between card index and chunk index.
+
+ // Claim increment number of cards, returning the previous claim value.
+ inline uint claim_cards(uint region, uint increment);
+
+public:
+ G1CardTableClaimTable(uint chunks_per_region);
+ ~G1CardTableClaimTable();
+
+ // Allocates the data structure and initializes the claims to unclaimed.
+ void initialize(uint max_reserved_regions);
+
+ void reset_all_to_unclaimed();
+ void reset_all_to_claimed();
+
+ inline bool has_unclaimed_cards(uint region);
+ inline void reset_to_unclaimed(uint region);
+
+ // Claims all cards in that region, returning the previous claim value.
+ inline uint claim_all_cards(uint region);
+
+ // Claim a single chunk in that region, returning the previous claim value.
+ inline uint claim_chunk(uint region);
+ inline uint cards_per_chunk() const;
+
+ size_t max_reserved_regions() { return _max_reserved_regions; }
+
+ void heap_region_iterate_from_worker_offset(G1HeapRegionClosure* cl, uint worker_id, uint max_workers);
+};
+
+// Helper class to claim dirty chunks within the card table for a given region.
+class G1CardTableChunkClaimer {
+ G1CardTableClaimTable* _claim_values;
+
+ uint _region_idx;
+ uint _cur_claim;
+
+public:
+ G1CardTableChunkClaimer(G1CardTableClaimTable* claim_table, uint region_idx);
+
+ inline bool has_next();
+
+ inline uint value() const;
+ inline uint size() const;
+};
+
+// Helper class to locate consecutive dirty cards inside a range of cards.
+class G1ChunkScanner {
+ using Word = size_t;
+ using CardValue = G1CardTable::CardValue;
+
+ CardValue* const _start_card;
+ CardValue* const _end_card;
+
+ static const size_t ExpandedToScanMask = G1CardTable::WordAlreadyScanned;
+ static const size_t ToScanMask = G1CardTable::g1_card_already_scanned;
+
+ inline bool is_card_dirty(const CardValue* const card) const;
+
+ inline bool is_word_aligned(const void* const addr) const;
+
+ inline CardValue* find_first_dirty_card(CardValue* i_card) const;
+ inline CardValue* find_first_non_dirty_card(CardValue* i_card) const;
+
+public:
+ G1ChunkScanner(CardValue* const start_card, CardValue* const end_card);
+
+ template <typename Func>
+ void on_dirty_cards(Func&& f) {
+ for (CardValue* cur_card = _start_card; cur_card < _end_card; /* empty */) {
+ CardValue* dirty_l = find_first_dirty_card(cur_card);
+ CardValue* dirty_r = find_first_non_dirty_card(dirty_l);
+
+ assert(dirty_l <= dirty_r, "inv");
+
+ if (dirty_l == dirty_r) {
+ assert(dirty_r == _end_card, "finished the entire chunk");
+ return;
+ }
+
+ f(dirty_l, dirty_r);
+
+ cur_card = dirty_r + 1;
+ }
+ }
+};
+
+#endif // SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
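
For illustration only, not part of the patch: a self-contained sketch of the claiming scheme the class comment above describes, with one atomic counter per region advanced by a chunk's worth of cards per claim until it reaches the region's card count. The constants are example values, not the real G1 ones.

    #include <atomic>
    #include <cstdio>

    const unsigned CardsPerRegion = 2048; // example value only
    const unsigned CardsPerChunk  = 512;  // example value only

    // One claim counter per region; 0 means completely unclaimed.
    static std::atomic<unsigned> claim{0};

    // Mirrors the has_next()/value() pattern: returns true and the first card of the
    // claimed chunk while unclaimed chunks remain, false once the region is exhausted.
    static bool claim_next_chunk(unsigned* start_card) {
      unsigned prev = claim.fetch_add(CardsPerChunk, std::memory_order_relaxed);
      if (prev >= CardsPerRegion) {
        return false; // some thread (possibly this one) already claimed the rest
      }
      *start_card = prev;
      return true;
    }

    int main() {
      unsigned start;
      while (claim_next_chunk(&start)) {
        printf("claimed cards [%u, %u)\n", start, start + CardsPerChunk);
      }
      return 0;
    }
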
diff --git a/src/hotspot/share/gc/g1/g1CardTableClaimTable.inline.hpp b/src/hotspot/share/gc/g1/g1CardTableClaimTable.inline.hpp
new file mode 100644
index 00000000000..d682f0d17ae
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1CardTableClaimTable.inline.hpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
+#define SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
+
+#include "gc/g1/g1CardTableClaimTable.hpp"
+
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1HeapRegion.inline.hpp"
+#include "runtime/atomicAccess.hpp"
+
+bool G1CardTableClaimTable::has_unclaimed_cards(uint region) {
+ assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
+ return AtomicAccess::load(&_card_claims[region]) < G1HeapRegion::CardsPerRegion;
+}
+
+void G1CardTableClaimTable::reset_to_unclaimed(uint region) {
+ assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
+ AtomicAccess::store(&_card_claims[region], 0u);
+}
+
+uint G1CardTableClaimTable::claim_cards(uint region, uint increment) {
+ assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
+ return AtomicAccess::fetch_then_add(&_card_claims[region], increment, memory_order_relaxed);
+}
+
+uint G1CardTableClaimTable::claim_chunk(uint region) {
+ assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
+ return AtomicAccess::fetch_then_add(&_card_claims[region], cards_per_chunk(), memory_order_relaxed);
+}
+
+uint G1CardTableClaimTable::claim_all_cards(uint region) {
+ return claim_cards(region, (uint)G1HeapRegion::CardsPerRegion);
+}
+
+uint G1CardTableClaimTable::cards_per_chunk() const { return _cards_per_chunk; }
+
+bool G1CardTableChunkClaimer::has_next() {
+ _cur_claim = _claim_values->claim_chunk(_region_idx);
+ return (_cur_claim < G1HeapRegion::CardsPerRegion);
+}
+
+uint G1CardTableChunkClaimer::value() const { return _cur_claim; }
+uint G1CardTableChunkClaimer::size() const { return _claim_values->cards_per_chunk(); }
+
+bool G1ChunkScanner::is_card_dirty(const CardValue* const card) const {
+ return (*card & ToScanMask) == 0;
+}
+
+bool G1ChunkScanner::is_word_aligned(const void* const addr) const {
+ return ((uintptr_t)addr) % sizeof(Word) == 0;
+}
+
+G1CardTable::CardValue* G1ChunkScanner::find_first_dirty_card(CardValue* i_card) const {
+ while (!is_word_aligned(i_card)) {
+ if (is_card_dirty(i_card)) {
+ return i_card;
+ }
+ i_card++;
+ }
+
+ for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
+ Word word_value = *reinterpret_cast<Word*>(i_card);
+ bool has_dirty_cards_in_word = (~word_value & ExpandedToScanMask) != 0;
+
+ if (has_dirty_cards_in_word) {
+ for (uint i = 0; i < sizeof(Word); ++i) {
+ if (is_card_dirty(i_card)) {
+ return i_card;
+ }
+ i_card++;
+ }
+ ShouldNotReachHere();
+ }
+ }
+
+ return _end_card;
+}
+
+G1CardTable::CardValue* G1ChunkScanner::find_first_non_dirty_card(CardValue* i_card) const {
+ while (!is_word_aligned(i_card)) {
+ if (!is_card_dirty(i_card)) {
+ return i_card;
+ }
+ i_card++;
+ }
+
+ for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
+ Word word_value = *reinterpret_cast<Word*>(i_card);
+ bool all_cards_dirty = (word_value & ExpandedToScanMask) == 0;
+
+ if (!all_cards_dirty) {
+ for (uint i = 0; i < sizeof(Word); ++i) {
+ if (!is_card_dirty(i_card)) {
+ return i_card;
+ }
+ i_card++;
+ }
+ ShouldNotReachHere();
+ }
+ }
+
+ return _end_card;
+}
+
+#endif // SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
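
Illustrative sketch, not part of the patch, of the word-at-a-time test behind find_first_dirty_card above: a card is dirty iff its LSB is clear, so ~word & ExpandedToScanMask is non-zero exactly when the word holds at least one dirty card. Names and sample values are local to this example.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    typedef uint8_t  CardValue;
    typedef uint64_t Word;

    const Word ExpandedToScanMask = UINT64_C(0x0101010101010101); // LSB of every card byte

    // True iff at least one of the eight cards in the word has its LSB clear (i.e. is dirty).
    static bool word_has_dirty_card(Word w) {
      return (~w & ExpandedToScanMask) != 0;
    }

    int main() {
      CardValue all_clean[8] = { 0xff, 0xff, 0x01, 0xff, 0xff, 0x01, 0xff, 0xff };
      CardValue one_dirty[8] = { 0xff, 0xff, 0x04, 0xff, 0xff, 0xff, 0xff, 0xff };

      Word w1, w2;
      std::memcpy(&w1, all_clean, sizeof(w1));
      std::memcpy(&w2, one_dirty, sizeof(w2));

      printf("all clean/scanned: %s\n", word_has_dirty_card(w1) ? "has dirty" : "no dirty");
      printf("one from-remset:   %s\n", word_has_dirty_card(w2) ? "has dirty" : "no dirty");
      return 0;
    }
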
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
index 4f7eaa36c2d..ed21c9aa370 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
@@ -38,7 +38,6 @@
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1EvacStats.inline.hpp"
#include "gc/g1/g1FullCollector.hpp"
#include "gc/g1/g1GCCounters.hpp"
@@ -60,10 +59,10 @@
#include "gc/g1/g1ParScanThreadState.inline.hpp"
#include "gc/g1/g1PeriodicGCTask.hpp"
#include "gc/g1/g1Policy.hpp"
-#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/g1RemSet.hpp"
+#include "gc/g1/g1ReviseYoungLengthTask.hpp"
#include "gc/g1/g1RootClosures.hpp"
#include "gc/g1/g1RootProcessor.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
@@ -111,6 +110,7 @@
#include "runtime/init.hpp"
#include "runtime/java.hpp"
#include "runtime/orderAccess.hpp"
+#include "runtime/threads.hpp"
#include "runtime/threadSMR.hpp"
#include "runtime/vmThread.hpp"
#include "utilities/align.hpp"
@@ -146,7 +146,7 @@ void G1CollectedHeap::run_batch_task(G1BatchedTask* cl) {
workers()->run_task(cl, num_workers);
}
-uint G1CollectedHeap::get_chunks_per_region() {
+uint G1CollectedHeap::get_chunks_per_region_for_scan() {
uint log_region_size = G1HeapRegion::LogOfHRGrainBytes;
// Limit the expected input values to current known possible values of the
// (log) region size. Adjust as necessary after testing if changing the permissible
@@ -156,6 +156,18 @@ uint G1CollectedHeap::get_chunks_per_region() {
return 1u << (log_region_size / 2 - 4);
}
+uint G1CollectedHeap::get_chunks_per_region_for_merge() {
+ uint log_region_size = G1HeapRegion::LogOfHRGrainBytes;
+ // Limit the expected input values to current known possible values of the
+ // (log) region size. Adjust as necessary after testing if changing the permissible
+ // values for region size.
+ assert(log_region_size >= 20 && log_region_size <= 29,
+ "expected value in [20,29], but got %u", log_region_size);
+
+ uint half_log_region_size = (log_region_size + 1) / 2;
+ return 1 << (half_log_region_size - 9);
+}
+
G1HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index,
MemRegion mr) {
return new G1HeapRegion(hrs_index, bot(), mr, &_card_set_config);
@@ -614,7 +626,6 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t min_word_size,
assert_heap_not_locked();
if (result != nullptr) {
assert(*actual_word_size != 0, "Actual size must have been set here");
- dirty_young_block(result, *actual_word_size);
} else {
*actual_word_size = 0;
}
@@ -809,11 +820,27 @@ void G1CollectedHeap::prepare_for_mutator_after_full_collection(size_t allocatio
}
void G1CollectedHeap::abort_refinement() {
- // Discard all remembered set updates and reset refinement statistics.
- G1BarrierSet::dirty_card_queue_set().abandon_logs_and_stats();
- assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
- "DCQS should be empty");
- concurrent_refine()->get_and_reset_refinement_stats();
+ G1ConcurrentRefineSweepState& sweep_state = concurrent_refine()->sweep_state();
+ if (sweep_state.is_in_progress()) {
+
+ if (!sweep_state.are_java_threads_synched()) {
+ // Synchronize Java threads with global card table that has already been swapped.
+ class SwapThreadCardTableClosure : public ThreadClosure {
+ public:
+
+ virtual void do_thread(Thread* t) {
+ G1BarrierSet* bs = G1BarrierSet::g1_barrier_set();
+ bs->update_card_table_base(t);
+ }
+ } cl;
+ Threads::java_threads_do(&cl);
+ }
+
+ // Record any available refinement statistics.
+ policy()->record_refinement_stats(sweep_state.stats());
+ sweep_state.complete_work(false /* concurrent */, false /* print_log */);
+ }
+ sweep_state.reset_stats();
}
void G1CollectedHeap::verify_after_full_collection() {
@@ -825,6 +852,7 @@ void G1CollectedHeap::verify_after_full_collection() {
}
_hrm.verify_optional();
_verifier->verify_region_sets_optional();
+ _verifier->verify_card_tables_clean(true /* both_card_tables */);
_verifier->verify_after_gc();
_verifier->verify_bitmap_clear(false /* above_tams_only */);
@@ -1168,8 +1196,13 @@ G1CollectedHeap::G1CollectedHeap() :
_service_thread(nullptr),
_periodic_gc_task(nullptr),
_free_arena_memory_task(nullptr),
+ _revise_young_length_task(nullptr),
_workers(nullptr),
- _card_table(nullptr),
+ _refinement_epoch(0),
+ _last_synchronized_start(0),
+ _last_refinement_epoch_start(0),
+ _yield_duration_in_refinement_epoch(0),
+ _last_safepoint_refinement_epoch(0),
_collection_pause_end(Ticks::now()),
_old_set("Old Region Set", new OldRegionSetChecker()),
_humongous_set("Humongous Region Set", new HumongousRegionSetChecker()),
@@ -1200,7 +1233,7 @@ G1CollectedHeap::G1CollectedHeap() :
_rem_set(nullptr),
_card_set_config(),
_card_set_freelist_pool(G1CardSetConfiguration::num_mem_object_types()),
- _young_regions_cset_group(card_set_config(), &_card_set_freelist_pool, 1u /* group_id */),
+ _young_regions_cset_group(card_set_config(), &_card_set_freelist_pool, G1CSetCandidateGroup::YoungRegionId),
_cm(nullptr),
_cm_thread(nullptr),
_cr(nullptr),
@@ -1289,7 +1322,7 @@ G1RegionToSpaceMapper* G1CollectedHeap::create_aux_memory_mapper(const char* des
jint G1CollectedHeap::initialize_concurrent_refinement() {
jint ecode = JNI_OK;
- _cr = G1ConcurrentRefine::create(policy(), &ecode);
+ _cr = G1ConcurrentRefine::create(this, &ecode);
return ecode;
}
@@ -1345,18 +1378,12 @@ jint G1CollectedHeap::initialize() {
initialize_reserved_region(heap_rs);
// Create the barrier set for the entire reserved region.
- G1CardTable* ct = new G1CardTable(_reserved);
- G1BarrierSet* bs = new G1BarrierSet(ct);
+ G1CardTable* card_table = new G1CardTable(_reserved);
+ G1CardTable* refinement_table = new G1CardTable(_reserved);
+
+ G1BarrierSet* bs = new G1BarrierSet(card_table, refinement_table);
bs->initialize();
assert(bs->is_a(BarrierSet::G1BarrierSet), "sanity");
- BarrierSet::set_barrier_set(bs);
- _card_table = ct;
-
- {
- G1SATBMarkQueueSet& satbqs = bs->satb_mark_queue_set();
- satbqs.set_process_completed_buffers_threshold(G1SATBProcessCompletedThreshold);
- satbqs.set_buffer_enqueue_threshold_percentage(G1SATBBufferEnqueueingThresholdPercent);
- }
// Create space mappers.
size_t page_size = heap_rs.page_size();
@@ -1391,12 +1418,26 @@ jint G1CollectedHeap::initialize() {
G1CardTable::compute_size(heap_rs.size() / HeapWordSize),
G1CardTable::heap_map_factor());
+ G1RegionToSpaceMapper* refinement_cards_storage =
+ create_aux_memory_mapper("Refinement Card Table",
+ G1CardTable::compute_size(heap_rs.size() / HeapWordSize),
+ G1CardTable::heap_map_factor());
+
size_t bitmap_size = G1CMBitMap::compute_size(heap_rs.size());
G1RegionToSpaceMapper* bitmap_storage =
create_aux_memory_mapper("Mark Bitmap", bitmap_size, G1CMBitMap::heap_map_factor());
- _hrm.initialize(heap_storage, bitmap_storage, bot_storage, cardtable_storage);
- _card_table->initialize(cardtable_storage);
+ _hrm.initialize(heap_storage, bitmap_storage, bot_storage, cardtable_storage, refinement_cards_storage);
+ card_table->initialize(cardtable_storage);
+ refinement_table->initialize(refinement_cards_storage);
+
+ BarrierSet::set_barrier_set(bs);
+
+ {
+ G1SATBMarkQueueSet& satbqs = bs->satb_mark_queue_set();
+ satbqs.set_process_completed_buffers_threshold(G1SATBProcessCompletedThreshold);
+ satbqs.set_buffer_enqueue_threshold_percentage(G1SATBBufferEnqueueingThresholdPercent);
+ }
// 6843694 - ensure that the maximum region index can fit
// in the remembered set structures.
@@ -1408,7 +1449,7 @@ jint G1CollectedHeap::initialize() {
guarantee((uintptr_t)(heap_rs.base()) >= G1CardTable::card_size(), "Java heap must not start within the first card.");
G1FromCardCache::initialize(max_num_regions());
// Also create a G1 rem set.
- _rem_set = new G1RemSet(this, _card_table);
+ _rem_set = new G1RemSet(this);
_rem_set->initialize(max_num_regions());
size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1;
@@ -1467,6 +1508,11 @@ jint G1CollectedHeap::initialize() {
_free_arena_memory_task = new G1MonotonicArenaFreeMemoryTask("Card Set Free Memory Task");
_service_thread->register_task(_free_arena_memory_task);
+ if (policy()->use_adaptive_young_list_length()) {
+ _revise_young_length_task = new G1ReviseYoungLengthTask("Revise Young Length List Task");
+ _service_thread->register_task(_revise_young_length_task);
+ }
+
// Here we allocate the dummy G1HeapRegion that is required by the
// G1AllocRegion class.
G1HeapRegion* dummy_region = _hrm.get_dummy_region();
@@ -1495,6 +1541,7 @@ jint G1CollectedHeap::initialize() {
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_parallel_workers);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_mark);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_refine);
+ CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_refine_control);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_service);
G1InitLogger::print();
@@ -1519,12 +1566,35 @@ void G1CollectedHeap::stop() {
void G1CollectedHeap::safepoint_synchronize_begin() {
SuspendibleThreadSet::synchronize();
+
+ _last_synchronized_start = os::elapsed_counter();
}
void G1CollectedHeap::safepoint_synchronize_end() {
+ jlong now = os::elapsed_counter();
+ jlong synchronize_duration = now - _last_synchronized_start;
+
+ if (_last_safepoint_refinement_epoch == _refinement_epoch) {
+ _yield_duration_in_refinement_epoch += synchronize_duration;
+ } else {
+ _last_refinement_epoch_start = now;
+ _last_safepoint_refinement_epoch = _refinement_epoch;
+ _yield_duration_in_refinement_epoch = 0;
+ }
+
SuspendibleThreadSet::desynchronize();
}
+void G1CollectedHeap::set_last_refinement_epoch_start(jlong epoch_start, jlong last_yield_duration) {
+ _last_refinement_epoch_start = epoch_start;
+ guarantee(_yield_duration_in_refinement_epoch >= last_yield_duration, "should be");
+ _yield_duration_in_refinement_epoch -= last_yield_duration;
+}
+
+jlong G1CollectedHeap::yield_duration_in_refinement_epoch() {
+ return _yield_duration_in_refinement_epoch;
+}
+
void G1CollectedHeap::post_initialize() {
CollectedHeap::post_initialize();
ref_processing_init();
@@ -2336,6 +2406,7 @@ void G1CollectedHeap::gc_epilogue(bool full) {
&_collection_set_candidates_card_set_stats);
update_perf_counter_cpu_time();
+ _refinement_epoch++;
}
uint G1CollectedHeap::uncommit_regions(uint region_limit) {
@@ -2468,7 +2539,6 @@ void G1CollectedHeap::verify_before_young_collection(G1HeapVerifier::G1VerifyTyp
Ticks start = Ticks::now();
_verifier->prepare_for_verify();
_verifier->verify_region_sets_optional();
- _verifier->verify_dirty_young_regions();
_verifier->verify_before_gc();
verify_numa_regions("GC Start");
phase_times()->record_verify_before_time_ms((Ticks::now() - start).seconds() * MILLIUNITS);
@@ -2734,6 +2804,11 @@ void G1CollectedHeap::free_region(G1HeapRegion* hr, G1FreeRegionList* free_list)
if (free_list != nullptr) {
free_list->add_ordered(hr);
}
+ if (VerifyDuringGC) {
+ // Card and refinement table must be clear for freed regions.
+ card_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
+ refinement_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
+ }
}
void G1CollectedHeap::retain_region(G1HeapRegion* hr) {
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
index 0bb16edaf78..43839cc48d5 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
@@ -50,7 +50,6 @@
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gcHeapSummary.hpp"
#include "gc/shared/plab.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "gc/shared/taskqueue.hpp"
#include "memory/allocation.hpp"
#include "memory/iterator.hpp"
@@ -76,6 +75,7 @@ class G1GCPhaseTimes;
class G1HeapSizingPolicy;
class G1NewTracer;
class G1RemSet;
+class G1ReviseYoungLengthTask;
class G1ServiceTask;
class G1ServiceThread;
class GCMemoryManager;
@@ -172,9 +172,23 @@ private:
G1ServiceThread* _service_thread;
G1ServiceTask* _periodic_gc_task;
G1MonotonicArenaFreeMemoryTask* _free_arena_memory_task;
+ G1ReviseYoungLengthTask* _revise_young_length_task;
WorkerThreads* _workers;
- G1CardTable* _card_table;
+
+ // The current epoch for refinement, i.e. the number of times the card tables
+ // have been swapped by a garbage collection.
+ // Used for detecting whether concurrent refinement has been interrupted by a
+ // garbage collection.
+ size_t _refinement_epoch;
+
+ // The following members are for tracking safepoint durations between garbage
+ // collections.
+ jlong _last_synchronized_start;
+
+ jlong _last_refinement_epoch_start;
+ jlong _yield_duration_in_refinement_epoch; // Time spent in safepoints since beginning of last refinement epoch.
+ size_t _last_safepoint_refinement_epoch; // Refinement epoch before last safepoint.
Ticks _collection_pause_end;
@@ -542,12 +556,17 @@ public:
void run_batch_task(G1BatchedTask* cl);
// Return "optimal" number of chunks per region we want to use for claiming areas
- // within a region to claim.
+ // within a region during card table scanning.
// The returned value is a trade-off between granularity of work distribution and
// memory usage and maintenance costs of that table.
// Testing showed that 64 for 1M/2M region, 128 for 4M/8M regions, 256 for 16/32M regions,
// and so on seems to be such a good trade-off.
- static uint get_chunks_per_region();
+ static uint get_chunks_per_region_for_scan();
+ // Return "optimal" number of chunks per region we want to use for claiming areas
+ // within a region to claim during card table merging.
+ // This is much smaller than for scanning as the merge work is much smaller.
+ // Currently 1 for 1M regions, 2 for 2/4M regions, 4 for 8/16M regions and so on.
+ static uint get_chunks_per_region_for_merge();
G1Allocator* allocator() {
return _allocator;
@@ -688,11 +707,6 @@ public:
// Add the given region to the retained regions collection set candidates.
void retain_region(G1HeapRegion* hr);
- // It dirties the cards that cover the block so that the post
- // write barrier never queues anything when updating objects on this
- // block. It is assumed (and in fact we assert) that the block
- // belongs to a young region.
- inline void dirty_young_block(HeapWord* start, size_t word_size);
// Frees a humongous region by collapsing it into individual regions
// and calling free_region() for each of them. The freed regions
@@ -906,6 +920,10 @@ public:
void safepoint_synchronize_begin() override;
void safepoint_synchronize_end() override;
+ jlong last_refinement_epoch_start() const { return _last_refinement_epoch_start; }
+ void set_last_refinement_epoch_start(jlong epoch_start, jlong last_yield_duration);
+ jlong yield_duration_in_refinement_epoch();
+
// Does operations required after initialization has been done.
void post_initialize() override;
@@ -1070,7 +1088,16 @@ public:
}
G1CardTable* card_table() const {
- return _card_table;
+ return static_cast<G1CardTable*>(G1BarrierSet::g1_barrier_set()->card_table());
+ }
+
+ G1CardTable* refinement_table() const {
+ return G1BarrierSet::g1_barrier_set()->refinement_table();
+ }
+
+ G1CardTable::CardValue* card_table_base() const {
+ assert(card_table() != nullptr, "must be");
+ return card_table()->byte_map_base();
}
// Iteration functions.
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
index 3370ff9938f..fdc8585dbc0 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
@@ -149,30 +149,6 @@ inline void G1CollectedHeap::old_set_remove(G1HeapRegion* hr) {
_old_set.remove(hr);
}
-// It dirties the cards that cover the block so that the post
-// write barrier never queues anything when updating objects on this
-// block. It is assumed (and in fact we assert) that the block
-// belongs to a young region.
-inline void
-G1CollectedHeap::dirty_young_block(HeapWord* start, size_t word_size) {
- assert_heap_not_locked();
-
- // Assign the containing region to containing_hr so that we don't
- // have to keep calling heap_region_containing() in the
- // asserts below.
- DEBUG_ONLY(G1HeapRegion* containing_hr = heap_region_containing(start);)
- assert(word_size > 0, "pre-condition");
- assert(containing_hr->is_in(start), "it should contain start");
- assert(containing_hr->is_young(), "it should be young");
- assert(!containing_hr->is_humongous(), "it should not be humongous");
-
- HeapWord* end = start + word_size;
- assert(containing_hr->is_in(end - 1), "it should also contain end - 1");
-
- MemRegion mr(start, end);
- card_table()->g1_mark_as_young(mr);
-}
-
inline G1ScannerTasksQueueSet* G1CollectedHeap::task_queues() const {
return _task_queues;
}
diff --git a/src/hotspot/share/gc/g1/g1CollectionSet.cpp b/src/hotspot/share/gc/g1/g1CollectionSet.cpp
index d501ee5b47b..abfb620d626 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp
@@ -308,7 +308,8 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
guarantee(target_pause_time_ms > 0.0,
"target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
- size_t pending_cards = _policy->pending_cards_at_gc_start();
+ bool in_young_only_phase = _policy->collector_state()->in_young_only_phase();
+ size_t pending_cards = _policy->analytics()->predict_pending_cards(in_young_only_phase);
log_trace(gc, ergo, cset)("Start choosing CSet. Pending cards: %zu target pause time: %1.2fms",
pending_cards, target_pause_time_ms);
@@ -323,10 +324,8 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
verify_young_cset_indices();
- size_t num_young_cards = _g1h->young_regions_cardset()->occupied();
- _policy->record_card_rs_length(num_young_cards);
-
- double predicted_base_time_ms = _policy->predict_base_time_ms(pending_cards, num_young_cards);
+ size_t card_rs_length = _policy->analytics()->predict_card_rs_length(in_young_only_phase);
+ double predicted_base_time_ms = _policy->predict_base_time_ms(pending_cards, card_rs_length);
// Base time already includes the whole remembered set related time, so do not add that here
// again.
double predicted_eden_time = _policy->predict_young_region_other_time_ms(eden_region_length) +
diff --git a/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp b/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp
index ccb52922c09..47340fad768 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSetCandidates.cpp
@@ -27,7 +27,7 @@
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "utilities/growableArray.hpp"
-uint G1CSetCandidateGroup::_next_group_id = 2;
+uint G1CSetCandidateGroup::_next_group_id = G1CSetCandidateGroup::InitialId;
G1CSetCandidateGroup::G1CSetCandidateGroup(G1CardSetConfiguration* config, G1MonotonicArenaFreePool* card_set_freelist_pool, uint group_id) :
_candidates(4, mtGCCardSet),
diff --git a/src/hotspot/share/gc/g1/g1CollectionSetCandidates.hpp b/src/hotspot/share/gc/g1/g1CollectionSetCandidates.hpp
index 02a4d5f6d76..0f4e92968fa 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSetCandidates.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSetCandidates.hpp
@@ -73,14 +73,21 @@ class G1CSetCandidateGroup : public CHeapObj<mtGCCardSet>{
size_t _reclaimable_bytes;
double _gc_efficiency;
- // The _group_id is primarily used when printing out per-region liveness information,
- // making it easier to associate regions with their assigned G1CSetCandidateGroup, if any.
- // Note:
- // * _group_id 0 is reserved for special G1CSetCandidateGroups that hold only a single region,
- // such as G1CSetCandidateGroups for retained regions.
- // * _group_id 1 is reserved for the G1CSetCandidateGroup that contains all young regions.
+public:
+ // The _group_id uniquely identifies a candidate group when printing, making it
+ // easier to associate regions with their assigned G1CSetCandidateGroup, if any.
+ // Special values for the id:
+ // * id 0 is reserved for regions that do not have a remembered set.
+ // * id 1 is reserved for the G1CollectionSetCandidate that contains all young regions.
+ // * other ids are handed out incrementally, starting from InitialId.
+ static const uint NoRemSetId = 0;
+ static const uint YoungRegionId = 1;
+ static const uint InitialId = 2;
+
+private:
const uint _group_id;
static uint _next_group_id;
+
public:
G1CSetCandidateGroup();
G1CSetCandidateGroup(G1CardSetConfiguration* config, G1MonotonicArenaFreePool* card_set_freelist_pool, uint group_id);
@@ -95,8 +102,6 @@ public:
G1CardSet* card_set() { return &_card_set; }
const G1CardSet* card_set() const { return &_card_set; }
- uint group_id() const { return _group_id; }
-
void calculate_efficiency();
double liveness_percent() const;
@@ -127,8 +132,10 @@ public:
return _candidates.end();
}
+ uint group_id() const { return _group_id; }
+
static void reset_next_group_id() {
- _next_group_id = 2;
+ _next_group_id = InitialId;
}
};
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
index 6d30a93dafb..97386cb9720 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
@@ -27,13 +27,15 @@
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BatchedTask.hpp"
#include "gc/g1/g1CardSetMemory.hpp"
+#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSetChooser.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
+#include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
#include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/g1/g1HeapRegionManager.hpp"
#include "gc/g1/g1HeapRegionPrinter.hpp"
@@ -482,7 +484,7 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
// _finger set in set_non_marking_state
- _worker_id_offset(G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
+ _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
_max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
// _num_active_tasks set in set_non_marking_state()
// _tasks set inside the constructor
@@ -1140,7 +1142,7 @@ void G1ConcurrentMark::mark_from_roots() {
// worker threads may currently exist and more may not be
// available.
active_workers = _concurrent_workers->set_active_workers(active_workers);
- log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->max_workers());
+ log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
_num_concurrent_workers = active_workers;
@@ -1186,179 +1188,6 @@ void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
}
}
-// Update per-region liveness info based on CM stats. Then, reclaim empty
-// regions right away and select certain regions (e.g. sparse ones) for remset
-// rebuild.
-class G1UpdateRegionLivenessAndSelectForRebuildTask : public WorkerTask {
- G1CollectedHeap* _g1h;
- G1ConcurrentMark* _cm;
- G1HeapRegionClaimer _hrclaimer;
-
- uint volatile _total_selected_for_rebuild;
-
- // Reclaimed empty regions
- G1FreeRegionList _cleanup_list;
-
- struct G1OnRegionClosure : public G1HeapRegionClosure {
- G1CollectedHeap* _g1h;
- G1ConcurrentMark* _cm;
- // The number of regions actually selected for rebuild.
- uint _num_selected_for_rebuild;
-
- size_t _freed_bytes;
- uint _num_old_regions_removed;
- uint _num_humongous_regions_removed;
- G1FreeRegionList* _local_cleanup_list;
-
- G1OnRegionClosure(G1CollectedHeap* g1h,
- G1ConcurrentMark* cm,
- G1FreeRegionList* local_cleanup_list) :
- _g1h(g1h),
- _cm(cm),
- _num_selected_for_rebuild(0),
- _freed_bytes(0),
- _num_old_regions_removed(0),
- _num_humongous_regions_removed(0),
- _local_cleanup_list(local_cleanup_list) {}
-
- void reclaim_empty_region(G1HeapRegion* hr) {
- assert(!hr->has_pinned_objects(), "precondition");
- assert(hr->used() > 0, "precondition");
-
- _freed_bytes += hr->used();
- hr->set_containing_set(nullptr);
- hr->clear_cardtable();
- _cm->clear_statistics(hr);
- G1HeapRegionPrinter::mark_reclaim(hr);
- }
-
- void reclaim_empty_humongous_region(G1HeapRegion* hr) {
- assert(hr->is_starts_humongous(), "precondition");
-
- auto on_humongous_region = [&] (G1HeapRegion* hr) {
- assert(hr->is_humongous(), "precondition");
-
- reclaim_empty_region(hr);
- _num_humongous_regions_removed++;
- _g1h->free_humongous_region(hr, _local_cleanup_list);
- };
-
- _g1h->humongous_obj_regions_iterate(hr, on_humongous_region);
- }
-
- void reclaim_empty_old_region(G1HeapRegion* hr) {
- assert(hr->is_old(), "precondition");
-
- reclaim_empty_region(hr);
- _num_old_regions_removed++;
- _g1h->free_region(hr, _local_cleanup_list);
- }
-
- bool do_heap_region(G1HeapRegion* hr) override {
- G1RemSetTrackingPolicy* tracker = _g1h->policy()->remset_tracker();
- if (hr->is_starts_humongous()) {
- // The liveness of this humongous obj decided by either its allocation
- // time (allocated after conc-mark-start, i.e. live) or conc-marking.
- const bool is_live = _cm->top_at_mark_start(hr) == hr->bottom()
- || _cm->contains_live_object(hr->hrm_index())
- || hr->has_pinned_objects();
- if (is_live) {
- const bool selected_for_rebuild = tracker->update_humongous_before_rebuild(hr);
- auto on_humongous_region = [&] (G1HeapRegion* hr) {
- if (selected_for_rebuild) {
- _num_selected_for_rebuild++;
- }
- _cm->update_top_at_rebuild_start(hr);
- };
-
- _g1h->humongous_obj_regions_iterate(hr, on_humongous_region);
- } else {
- reclaim_empty_humongous_region(hr);
- }
- } else if (hr->is_old()) {
- uint region_idx = hr->hrm_index();
- hr->note_end_of_marking(_cm->top_at_mark_start(hr), _cm->live_bytes(region_idx), _cm->incoming_refs(region_idx));
-
- const bool is_live = hr->live_bytes() != 0
- || hr->has_pinned_objects();
- if (is_live) {
- const bool selected_for_rebuild = tracker->update_old_before_rebuild(hr);
- if (selected_for_rebuild) {
- _num_selected_for_rebuild++;
- }
- _cm->update_top_at_rebuild_start(hr);
- } else {
- reclaim_empty_old_region(hr);
- }
- }
-
- return false;
- }
- };
-
-public:
- G1UpdateRegionLivenessAndSelectForRebuildTask(G1CollectedHeap* g1h,
- G1ConcurrentMark* cm,
- uint num_workers) :
- WorkerTask("G1 Update Region Liveness and Select For Rebuild"),
- _g1h(g1h),
- _cm(cm),
- _hrclaimer(num_workers),
- _total_selected_for_rebuild(0),
- _cleanup_list("Empty Regions After Mark List") {}
-
- ~G1UpdateRegionLivenessAndSelectForRebuildTask() {
- if (!_cleanup_list.is_empty()) {
- log_debug(gc)("Reclaimed %u empty regions", _cleanup_list.length());
- // And actually make them available.
- _g1h->prepend_to_freelist(&_cleanup_list);
- }
- }
-
- void work(uint worker_id) override {
- G1FreeRegionList local_cleanup_list("Local Cleanup List");
- G1OnRegionClosure on_region_cl(_g1h, _cm, &local_cleanup_list);
- _g1h->heap_region_par_iterate_from_worker_offset(&on_region_cl, &_hrclaimer, worker_id);
-
- AtomicAccess::add(&_total_selected_for_rebuild, on_region_cl._num_selected_for_rebuild);
-
- // Update the old/humongous region sets
- _g1h->remove_from_old_gen_sets(on_region_cl._num_old_regions_removed,
- on_region_cl._num_humongous_regions_removed);
-
- {
- MutexLocker x(G1RareEvent_lock, Mutex::_no_safepoint_check_flag);
- _g1h->decrement_summary_bytes(on_region_cl._freed_bytes);
-
- _cleanup_list.add_ordered(&local_cleanup_list);
- assert(local_cleanup_list.is_empty(), "post-condition");
- }
- }
-
- uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; }
-
- static uint desired_num_workers(uint num_regions) {
- const uint num_regions_per_worker = 384;
- return (num_regions + num_regions_per_worker - 1) / num_regions_per_worker;
- }
-};
-
-class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
- G1CollectedHeap* _g1h;
-
-public:
- G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) :
- _g1h(g1h) {
- }
-
- virtual bool do_heap_region(G1HeapRegion* r) {
- // Update the remset tracking state from updating to complete
- // if remembered sets have been rebuilt.
- _g1h->policy()->remset_tracker()->update_after_rebuild(r);
- return false;
- }
-};
-
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
G1CollectedHeap* _g1h;
public:
@@ -1506,6 +1335,20 @@ void G1ConcurrentMark::compute_new_sizes() {
_g1h->monitoring_support()->update_sizes();
}
+class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
+ G1CollectedHeap* _g1h;
+
+public:
+ G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
+
+ bool do_heap_region(G1HeapRegion* r) override {
+ // Update the remset tracking state from updating to complete
+ // if remembered sets have been rebuilt.
+ _g1h->policy()->remset_tracker()->update_after_rebuild(r);
+ return false;
+ }
+};
+
void G1ConcurrentMark::cleanup() {
assert_at_safepoint_on_vm_thread();
@@ -1809,6 +1652,8 @@ class G1RemarkThreadsClosure : public ThreadClosure {
};
class G1CMRemarkTask : public WorkerTask {
+ // For Threads::possibly_parallel_threads_do
+ ThreadsClaimTokenScope _threads_claim_token_scope;
G1ConcurrentMark* _cm;
public:
void work(uint worker_id) {
@@ -1832,7 +1677,7 @@ public:
}
G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
- WorkerTask("Par Remark"), _cm(cm) {
+ WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
_cm->terminator()->reset_for_reuse(active_workers);
}
};
@@ -1851,8 +1696,6 @@ void G1ConcurrentMark::finalize_marking() {
// through the task.
{
- StrongRootsScope srs(active_workers);
-
G1CMRemarkTask remarkTask(this, active_workers);
// We will start all available threads, even if we decide that the
// active_workers will be fewer. The extra ones will just bail out
@@ -3052,11 +2895,9 @@ bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
size_t remset_bytes = r->rem_set()->mem_size();
size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
const char* remset_type = r->rem_set()->get_short_state_str();
- uint cset_group_id = 0;
-
- if (r->rem_set()->has_cset_group()) {
- cset_group_id = r->rem_set()->cset_group_id();
- }
+ uint cset_group_id = r->rem_set()->has_cset_group()
+ ? r->rem_set()->cset_group_id()
+ : G1CSetCandidateGroup::NoRemSetId;
_total_used_bytes += used_bytes;
_total_capacity_bytes += capacity_bytes;
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
index 4977da4729d..752082ce629 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
@@ -580,6 +580,8 @@ public:
// TARS for the given region during remembered set rebuilding.
inline HeapWord* top_at_rebuild_start(G1HeapRegion* r) const;
+ uint worker_id_offset() const { return _worker_id_offset; }
+
// Clear statistics gathered during the concurrent cycle for the given region after
// it has been reclaimed.
void clear_statistics(G1HeapRegion* r);
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMarkRemarkTasks.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMarkRemarkTasks.cpp
new file mode 100644
index 00000000000..fdef4214622
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMarkRemarkTasks.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
+#include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
+#include "gc/g1/g1ConcurrentRefine.hpp"
+#include "gc/g1/g1HeapRegion.inline.hpp"
+#include "gc/g1/g1HeapRegionPrinter.hpp"
+#include "gc/g1/g1RemSetTrackingPolicy.hpp"
+#include "logging/log.hpp"
+#include "runtime/atomicAccess.hpp"
+#include "runtime/mutexLocker.hpp"
+
+struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public G1HeapRegionClosure {
+ G1CollectedHeap* _g1h;
+ G1ConcurrentMark* _cm;
+ // The number of regions actually selected for rebuild.
+ uint _num_selected_for_rebuild;
+
+ size_t _freed_bytes;
+ uint _num_old_regions_removed;
+ uint _num_humongous_regions_removed;
+ G1FreeRegionList* _local_cleanup_list;
+
+ G1OnRegionClosure(G1CollectedHeap* g1h,
+ G1ConcurrentMark* cm,
+ G1FreeRegionList* local_cleanup_list) :
+ _g1h(g1h),
+ _cm(cm),
+ _num_selected_for_rebuild(0),
+ _freed_bytes(0),
+ _num_old_regions_removed(0),
+ _num_humongous_regions_removed(0),
+ _local_cleanup_list(local_cleanup_list) {}
+
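+ // Bookkeeping common to reclaiming an empty old or humongous region: account the
+ // freed bytes, clear its card tables and marking statistics, and notify refinement.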
+ void reclaim_empty_region_common(G1HeapRegion* hr) {
+ assert(!hr->has_pinned_objects(), "precondition");
+ assert(hr->used() > 0, "precondition");
+
+ _freed_bytes += hr->used();
+ hr->set_containing_set(nullptr);
+ hr->clear_both_card_tables();
+ _cm->clear_statistics(hr);
+ G1HeapRegionPrinter::mark_reclaim(hr);
+ _g1h->concurrent_refine()->notify_region_reclaimed(hr);
+ }
+
+ void reclaim_empty_humongous_region(G1HeapRegion* hr) {
+ assert(hr->is_starts_humongous(), "precondition");
+
+ auto on_humongous_region = [&] (G1HeapRegion* hr) {
+ assert(hr->is_humongous(), "precondition");
+
+ _num_humongous_regions_removed++;
+ reclaim_empty_region_common(hr);
+ _g1h->free_humongous_region(hr, _local_cleanup_list);
+ };
+
+ _g1h->humongous_obj_regions_iterate(hr, on_humongous_region);
+ }
+
+ void reclaim_empty_old_region(G1HeapRegion* hr) {
+ assert(hr->is_old(), "precondition");
+
+ _num_old_regions_removed++;
+ reclaim_empty_region_common(hr);
+ _g1h->free_region(hr, _local_cleanup_list);
+ }
+
+ bool do_heap_region(G1HeapRegion* hr) override {
+ G1RemSetTrackingPolicy* tracker = _g1h->policy()->remset_tracker();
+ if (hr->is_starts_humongous()) {
+ // The liveness of this humongous obj is decided by either its allocation
+ // time (allocated after conc-mark-start, i.e. live) or by conc-marking.
+ const bool is_live = _cm->top_at_mark_start(hr) == hr->bottom()
+ || _cm->contains_live_object(hr->hrm_index())
+ || hr->has_pinned_objects();
+ if (is_live) {
+ const bool selected_for_rebuild = tracker->update_humongous_before_rebuild(hr);
+ auto on_humongous_region = [&] (G1HeapRegion* hr) {
+ if (selected_for_rebuild) {
+ _num_selected_for_rebuild++;
+ }
+ _cm->update_top_at_rebuild_start(hr);
+ };
+
+ _g1h->humongous_obj_regions_iterate(hr, on_humongous_region);
+ } else {
+ reclaim_empty_humongous_region(hr);
+ }
+ } else if (hr->is_old()) {
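+ // Record end-of-marking results (TAMS, live bytes, incoming references) for this
+ // old region before deciding whether to keep or reclaim it.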
+ uint region_idx = hr->hrm_index();
+ hr->note_end_of_marking(_cm->top_at_mark_start(hr), _cm->live_bytes(region_idx), _cm->incoming_refs(region_idx));
+
+ const bool is_live = hr->live_bytes() != 0
+ || hr->has_pinned_objects();
+ if (is_live) {
+ const bool selected_for_rebuild = tracker->update_old_before_rebuild(hr);
+ if (selected_for_rebuild) {
+ _num_selected_for_rebuild++;
+ }
+ _cm->update_top_at_rebuild_start(hr);
+ } else {
+ reclaim_empty_old_region(hr);
+ }
+ }
+
+ return false;
+ }
+};
+
+G1UpdateRegionLivenessAndSelectForRebuildTask::G1UpdateRegionLivenessAndSelectForRebuildTask(G1CollectedHeap* g1h,
+ G1ConcurrentMark* cm,
+ uint num_workers) :
+ WorkerTask("G1 Update Region Liveness and Select For Rebuild"),
+ _g1h(g1h),
+ _cm(cm),
+ _hrclaimer(num_workers),
+ _total_selected_for_rebuild(0),
+ _cleanup_list("Empty Regions After Mark List") {}
+
+G1UpdateRegionLivenessAndSelectForRebuildTask::~G1UpdateRegionLivenessAndSelectForRebuildTask() {
+ if (!_cleanup_list.is_empty()) {
+ log_debug(gc)("Reclaimed %u empty regions", _cleanup_list.length());
+ // And actually make them available.
+ _g1h->prepend_to_freelist(&_cleanup_list);
+ }
+}
+
+void G1UpdateRegionLivenessAndSelectForRebuildTask::work(uint worker_id) {
+ G1FreeRegionList local_cleanup_list("Local Cleanup List");
+ G1OnRegionClosure on_region_cl(_g1h, _cm, &local_cleanup_list);
+ _g1h->heap_region_par_iterate_from_worker_offset(&on_region_cl, &_hrclaimer, worker_id);
+
+ AtomicAccess::add(&_total_selected_for_rebuild, on_region_cl._num_selected_for_rebuild);
+
+ // Update the old/humongous region sets
+ _g1h->remove_from_old_gen_sets(on_region_cl._num_old_regions_removed,
+ on_region_cl._num_humongous_regions_removed);
+
+ {
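+ // The G1RareEvent_lock serializes the summary bytes update and the transfer of
+ // the local list into the shared cleanup list across workers.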
+ MutexLocker x(G1RareEvent_lock, Mutex::_no_safepoint_check_flag);
+ _g1h->decrement_summary_bytes(on_region_cl._freed_bytes);
+
+ _cleanup_list.add_ordered(&local_cleanup_list);
+ assert(local_cleanup_list.is_empty(), "post-condition");
+ }
+}
+
+uint G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(uint num_regions) {
+ const uint num_regions_per_worker = 384;
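+ // Round up so that all regions are covered even if num_regions is not a multiple
+ // of num_regions_per_worker.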
+ return (num_regions + num_regions_per_worker - 1) / num_regions_per_worker;
+}
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMarkRemarkTasks.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMarkRemarkTasks.hpp
new file mode 100644
index 00000000000..161f0b4b9f5
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMarkRemarkTasks.hpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1CONCURRENTMARKREMARKTASKS_HPP
+#define SHARE_GC_G1_G1CONCURRENTMARKREMARKTASKS_HPP
+
+#include "gc/g1/g1HeapRegion.hpp"
+#include "gc/g1/g1HeapRegionManager.hpp"
+#include "gc/g1/g1HeapRegionSet.hpp"
+#include "gc/shared/workerThread.hpp"
+
+class G1CollectedHeap;
+class G1ConcurrentMark;
+
+// Update per-region liveness info based on CM stats. Then, reclaim empty
+// regions right away and select certain regions (e.g. sparse ones) for remset
+// rebuild.
+class G1UpdateRegionLivenessAndSelectForRebuildTask : public WorkerTask {
+ G1CollectedHeap* _g1h;
+ G1ConcurrentMark* _cm;
+ G1HeapRegionClaimer _hrclaimer;
+
+ uint volatile _total_selected_for_rebuild;
+
+ // Reclaimed empty regions
+ G1FreeRegionList _cleanup_list;
+
+ struct G1OnRegionClosure;
+
+public:
+ G1UpdateRegionLivenessAndSelectForRebuildTask(G1CollectedHeap* g1h,
+ G1ConcurrentMark* cm,
+ uint num_workers);
+
+ ~G1UpdateRegionLivenessAndSelectForRebuildTask();
+
+ void work(uint worker_id) override;
+
+ uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; }
+
+ static uint desired_num_workers(uint num_regions);
+};
+
+#endif /* SHARE_GC_G1_G1CONCURRENTMARKREMARKTASKS_HPP */
+
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRebuildAndScrub.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRebuildAndScrub.cpp
index 0633e18411d..cd560a41333 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRebuildAndScrub.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRebuildAndScrub.cpp
@@ -245,7 +245,7 @@ class G1RebuildRSAndScrubTask : public WorkerTask {
G1RebuildRSAndScrubRegionClosure(G1ConcurrentMark* cm, bool should_rebuild_remset, uint worker_id) :
_cm(cm),
_bitmap(_cm->mark_bitmap()),
- _rebuild_closure(G1CollectedHeap::heap(), worker_id),
+ _rebuild_closure(G1CollectedHeap::heap(), worker_id + cm->worker_id_offset()),
_should_rebuild_remset(should_rebuild_remset),
_processed_words(0) { }
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp
index 84776b7a4b1..ed6a9ad4292 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp
@@ -22,15 +22,20 @@
*
*/
+#include "gc/g1/g1Analytics.hpp"
#include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1CardTableClaimTable.inline.hpp"
+#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
+#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/shared/gc_globals.hpp"
+#include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/workerThread.hpp"
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/iterator.hpp"
@@ -38,17 +43,15 @@
#include "runtime/mutexLocker.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/ticks.hpp"
#include <math.h>
-G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
+G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread() {
G1ConcurrentRefineThread* result = nullptr;
- if (initializing || !InjectGCWorkerCreationFailure) {
- result = G1ConcurrentRefineThread::create(_cr, worker_id);
- }
+ result = G1ConcurrentRefineThread::create(_cr);
if (result == nullptr || result->osthread() == nullptr) {
- log_warning(gc)("Failed to create refinement thread %u, no more %s",
- worker_id,
+ log_warning(gc)("Failed to create refinement control thread, no more %s",
result == nullptr ? "memory" : "OS threads");
if (result != nullptr) {
delete result;
@@ -60,106 +63,392 @@ G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thr
G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl(uint max_num_threads) :
_cr(nullptr),
- _threads(max_num_threads)
+ _control_thread(nullptr),
+ _workers(nullptr),
+ _max_num_threads(max_num_threads)
{}
G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
- while (_threads.is_nonempty()) {
- delete _threads.pop();
- }
-}
-
-bool G1ConcurrentRefineThreadControl::ensure_threads_created(uint worker_id, bool initializing) {
- assert(worker_id < max_num_threads(), "precondition");
-
- while ((uint)_threads.length() <= worker_id) {
- G1ConcurrentRefineThread* rt = create_refinement_thread(_threads.length(), initializing);
- if (rt == nullptr) {
- return false;
- }
- _threads.push(rt);
- }
-
- return true;
+ delete _control_thread;
+ delete _workers;
}
jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr) {
assert(cr != nullptr, "G1ConcurrentRefine must not be null");
_cr = cr;
- if (max_num_threads() > 0) {
- _threads.push(create_refinement_thread(0, true));
- if (_threads.at(0) == nullptr) {
- vm_shutdown_during_initialization("Could not allocate primary refinement thread");
+ if (is_refinement_enabled()) {
+ _control_thread = create_refinement_thread();
+ if (_control_thread == nullptr) {
+ vm_shutdown_during_initialization("Could not allocate refinement control thread");
return JNI_ENOMEM;
}
-
- if (!UseDynamicNumberOfGCThreads) {
- if (!ensure_threads_created(max_num_threads() - 1, true)) {
- vm_shutdown_during_initialization("Could not allocate refinement threads");
- return JNI_ENOMEM;
- }
- }
+ _workers = new WorkerThreads("G1 Refinement Workers", max_num_threads());
+ _workers->initialize_workers();
}
-
return JNI_OK;
}
#ifdef ASSERT
-void G1ConcurrentRefineThreadControl::assert_current_thread_is_primary_refinement_thread() const {
- assert(Thread::current() == _threads.at(0), "Not primary thread");
+void G1ConcurrentRefineThreadControl::assert_current_thread_is_control_refinement_thread() const {
+ assert(Thread::current() == _control_thread, "Not refinement control thread");
}
#endif // ASSERT
-bool G1ConcurrentRefineThreadControl::activate(uint worker_id) {
- if (ensure_threads_created(worker_id, false)) {
- _threads.at(worker_id)->activate();
- return true;
- }
+void G1ConcurrentRefineThreadControl::activate() {
+ _control_thread->activate();
+}
- return false;
+void G1ConcurrentRefineThreadControl::run_task(WorkerTask* task, uint num_workers) {
+ assert(num_workers >= 1, "must be");
+
+ WithActiveWorkers w(_workers, num_workers);
+ _workers->run_task(task);
+}
+
+void G1ConcurrentRefineThreadControl::control_thread_do(ThreadClosure* tc) {
+ if (is_refinement_enabled()) {
+ tc->do_thread(_control_thread);
+ }
}
void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
- for (G1ConcurrentRefineThread* t : _threads) {
- tc->do_thread(t);
+ if (is_refinement_enabled()) {
+ _workers->threads_do(tc);
}
}
void G1ConcurrentRefineThreadControl::stop() {
- for (G1ConcurrentRefineThread* t : _threads) {
- t->stop();
+ if (is_refinement_enabled()) {
+ _control_thread->stop();
}
}
+G1ConcurrentRefineSweepState::G1ConcurrentRefineSweepState(uint max_reserved_regions) :
+ _state(State::Idle),
+ _sweep_table(new G1CardTableClaimTable(G1CollectedHeap::get_chunks_per_region_for_merge())),
+ _stats()
+{
+ _sweep_table->initialize(max_reserved_regions);
+}
+
+G1ConcurrentRefineSweepState::~G1ConcurrentRefineSweepState() {
+ delete _sweep_table;
+}
+
+void G1ConcurrentRefineSweepState::set_state_start_time() {
+ _state_start[static_cast<uint>(_state)] = Ticks::now();
+}
+
+Tickspan G1ConcurrentRefineSweepState::get_duration(State start, State end) {
+ return _state_start[static_cast<uint>(end)] - _state_start[static_cast<uint>(start)];
+}
+
+void G1ConcurrentRefineSweepState::reset_stats() {
+ stats()->reset();
+}
+
+void G1ConcurrentRefineSweepState::add_yield_during_sweep_duration(jlong duration) {
+ stats()->inc_yield_during_sweep_duration(duration);
+}
+
+bool G1ConcurrentRefineSweepState::advance_state(State next_state) {
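+ // Only advance while the sweep is still in progress; a garbage collection may have
+ // reset the state to Idle, in which case stay Idle and report the interruption.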
+ bool result = is_in_progress();
+ if (result) {
+ _state = next_state;
+ } else {
+ _state = State::Idle;
+ }
+ return result;
+}
+
+void G1ConcurrentRefineSweepState::assert_state(State expected) {
+ assert(_state == expected, "must be %s but is %s", state_name(expected), state_name(_state));
+}
+
+void G1ConcurrentRefineSweepState::start_work() {
+ assert_state(State::Idle);
+
+ set_state_start_time();
+
+ _stats.reset();
+
+ _state = State::SwapGlobalCT;
+}
+
+bool G1ConcurrentRefineSweepState::swap_global_card_table() {
+ assert_state(State::SwapGlobalCT);
+
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine Global Card Table Swap");
+ set_state_start_time();
+
+ {
+ // We can't have any new threads in the process of being created while we
+ // swap the card table, because we read the current card table state during
+ // initialization.
+ // A safepoint may occur during that time, so leave the STS temporarily.
+ SuspendibleThreadSetLeaver sts_leave;
+
+ MutexLocker mu(Threads_lock);
+ // A GC that advanced the epoch might have happened, which already switched
+ // the global card table. In that case do nothing.
+ if (is_in_progress()) {
+ G1BarrierSet::g1_barrier_set()->swap_global_card_table();
+ }
+ }
+
+ return advance_state(State::SwapJavaThreadsCT);
+}
+
+bool G1ConcurrentRefineSweepState::swap_java_threads_ct() {
+ assert_state(State::SwapJavaThreadsCT);
+
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine Java Thread CT swap");
+
+ set_state_start_time();
+
+ {
+ // Need to leave the STS to avoid potential deadlock in the handshake.
+ SuspendibleThreadSetLeaver sts;
+
+ class G1SwapThreadCardTableClosure : public HandshakeClosure {
+ public:
+ G1SwapThreadCardTableClosure() : HandshakeClosure("G1 Java Thread CT swap") { }
+
+ virtual void do_thread(Thread* thread) {
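+ // Switch this Java thread's card table base to the newly activated global card table.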
+ G1BarrierSet* bs = G1BarrierSet::g1_barrier_set();
+ bs->update_card_table_base(thread);
+ }
+ } cl;
+ Handshake::execute(&cl);
+ }
+
+ return advance_state(State::SynchronizeGCThreads);
+}
+
+bool G1ConcurrentRefineSweepState::swap_gc_threads_ct() {
+ assert_state(State::SynchronizeGCThreads);
+
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine GC Thread CT swap");
+
+ set_state_start_time();
+
+ {
+ class RendezvousGCThreads: public VM_Operation {
+ public:
+ VMOp_Type type() const { return VMOp_G1RendezvousGCThreads; }
+
+ virtual bool evaluate_at_safepoint() const {
+ // We only care about synchronizing the GC threads.
+ // Leave the Java threads running.
+ return false;
+ }
+
+ virtual bool skip_thread_oop_barriers() const {
+ fatal("Concurrent VMOps should not call this");
+ return true;
+ }
+
+ void doit() {
+ // Light weight "handshake" of the GC threads for memory synchronization;
+ // both changes to the Java heap need to be synchronized as well as the
+ // previous global card table reference change, so that no GC thread
+ // accesses the wrong card table.
+ // For example in the rebuild remset process the marking threads write
+ // marks into the card table, and that card table reference must be the
+ // correct one.
+ SuspendibleThreadSet::synchronize();
+ SuspendibleThreadSet::desynchronize();
+ }
+ } op;
+
+ SuspendibleThreadSetLeaver sts_leave;
+ VMThread::execute(&op);
+ }
+
+ return advance_state(State::SnapshotHeap);
+}
+
+void G1ConcurrentRefineSweepState::snapshot_heap(bool concurrent) {
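+ // The concurrent path is a regular step of the sweep state machine; the
+ // non-concurrent path creates a snapshot from scratch at a safepoint
+ // (see sweep_state_for_merge()).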
+ if (concurrent) {
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine Snapshot Heap");
+
+ assert_state(State::SnapshotHeap);
+
+ set_state_start_time();
+
+ snapshot_heap_inner();
+
+ advance_state(State::SweepRT);
+ } else {
+ assert_state(State::Idle);
+ assert_at_safepoint();
+
+ snapshot_heap_inner();
+ }
+}
+
+void G1ConcurrentRefineSweepState::sweep_refinement_table_start() {
+ assert_state(State::SweepRT);
+
+ set_state_start_time();
+}
+
+bool G1ConcurrentRefineSweepState::sweep_refinement_table_step() {
+ assert_state(State::SweepRT);
+
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine Table Step");
+
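+ // Perform one bounded sweep step over the refinement table using the currently
+ // wanted number of refinement workers; returns true once the whole table has
+ // been swept.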
+ G1ConcurrentRefine* cr = G1CollectedHeap::heap()->concurrent_refine();
+
+ G1ConcurrentRefineSweepTask task(_sweep_table, &_stats, cr->num_threads_wanted());
+ cr->run_with_refinement_workers(&task);
+
+ if (task.sweep_completed()) {
+ advance_state(State::CompleteRefineWork);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool G1ConcurrentRefineSweepState::complete_work(bool concurrent, bool print_log) {
+ if (concurrent) {
+ assert_state(State::CompleteRefineWork);
+ } else {
+ // May have been forced to complete at any other time.
+ assert(is_in_progress() && _state != State::CompleteRefineWork, "must be but is %s", state_name(_state));
+ }
+
+ set_state_start_time();
+
+ if (print_log) {
+ G1ConcurrentRefineStats* s = &_stats;
+
+ log_debug(gc, refine)("Refinement took %.2fms (pre-sweep %.2fms card refine %.2f) "
+ "(scanned %zu clean %zu (%.2f%%) not_clean %zu (%.2f%%) not_parsable %zu "
+ "refers_to_cset %zu (%.2f%%) still_refers_to_cset %zu (%.2f%%) no_cross_region %zu pending %zu)",
+ get_duration(State::Idle, _state).seconds() * 1000.0,
+ get_duration(State::Idle, State::SweepRT).seconds() * 1000.0,
+ TimeHelper::counter_to_millis(s->refine_duration()),
+ s->cards_scanned(),
+ s->cards_clean(),
+ percent_of(s->cards_clean(), s->cards_scanned()),
+ s->cards_not_clean(),
+ percent_of(s->cards_not_clean(), s->cards_scanned()),
+ s->cards_not_parsable(),
+ s->cards_refer_to_cset(),
+ percent_of(s->cards_refer_to_cset(), s->cards_not_clean()),
+ s->cards_already_refer_to_cset(),
+ percent_of(s->cards_already_refer_to_cset(), s->cards_not_clean()),
+ s->cards_no_cross_region(),
+ s->cards_pending()
+ );
+ }
+
+ bool has_sweep_rt_work = _state == State::SweepRT;
+
+ advance_state(State::Idle);
+ return has_sweep_rt_work;
+}
+
+void G1ConcurrentRefineSweepState::snapshot_heap_inner() {
+ // G1CollectedHeap::heap_region_iterate() below will only visit currently committed
+ // regions. Initialize all entries in the state table here, and later in this method
+ // selectively enable the regions we are interested in. This way regions committed
+ // later will automatically be excluded from iteration.
+ // Their refinement table must be completely empty anyway.
+ _sweep_table->reset_all_to_claimed();
+
+ class SnapshotRegionsClosure : public G1HeapRegionClosure {
+ G1CardTableClaimTable* _sweep_table;
+
+ public:
+ SnapshotRegionsClosure(G1CardTableClaimTable* sweep_table) : G1HeapRegionClosure(), _sweep_table(sweep_table) { }
+
+ bool do_heap_region(G1HeapRegion* r) override {
+ if (!r->is_free()) {
+ // Need to scan all parts of non-free regions, so reset the claim.
+ // No need for synchronization: we are only interested in regions
+ // that were allocated before the handshake; the handshake makes such
+ // regions' metadata visible to all threads, and we do not care about
+ // humongous regions that were allocated afterwards.
+ _sweep_table->reset_to_unclaimed(r->hrm_index());
+ }
+ return false;
+ }
+ } cl(_sweep_table);
+ G1CollectedHeap::heap()->heap_region_iterate(&cl);
+}
+
+bool G1ConcurrentRefineSweepState::is_in_progress() const {
+ return _state != State::Idle;
+}
+
+bool G1ConcurrentRefineSweepState::are_java_threads_synched() const {
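+ // Java threads are guaranteed to use the new card table once the handshake in
+ // SwapJavaThreadsCT has completed, or when no sweep is in progress at all.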
+ return _state > State::SwapJavaThreadsCT || !is_in_progress();
+}
+
uint64_t G1ConcurrentRefine::adjust_threads_period_ms() const {
// Instead of a fixed value, this could be a command line option. But then
// we might also want to allow configuration of adjust_threads_wait_ms().
- return 50;
+
+ // Use a prime number close to 50ms, different from other components that derive
+ // their wait time from the try_get_available_bytes_estimate() call, to minimize
+ // interference.
+ return 53;
}
static size_t minimum_pending_cards_target() {
- // One buffer per thread.
- return ParallelGCThreads * G1UpdateBufferSize;
+ return ParallelGCThreads * G1PerThreadPendingCardThreshold;
}
-G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
- _policy(policy),
- _threads_wanted(0),
+G1ConcurrentRefine::G1ConcurrentRefine(G1CollectedHeap* g1h) :
+ _policy(g1h->policy()),
+ _num_threads_wanted(0),
_pending_cards_target(PendingCardsTargetUninitialized),
_last_adjust(),
_needs_adjust(false),
- _threads_needed(policy, adjust_threads_period_ms()),
+ _heap_was_locked(false),
+ _threads_needed(g1h->policy(), adjust_threads_period_ms()),
_thread_control(G1ConcRefinementThreads),
- _dcqs(G1BarrierSet::dirty_card_queue_set())
-{}
+ _sweep_state(g1h->max_num_regions())
+{ }
jint G1ConcurrentRefine::initialize() {
return _thread_control.initialize(this);
}
-G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
- G1ConcurrentRefine* cr = new G1ConcurrentRefine(policy);
+G1ConcurrentRefineSweepState& G1ConcurrentRefine::sweep_state_for_merge() {
+ bool has_sweep_claims = sweep_state().complete_work(false /* concurrent */);
+ if (has_sweep_claims) {
+ log_debug(gc, refine)("Continue existing work");
+ } else {
+ // Refinement has been interrupted without having a snapshot. There may
+ // be a mix of already swapped and not-swapped card tables assigned to threads,
+ // so they might have already dirtied the swapped card tables.
+ // Conservatively scan all (non-free, committed) regions' card tables,
+ // creating the snapshot right now.
+ log_debug(gc, refine)("Create work from scratch");
+
+ sweep_state().snapshot_heap(false /* concurrent */);
+ }
+ return sweep_state();
+}
+
+void G1ConcurrentRefine::run_with_refinement_workers(WorkerTask* task) {
+ _thread_control.run_task(task, num_threads_wanted());
+}
+
+void G1ConcurrentRefine::notify_region_reclaimed(G1HeapRegion* r) {
+ assert_at_safepoint();
+ if (_sweep_state.is_in_progress()) {
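+ // Mark all of the reclaimed region's cards as claimed so that the in-progress
+ // sweep skips them.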
+ _sweep_state.sweep_table()->claim_all_cards(r->hrm_index());
+ }
+}
+
+G1ConcurrentRefine* G1ConcurrentRefine::create(G1CollectedHeap* g1h, jint* ecode) {
+ G1ConcurrentRefine* cr = new G1ConcurrentRefine(g1h);
*ecode = cr->initialize();
if (*ecode != 0) {
delete cr;
@@ -176,25 +465,31 @@ G1ConcurrentRefine::~G1ConcurrentRefine() {
}
void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
+ worker_threads_do(tc);
+ control_thread_do(tc);
+}
+
+void G1ConcurrentRefine::worker_threads_do(ThreadClosure *tc) {
_thread_control.worker_threads_do(tc);
}
-void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
- size_t processed_logged_cards,
- size_t predicted_thread_buffer_cards,
+void G1ConcurrentRefine::control_thread_do(ThreadClosure *tc) {
+ _thread_control.control_thread_do(tc);
+}
+
+void G1ConcurrentRefine::update_pending_cards_target(double pending_cards_time_ms,
+ size_t processed_pending_cards,
double goal_ms) {
size_t minimum = minimum_pending_cards_target();
- if ((processed_logged_cards < minimum) || (logged_cards_time_ms == 0.0)) {
- log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu",
- _pending_cards_target);
+ if ((processed_pending_cards < minimum) || (pending_cards_time_ms == 0.0)) {
+ log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu (processed %zu minimum %zu time %1.2f)",
+ _pending_cards_target, processed_pending_cards, minimum, pending_cards_time_ms);
return;
}
// Base the pending cards budget on the measured rate.
- double rate = processed_logged_cards / logged_cards_time_ms;
- size_t budget = static_cast<size_t>(goal_ms * rate);
- // Deduct predicted cards in thread buffers to get target.
- size_t new_target = budget - MIN2(budget, predicted_thread_buffer_cards);
+ double rate = processed_pending_cards / pending_cards_time_ms;
+ size_t new_target = static_cast<size_t>(goal_ms * rate);
// Add some hysteresis with previous values.
if (is_pending_cards_target_initialized()) {
new_target = (new_target + _pending_cards_target) / 2;
@@ -205,46 +500,36 @@ void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms
log_debug(gc, ergo, refine)("New pending cards target: %zu", new_target);
}
-void G1ConcurrentRefine::adjust_after_gc(double logged_cards_time_ms,
- size_t processed_logged_cards,
- size_t predicted_thread_buffer_cards,
+void G1ConcurrentRefine::adjust_after_gc(double pending_cards_time_ms,
+ size_t processed_pending_cards,
double goal_ms) {
- if (!G1UseConcRefinement) return;
+ if (!G1UseConcRefinement) {
+ return;
+ }
- update_pending_cards_target(logged_cards_time_ms,
- processed_logged_cards,
- predicted_thread_buffer_cards,
+ update_pending_cards_target(pending_cards_time_ms,
+ processed_pending_cards,
goal_ms);
- if (_thread_control.max_num_threads() == 0) {
- // If no refinement threads then the mutator threshold is the target.
- _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
- } else {
- // Provisionally make the mutator threshold unlimited, to be updated by
- // the next periodic adjustment. Because card state may have changed
- // drastically, record that adjustment is needed and kick the primary
- // thread, in case it is waiting.
- _dcqs.set_mutator_refinement_threshold(SIZE_MAX);
+ if (_thread_control.is_refinement_enabled()) {
_needs_adjust = true;
if (is_pending_cards_target_initialized()) {
- _thread_control.activate(0);
+ _thread_control.activate();
}
}
}
-// Wake up the primary thread less frequently when the time available until
-// the next GC is longer. But don't increase the wait time too rapidly.
-// This reduces the number of primary thread wakeups that just immediately
-// go back to waiting, while still being responsive to behavior changes.
-static uint64_t compute_adjust_wait_time_ms(double available_ms) {
- return static_cast<uint64_t>(sqrt(available_ms) * 4.0);
-}
-
uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
- assert_current_thread_is_primary_refinement_thread();
+ assert_current_thread_is_control_refinement_thread();
if (is_pending_cards_target_initialized()) {
- double available_ms = _threads_needed.predicted_time_until_next_gc_ms();
- uint64_t wait_time_ms = compute_adjust_wait_time_ms(available_ms);
- return MAX2(wait_time_ms, adjust_threads_period_ms());
+ // Retry as soon as possible when the reason for not getting a prediction was
+ // that we temporarily could not take the heap lock. Otherwise we might wait
+ // too long until we get back here.
+ if (_heap_was_locked) {
+ return 1;
+ }
+ double available_time_ms = _threads_needed.predicted_time_until_next_gc_ms();
+
+ return _policy->adjust_wait_time_ms(available_time_ms, adjust_threads_period_ms());
} else {
// If target not yet initialized then wait forever (until explicitly
// activated). This happens during startup, when we don't bother with
@@ -253,185 +538,74 @@ uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
}
}
-class G1ConcurrentRefine::RemSetSamplingClosure : public G1HeapRegionClosure {
- size_t _sampled_code_root_rs_length;
+bool G1ConcurrentRefine::adjust_num_threads_periodically() {
+ assert_current_thread_is_control_refinement_thread();
-public:
- RemSetSamplingClosure() :
- _sampled_code_root_rs_length(0) {}
-
- bool do_heap_region(G1HeapRegion* r) override {
- G1HeapRegionRemSet* rem_set = r->rem_set();
- _sampled_code_root_rs_length += rem_set->code_roots_list_length();
- return false;
- }
-
- size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
-};
-
-// Adjust the target length (in regions) of the young gen, based on the
-// current length of the remembered sets.
-//
-// At the end of the GC G1 determines the length of the young gen based on
-// how much time the next GC can take, and when the next GC may occur
-// according to the MMU.
-//
-// The assumption is that a significant part of the GC is spent on scanning
-// the remembered sets (and many other components), so this thread constantly
-// reevaluates the prediction for the remembered set scanning costs, and potentially
-// resizes the young gen. This may do a premature GC or even increase the young
-// gen size to keep pause time length goal.
-void G1ConcurrentRefine::adjust_young_list_target_length() {
- if (_policy->use_adaptive_young_list_length()) {
- G1CollectedHeap* g1h = G1CollectedHeap::heap();
- G1CollectionSet* cset = g1h->collection_set();
- RemSetSamplingClosure cl;
- cset->iterate(&cl);
-
- size_t card_rs_length = g1h->young_regions_cardset()->occupied();
-
- size_t sampled_code_root_rs_length = cl.sampled_code_root_rs_length();
- _policy->revise_young_list_target_length(card_rs_length, sampled_code_root_rs_length);
- }
-}
-
-bool G1ConcurrentRefine::adjust_threads_periodically() {
- assert_current_thread_is_primary_refinement_thread();
-
- // Check whether it's time to do a periodic adjustment.
+ _heap_was_locked = false;
+ // Check whether it's time to do a periodic adjustment if there is no explicit
+ // request pending. We might have spuriously woken up.
if (!_needs_adjust) {
Tickspan since_adjust = Ticks::now() - _last_adjust;
- if (since_adjust.milliseconds() >= adjust_threads_period_ms()) {
- _needs_adjust = true;
+ if (since_adjust.milliseconds() < adjust_threads_period_ms()) {
+ _num_threads_wanted = 0;
+ return false;
}
}
- // If needed, try to adjust threads wanted.
- if (_needs_adjust) {
- // Getting used young bytes requires holding Heap_lock. But we can't use
- // normal lock and block until available. Blocking on the lock could
- // deadlock with a GC VMOp that is holding the lock and requesting a
- // safepoint. Instead try to lock, and if fail then skip adjustment for
- // this iteration of the thread, do some refinement work, and retry the
- // adjustment later.
- if (Heap_lock->try_lock()) {
- size_t used_bytes = _policy->estimate_used_young_bytes_locked();
- Heap_lock->unlock();
- adjust_young_list_target_length();
- size_t young_bytes = _policy->young_list_target_length() * G1HeapRegion::GrainBytes;
- size_t available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
- adjust_threads_wanted(available_bytes);
- _needs_adjust = false;
- _last_adjust = Ticks::now();
- return true;
- }
+ // Reset pending request.
+ _needs_adjust = false;
+ size_t available_bytes = 0;
+ if (_policy->try_get_available_bytes_estimate(available_bytes)) {
+ adjust_threads_wanted(available_bytes);
+ _last_adjust = Ticks::now();
+ } else {
+ _heap_was_locked = true;
+ // Defer adjustment to next time.
+ _needs_adjust = true;
}
- return false;
-}
-
-bool G1ConcurrentRefine::is_in_last_adjustment_period() const {
- return _threads_needed.predicted_time_until_next_gc_ms() <= adjust_threads_period_ms();
+ return (_num_threads_wanted > 0) && !heap_was_locked();
}
void G1ConcurrentRefine::adjust_threads_wanted(size_t available_bytes) {
- assert_current_thread_is_primary_refinement_thread();
- size_t num_cards = _dcqs.num_cards();
- size_t mutator_threshold = SIZE_MAX;
- uint old_wanted = AtomicAccess::load(&_threads_wanted);
+ assert_current_thread_is_control_refinement_thread();
- _threads_needed.update(old_wanted,
+ G1Policy* policy = G1CollectedHeap::heap()->policy();
+ const G1Analytics* analytics = policy->analytics();
+
+ size_t num_cards = policy->current_pending_cards();
+
+ _threads_needed.update(_num_threads_wanted,
available_bytes,
num_cards,
_pending_cards_target);
uint new_wanted = _threads_needed.threads_needed();
if (new_wanted > _thread_control.max_num_threads()) {
- // If running all the threads can't reach goal, turn on refinement by
- // mutator threads. Using target as the threshold may be stronger
- // than required, but will do the most to get us under goal, and we'll
- // reevaluate with the next adjustment.
- mutator_threshold = _pending_cards_target;
+ // Bound the wanted threads by maximum available.
new_wanted = _thread_control.max_num_threads();
- } else if (is_in_last_adjustment_period()) {
- // If very little time remains until GC, enable mutator refinement. If
- // the target has been reached, this keeps the number of pending cards on
- // target even if refinement threads deactivate in the meantime. And if
- // the target hasn't been reached, this prevents things from getting
- // worse.
- mutator_threshold = _pending_cards_target;
}
- AtomicAccess::store(&_threads_wanted, new_wanted);
- _dcqs.set_mutator_refinement_threshold(mutator_threshold);
- log_debug(gc, refine)("Concurrent refinement: wanted %u, cards: %zu, "
- "predicted: %zu, time: %1.2fms",
+
+ _num_threads_wanted = new_wanted;
+
+ log_debug(gc, refine)("Concurrent refinement: wanted %u, pending cards: %zu (pending-from-gc %zu), "
+ "predicted: %zu, goal %zu, time-until-next-gc: %1.2fms pred-refine-rate %1.2fc/ms log-rate %1.2fc/ms",
new_wanted,
num_cards,
+ G1CollectedHeap::heap()->policy()->pending_cards_from_gc(),
_threads_needed.predicted_cards_at_next_gc(),
- _threads_needed.predicted_time_until_next_gc_ms());
- // Activate newly wanted threads. The current thread is the primary
- // refinement thread, so is already active.
- for (uint i = MAX2(old_wanted, 1u); i < new_wanted; ++i) {
- if (!_thread_control.activate(i)) {
- // Failed to allocate and activate thread. Stop trying to activate, and
- // instead use mutator threads to make up the gap.
- AtomicAccess::store(&_threads_wanted, i);
- _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
- break;
- }
- }
-}
-
-void G1ConcurrentRefine::reduce_threads_wanted() {
- assert_current_thread_is_primary_refinement_thread();
- if (!_needs_adjust) { // Defer if adjustment request is active.
- uint wanted = AtomicAccess::load(&_threads_wanted);
- if (wanted > 0) {
- AtomicAccess::store(&_threads_wanted, --wanted);
- }
- // If very little time remains until GC, enable mutator refinement. If
- // the target has been reached, this keeps the number of pending cards on
- // target even as refinement threads deactivate in the meantime.
- if (is_in_last_adjustment_period()) {
- _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
- }
- }
-}
-
-bool G1ConcurrentRefine::is_thread_wanted(uint worker_id) const {
- return worker_id < AtomicAccess::load(&_threads_wanted);
+ _pending_cards_target,
+ _threads_needed.predicted_time_until_next_gc_ms(),
+ analytics->predict_concurrent_refine_rate_ms(),
+ analytics->predict_dirtied_cards_rate_ms()
+ );
}
bool G1ConcurrentRefine::is_thread_adjustment_needed() const {
- assert_current_thread_is_primary_refinement_thread();
+ assert_current_thread_is_control_refinement_thread();
return _needs_adjust;
}
void G1ConcurrentRefine::record_thread_adjustment_needed() {
- assert_current_thread_is_primary_refinement_thread();
+ assert_current_thread_is_control_refinement_thread();
_needs_adjust = true;
}
-
-G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
- struct CollectStats : public ThreadClosure {
- G1ConcurrentRefineStats _total_stats;
- virtual void do_thread(Thread* t) {
- G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
- G1ConcurrentRefineStats& stats = *crt->refinement_stats();
- _total_stats += stats;
- stats.reset();
- }
- } collector;
- threads_do(&collector);
- return collector._total_stats;
-}
-
-uint G1ConcurrentRefine::worker_id_offset() {
- return G1DirtyCardQueueSet::num_par_ids();
-}
-
-bool G1ConcurrentRefine::try_refinement_step(uint worker_id,
- size_t stop_at,
- G1ConcurrentRefineStats* stats) {
- uint adjusted_id = worker_id + worker_id_offset();
- return _dcqs.refine_completed_buffer_concurrently(adjusted_id, stop_at, stats);
-}
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp
index dd0b62a22ea..5e96ed738fd 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,23 +34,28 @@
#include "utilities/macros.hpp"
// Forward decl
+class G1CardTableClaimTable;
+class G1CollectedHeap;
class G1ConcurrentRefine;
class G1ConcurrentRefineThread;
-class G1DirtyCardQueueSet;
+class G1HeapRegion;
class G1Policy;
class ThreadClosure;
+class WorkerTask;
+class WorkerThreads;
// Helper class for refinement thread management. Used to start, stop and
// iterate over them.
class G1ConcurrentRefineThreadControl {
G1ConcurrentRefine* _cr;
- GrowableArrayCHeap<G1ConcurrentRefineThread*, mtGC> _threads;
+ G1ConcurrentRefineThread* _control_thread;
+
+ WorkerThreads* _workers;
+ uint _max_num_threads;
- // Create the refinement thread for the given worker id.
- // If initializing is true, ignore InjectGCWorkerCreationFailure.
+ // Create the refinement control thread.
- G1ConcurrentRefineThread* create_refinement_thread(uint worker_id, bool initializing);
-
- bool ensure_threads_created(uint worker_id, bool initializing);
+ G1ConcurrentRefineThread* create_refinement_thread();
NONCOPYABLE(G1ConcurrentRefineThreadControl);
@@ -60,21 +65,119 @@ public:
jint initialize(G1ConcurrentRefine* cr);
- void assert_current_thread_is_primary_refinement_thread() const NOT_DEBUG_RETURN;
+ void assert_current_thread_is_control_refinement_thread() const NOT_DEBUG_RETURN;
- uint max_num_threads() const { return _threads.capacity(); }
+ uint max_num_threads() const { return _max_num_threads; }
+ bool is_refinement_enabled() const { return _max_num_threads > 0; }
- // Activate the indicated thread. If the thread has not yet been allocated,
- // allocate and then activate. If allocation is needed and fails, return
- // false. Otherwise return true.
- // precondition: worker_id < max_num_threads().
- // precondition: current thread is not the designated worker.
- bool activate(uint worker_id);
+ // Activate the control thread.
+ void activate();
+ void run_task(WorkerTask* task, uint num_workers);
+
+ void control_thread_do(ThreadClosure* tc);
void worker_threads_do(ThreadClosure* tc);
void stop();
};
+// Tracks the current state of re-examining the dirty cards from idle to completion
+// (and reset back to idle).
+//
+// The process steps are as follows:
+//
+// 1) Swap global card table pointers
+//
+// 2) Swap Java Thread's card table pointers
+//
+// 3) Synchronize GC Threads
+// Ensures memory visibility
+//
+// After this point mutator threads should not mark the refinement table.
+//
+// 4) Snapshot the heap
+// Determines which regions need to be swept.
+//
+// 5) Sweep Refinement table
+// Examines non-Clean cards on the refinement table.
+//
+// 6) Completion Work
+// Calculates statistics about the process to be used in various parts of
+// the garbage collection.
+//
+// All but step 4 are interruptible by safepoints. A garbage collection interrupts
+// this process and resets it back to the Idle state.
+//
+class G1ConcurrentRefineSweepState {
+
+ enum class State : uint {
+ Idle, // Refinement is doing nothing.
+ SwapGlobalCT, // Swap global card table.
+ SwapJavaThreadsCT, // Swap java thread's card tables.
+ SynchronizeGCThreads, // Synchronize GC thread's memory view.
+ SnapshotHeap, // Take a snapshot of the region's top() values.
+ SweepRT, // Sweep the refinement table for pending (dirty) cards.
+ CompleteRefineWork, // Cleanup of refinement work, reset to idle.
+ Last
+ } _state;
+
+ static const char* state_name(State state) {
+ static const char* _state_names[] = {
+ "Idle",
+ "Swap Global Card Table",
+ "Swap JavaThread Card Table",
+ "Synchronize GC Threads",
+ "Snapshot Heap",
+ "Sweep Refinement Table",
+ "Complete Sweep Work"
+ };
+
+ return _state_names[static_cast<uint>(state)];
+ }
+
+ // Current heap snapshot.
+ G1CardTableClaimTable* _sweep_table;
+
+ // Start times for all states.
+ Ticks _state_start[static_cast<uint>(State::Last)];
+
+ void set_state_start_time();
+ Tickspan get_duration(State start, State end);
+
+ G1ConcurrentRefineStats _stats;
+
+ // Advances the state to next_state if not interrupted by a changed epoch. Returns
+ // to Idle otherwise.
+ bool advance_state(State next_state);
+
+ void assert_state(State expected);
+
+ void snapshot_heap_inner();
+
+public:
+ G1ConcurrentRefineSweepState(uint max_reserved_regions);
+ ~G1ConcurrentRefineSweepState();
+
+ void start_work();
+
+ bool swap_global_card_table();
+ bool swap_java_threads_ct();
+ bool swap_gc_threads_ct();
+ void snapshot_heap(bool concurrent = true);
+ void sweep_refinement_table_start();
+ bool sweep_refinement_table_step();
+
+ bool complete_work(bool concurrent, bool print_log = true);
+
+ G1CardTableClaimTable* sweep_table() { return _sweep_table; }
+ G1ConcurrentRefineStats* stats() { return &_stats; }
+ void reset_stats();
+
+ void add_yield_during_sweep_duration(jlong duration);
+
+ bool is_in_progress() const;
+ bool are_java_threads_synched() const;
+};
+
// Controls concurrent refinement.
//
// Mutator threads produce dirty cards, which need to be examined for updates
@@ -84,49 +187,43 @@ public:
// pending dirty cards at the start of a GC can be processed within that time
// budget.
//
-// Concurrent refinement is performed by a combination of dedicated threads
-// and by mutator threads as they produce dirty cards. If configured to not
-// have any dedicated threads (-XX:G1ConcRefinementThreads=0) then all
-// concurrent refinement work is performed by mutator threads. When there are
-// dedicated threads, they generally do most of the concurrent refinement
-// work, to minimize throughput impact of refinement work on mutator threads.
+// Concurrent refinement is performed by a set of dedicated threads. If configured
+// to not have any dedicated threads (-XX:G1ConcRefinementThreads=0) then no
+// refinement work is performed at all.
//
// This class determines the target number of dirty cards pending for the next
// GC. It also owns the dedicated refinement threads and controls their
// activation in order to achieve that target.
//
-// There are two kinds of dedicated refinement threads, a single primary
-// thread and some number of secondary threads. When active, all refinement
-// threads take buffers of dirty cards from the dirty card queue and process
-// them. Between buffers they query this owning object to find out whether
-// they should continue running, deactivating themselves if not.
+// There are two kinds of dedicated refinement threads, a single control
+// thread and some number of refinement worker threads.
+// The control thread determines whether there is work to do, and then starts
+// an appropriate number of refinement worker threads to get back to the target
+// number of pending dirty cards.
+//
+// The control thread wakes up periodically to check whether there is need to do
+// refinement work, starting the refinement process as necessary.
//
-// The primary thread drives the control system that determines how many
-// refinement threads should be active. If inactive, it wakes up periodically
-// to recalculate the number of active threads needed, and activates
-// additional threads as necessary. While active it also periodically
-// recalculates the number wanted and activates more threads if needed. It
-// also reduces the number of wanted threads when the target has been reached,
-// triggering deactivations.
class G1ConcurrentRefine : public CHeapObj<mtGC> {
G1Policy* _policy;
- volatile uint _threads_wanted;
+ volatile uint _num_threads_wanted;
size_t _pending_cards_target;
Ticks _last_adjust;
Ticks _last_deactivate;
bool _needs_adjust;
+ bool _heap_was_locked; // The heap lock could not be taken the last time we tried to adjust the number of refinement threads.
+
G1ConcurrentRefineThreadsNeeded _threads_needed;
G1ConcurrentRefineThreadControl _thread_control;
- G1DirtyCardQueueSet& _dcqs;
- G1ConcurrentRefine(G1Policy* policy);
+ G1ConcurrentRefineSweepState _sweep_state;
- static uint worker_id_offset();
+ G1ConcurrentRefine(G1CollectedHeap* g1h);
jint initialize();
- void assert_current_thread_is_primary_refinement_thread() const {
- _thread_control.assert_current_thread_is_primary_refinement_thread();
+ void assert_current_thread_is_control_refinement_thread() const {
+ _thread_control.assert_current_thread_is_control_refinement_thread();
}
// For the first few collection cycles we don't have a target (and so don't
@@ -138,16 +235,11 @@ class G1ConcurrentRefine : public CHeapObj {
return _pending_cards_target != PendingCardsTargetUninitialized;
}
- void update_pending_cards_target(double logged_cards_scan_time_ms,
- size_t processed_logged_cards,
- size_t predicted_thread_buffer_cards,
+ void update_pending_cards_target(double pending_cards_scan_time_ms,
+ size_t processed_pending_cards,
double goal_ms);
uint64_t adjust_threads_period_ms() const;
- bool is_in_last_adjustment_period() const;
-
- class RemSetSamplingClosure; // Helper class for adjusting young length.
- void adjust_young_list_target_length();
void adjust_threads_wanted(size_t available_bytes);
@@ -156,67 +248,66 @@ class G1ConcurrentRefine : public CHeapObj {
public:
~G1ConcurrentRefine();
+ G1ConcurrentRefineSweepState& sweep_state() { return _sweep_state; }
+
+ G1ConcurrentRefineSweepState& sweep_state_for_merge();
+
+ void run_with_refinement_workers(WorkerTask* task);
+
+ void notify_region_reclaimed(G1HeapRegion* r);
+
// Returns a G1ConcurrentRefine instance if succeeded to create/initialize the
// G1ConcurrentRefine instance. Otherwise, returns null with error code.
- static G1ConcurrentRefine* create(G1Policy* policy, jint* ecode);
+ static G1ConcurrentRefine* create(G1CollectedHeap* g1h, jint* ecode);
// Stop all the refinement threads.
void stop();
// Called at the end of a GC to prepare for refinement during the next
// concurrent phase. Updates the target for the number of pending dirty
- // cards. Updates the mutator refinement threshold. Ensures the primary
- // refinement thread (if it exists) is active, so it will adjust the number
+ // cards. Updates the mutator refinement threshold. Ensures the refinement
+ // control thread (if it exists) is active, so it will adjust the number
// of running threads.
- void adjust_after_gc(double logged_cards_scan_time_ms,
- size_t processed_logged_cards,
- size_t predicted_thread_buffer_cards,
+ void adjust_after_gc(double pending_cards_scan_time_ms,
+ size_t processed_pending_cards,
double goal_ms);
// Target number of pending dirty cards at the start of the next GC.
size_t pending_cards_target() const { return _pending_cards_target; }
- // May recalculate the number of refinement threads that should be active in
- // order to meet the pending cards target. Returns true if adjustment was
- // performed, and clears any pending request. Returns false if the
- // adjustment period has not expired, or because a timed or requested
- // adjustment could not be performed immediately and so was deferred.
- // precondition: current thread is the primary refinement thread.
- bool adjust_threads_periodically();
+ // Recalculates the number of refinement threads that should be active in
+ // order to meet the pending cards target.
+ // Returns true if it could recalculate the number of threads and
+ // refinement threads should be started.
+ // Returns false if the adjustment period has not expired, or if a timed
+ // or requested adjustment could not be performed immediately and so was deferred.
+ bool adjust_num_threads_periodically();
- // The amount of time (in ms) the primary refinement thread should sleep
+ // The amount of time (in ms) the refinement control thread should sleep
// when it is inactive. It requests adjustment whenever it is reactivated.
- // precondition: current thread is the primary refinement thread.
+ // precondition: current thread is the refinement control thread.
uint64_t adjust_threads_wait_ms() const;
// Record a request for thread adjustment as soon as possible.
- // precondition: current thread is the primary refinement thread.
+ // precondition: current thread is the refinement control thread.
void record_thread_adjustment_needed();
// Test whether there is a pending request for thread adjustment.
- // precondition: current thread is the primary refinement thread.
+ // precondition: current thread is the refinement control thread.
bool is_thread_adjustment_needed() const;
- // Reduce the number of active threads wanted.
- // precondition: current thread is the primary refinement thread.
- void reduce_threads_wanted();
+ // Returns whether the last refinement adjustment was deferred because the
+ // heap lock could not be obtained.
+ bool heap_was_locked() const { return _heap_was_locked; }
- // Test whether the thread designated by worker_id should be active.
- bool is_thread_wanted(uint worker_id) const;
-
- // Return total of concurrent refinement stats for the
- // ConcurrentRefineThreads. Also reset the stats for the threads.
- G1ConcurrentRefineStats get_and_reset_refinement_stats();
-
- // Perform a single refinement step; called by the refinement
- // threads. Returns true if there was refinement work available.
- // Updates stats.
- bool try_refinement_step(uint worker_id,
- size_t stop_at,
- G1ConcurrentRefineStats* stats);
+ uint num_threads_wanted() const { return _num_threads_wanted; }
+ uint max_num_threads() const { return _thread_control.max_num_threads(); }
// Iterate over all concurrent refinement threads applying the given closure.
void threads_do(ThreadClosure *tc);
+ // Iterate over specific refinement threads applying the given closure.
+ void worker_threads_do(ThreadClosure *tc);
+ void control_thread_do(ThreadClosure *tc);
};
#endif // SHARE_GC_G1_G1CONCURRENTREFINE_HPP
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp
index 7f0bcc5b50f..83a09c55a3f 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp
@@ -23,41 +23,33 @@
*/
#include "gc/g1/g1ConcurrentRefineStats.hpp"
+#include "runtime/atomicAccess.hpp"
+#include "runtime/timer.hpp"
G1ConcurrentRefineStats::G1ConcurrentRefineStats() :
- _refinement_time(),
- _refined_cards(0),
- _precleaned_cards(0),
- _dirtied_cards(0)
+ _sweep_duration(0),
+ _yield_during_sweep_duration(0),
+ _cards_scanned(0),
+ _cards_clean(0),
+ _cards_not_parsable(0),
+ _cards_already_refer_to_cset(0),
+ _cards_refer_to_cset(0),
+ _cards_no_cross_region(0),
+ _refine_duration(0)
{}
-double G1ConcurrentRefineStats::refinement_rate_ms() const {
- // Report 0 when no time recorded because no refinement performed.
- double secs = refinement_time().seconds();
- return (secs > 0) ? (refined_cards() / (secs * MILLIUNITS)) : 0.0;
-}
+void G1ConcurrentRefineStats::add_atomic(G1ConcurrentRefineStats* other) {
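+ // Atomically accumulate another thread's statistics into this shared summary.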
+ AtomicAccess::add(&_sweep_duration, other->_sweep_duration, memory_order_relaxed);
+ AtomicAccess::add(&_yield_during_sweep_duration, other->_yield_during_sweep_duration, memory_order_relaxed);
-G1ConcurrentRefineStats&
-G1ConcurrentRefineStats::operator+=(const G1ConcurrentRefineStats& other) {
- _refinement_time += other._refinement_time;
- _refined_cards += other._refined_cards;
- _precleaned_cards += other._precleaned_cards;
- _dirtied_cards += other._dirtied_cards;
- return *this;
-}
+ AtomicAccess::add(&_cards_scanned, other->_cards_scanned, memory_order_relaxed);
+ AtomicAccess::add(&_cards_clean, other->_cards_clean, memory_order_relaxed);
+ AtomicAccess::add(&_cards_not_parsable, other->_cards_not_parsable, memory_order_relaxed);
+ AtomicAccess::add(&_cards_already_refer_to_cset, other->_cards_already_refer_to_cset, memory_order_relaxed);
+ AtomicAccess::add(&_cards_refer_to_cset, other->_cards_refer_to_cset, memory_order_relaxed);
+ AtomicAccess::add(&_cards_no_cross_region, other->_cards_no_cross_region, memory_order_relaxed);
-template <typename T>
-static T clipped_sub(T x, T y) {
- return (x < y) ? T() : (x - y);
-}
-
-G1ConcurrentRefineStats&
-G1ConcurrentRefineStats::operator-=(const G1ConcurrentRefineStats& other) {
- _refinement_time = clipped_sub(_refinement_time, other._refinement_time);
- _refined_cards = clipped_sub(_refined_cards, other._refined_cards);
- _precleaned_cards = clipped_sub(_precleaned_cards, other._precleaned_cards);
- _dirtied_cards = clipped_sub(_dirtied_cards, other._dirtied_cards);
- return *this;
+ AtomicAccess::add(&_refine_duration, other->_refine_duration, memory_order_relaxed);
}
void G1ConcurrentRefineStats::reset() {
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp
index ae576778a07..ce22f4317df 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,47 +33,56 @@
// Used for collecting per-thread statistics and for summaries over a
// collection of threads.
class G1ConcurrentRefineStats : public CHeapObj<mtGC> {
- Tickspan _refinement_time;
- size_t _refined_cards;
- size_t _precleaned_cards;
- size_t _dirtied_cards;
+ jlong _sweep_duration; // Time spent sweeping the table finding non-clean cards
+ // and refining them.
+ jlong _yield_during_sweep_duration; // Time spent yielding during the sweep (not doing the sweep).
+
+ size_t _cards_scanned; // Total number of cards scanned.
+ size_t _cards_clean; // Number of cards found clean.
+ size_t _cards_not_parsable; // Number of cards we could not parse and left unrefined.
+ size_t _cards_already_refer_to_cset; // Number of cards found to already be marked as referring to the collection set.
+ size_t _cards_refer_to_cset; // Number of dirty cards that were recently found to contain a to-cset reference.
+ size_t _cards_no_cross_region; // Number of cards that were dirtied, but then cleaned again by the mutator.
+
+ jlong _refine_duration; // Time spent during actual refinement.
public:
G1ConcurrentRefineStats();
- // Time spent performing concurrent refinement.
- Tickspan refinement_time() const { return _refinement_time; }
+ // Time spent sweeping the refinement table (includes actual refinement,
+ // but not yield time).
+ jlong sweep_duration() const { return _sweep_duration - _yield_during_sweep_duration; }
+ jlong yield_during_sweep_duration() const { return _yield_during_sweep_duration; }
+ jlong refine_duration() const { return _refine_duration; }
// Number of refined cards.
- size_t refined_cards() const { return _refined_cards; }
+ size_t refined_cards() const { return cards_not_clean(); }
- // Refinement rate, in cards per ms.
- double refinement_rate_ms() const;
+ size_t cards_scanned() const { return _cards_scanned; }
+ size_t cards_clean() const { return _cards_clean; }
+ size_t cards_not_clean() const { return _cards_scanned - _cards_clean; }
+ size_t cards_not_parsable() const { return _cards_not_parsable; }
+ size_t cards_already_refer_to_cset() const { return _cards_already_refer_to_cset; }
+ size_t cards_refer_to_cset() const { return _cards_refer_to_cset; }
+ size_t cards_no_cross_region() const { return _cards_no_cross_region; }
+ // Number of cards that were marked dirty and in need of refinement. This includes cards recently
+ // found to refer to the collection set, as these were originally dirty as well.
+ size_t cards_pending() const { return cards_not_clean() - _cards_already_refer_to_cset; }
- // Number of cards for which refinement was skipped because some other
- // thread had already refined them.
- size_t precleaned_cards() const { return _precleaned_cards; }
+ size_t cards_to_cset() const { return _cards_already_refer_to_cset + _cards_refer_to_cset; }
- // Number of cards marked dirty and in need of refinement.
- size_t dirtied_cards() const { return _dirtied_cards; }
+ void inc_sweep_time(jlong t) { _sweep_duration += t; }
+ void inc_yield_during_sweep_duration(jlong t) { _yield_during_sweep_duration += t; }
+ void inc_refine_duration(jlong t) { _refine_duration += t; }
- void inc_refinement_time(Tickspan t) { _refinement_time += t; }
- void inc_refined_cards(size_t cards) { _refined_cards += cards; }
- void inc_precleaned_cards(size_t cards) { _precleaned_cards += cards; }
- void inc_dirtied_cards(size_t cards) { _dirtied_cards += cards; }
+ void inc_cards_scanned(size_t increment) { _cards_scanned += increment; }
+ void inc_cards_clean(size_t increment) { _cards_clean += increment; }
+ void inc_cards_not_parsable() { _cards_not_parsable++; }
+ void inc_cards_already_refer_to_cset() { _cards_already_refer_to_cset++; }
+ void inc_cards_refer_to_cset() { _cards_refer_to_cset++; }
+ void inc_cards_no_cross_region() { _cards_no_cross_region++; }
- G1ConcurrentRefineStats& operator+=(const G1ConcurrentRefineStats& other);
- G1ConcurrentRefineStats& operator-=(const G1ConcurrentRefineStats& other);
-
- friend G1ConcurrentRefineStats operator+(G1ConcurrentRefineStats x,
- const G1ConcurrentRefineStats& y) {
- return x += y;
- }
-
- friend G1ConcurrentRefineStats operator-(G1ConcurrentRefineStats x,
- const G1ConcurrentRefineStats& y) {
- return x -= y;
- }
+ void add_atomic(G1ConcurrentRefineStats* other);
void reset();
};
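For orientation (an illustration only, not part of the patch): the derived counters above are simple differences and sums of the raw counters. A self-contained sketch with made-up numbers:

    #include <cassert>
    #include <cstddef>

    int main() {
      // Raw counters, as a sweep might accumulate them (made-up values).
      size_t scanned = 1000;               // _cards_scanned
      size_t clean = 900;                  // _cards_clean
      size_t already_refer_to_cset = 10;   // _cards_already_refer_to_cset
      size_t refer_to_cset = 30;           // _cards_refer_to_cset

      // Derived values, mirroring the accessors in G1ConcurrentRefineStats.
      size_t not_clean = scanned - clean;                     // refined_cards()
      size_t pending = not_clean - already_refer_to_cset;     // cards_pending()
      size_t to_cset = already_refer_to_cset + refer_to_cset; // cards_to_cset()

      assert(not_clean == 100 && pending == 90 && to_cset == 40);
      return 0;
    }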
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp
new file mode 100644
index 00000000000..ca5bc9ebe5f
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "gc/g1/g1CardTableClaimTable.inline.hpp"
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
+
+class G1RefineRegionClosure : public G1HeapRegionClosure {
+ using CardValue = G1CardTable::CardValue;
+
+ G1RemSet* _rem_set;
+ G1CardTableClaimTable* _scan_state;
+
+ uint _worker_id;
+
+ size_t _num_collections_at_start;
+
+ bool has_work(G1HeapRegion* r) {
+ return _scan_state->has_unclaimed_cards(r->hrm_index());
+ }
+
+ void verify_card_pair_refers_to_same_card(CardValue* source_card, CardValue* dest_card) {
+#ifdef ASSERT
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ G1HeapRegion* refinement_r = g1h->heap_region_containing(g1h->refinement_table()->addr_for(source_card));
+ G1HeapRegion* card_r = g1h->heap_region_containing(g1h->card_table()->addr_for(dest_card));
+ size_t refinement_i = g1h->refinement_table()->index_for_cardvalue(source_card);
+ size_t card_i = g1h->card_table()->index_for_cardvalue(dest_card);
+
+ assert(refinement_r == card_r, "not same region source %u (%zu) dest %u (%zu) ", refinement_r->hrm_index(), refinement_i, card_r->hrm_index(), card_i);
+ assert(refinement_i == card_i, "indexes are not same %zu %zu", refinement_i, card_i);
+#endif
+ }
+
+ void do_dirty_card(CardValue* source_card, CardValue* dest_card) {
+ verify_card_pair_refers_to_same_card(source_card, dest_card);
+
+ G1RemSet::RefineResult res = _rem_set->refine_card_concurrently(source_card, _worker_id);
+ // Gather statistics based on the result.
+ switch (res) {
+ case G1RemSet::HasRefToCSet: {
+ *dest_card = G1CardTable::g1_to_cset_card;
+ _refine_stats.inc_cards_refer_to_cset();
+ break;
+ }
+ case G1RemSet::AlreadyToCSet: {
+ *dest_card = G1CardTable::g1_to_cset_card;
+ _refine_stats.inc_cards_already_refer_to_cset();
+ break;
+ }
+ case G1RemSet::NoCrossRegion: {
+ _refine_stats.inc_cards_no_cross_region();
+ break;
+ }
+ case G1RemSet::CouldNotParse: {
+ // Could not refine - redirty with the original value.
+ *dest_card = *source_card;
+ _refine_stats.inc_cards_not_parsable();
+ break;
+ }
+ case G1RemSet::HasRefToOld : break; // Nothing special to do.
+ }
+ // Clean card on source card table.
+ *source_card = G1CardTable::clean_card_val();
+ }
+
+ void do_claimed_block(CardValue* dirty_l, CardValue* dirty_r, CardValue* dest_card) {
+ for (CardValue* source = dirty_l; source < dirty_r; ++source, ++dest_card) {
+ do_dirty_card(source, dest_card);
+ }
+ }
+
+public:
+ bool _completed;
+ G1ConcurrentRefineStats _refine_stats;
+
+ G1RefineRegionClosure(uint worker_id, G1CardTableClaimTable* scan_state) :
+ G1HeapRegionClosure(),
+ _rem_set(G1CollectedHeap::heap()->rem_set()),
+ _scan_state(scan_state),
+ _worker_id(worker_id),
+ _completed(true),
+ _refine_stats() { }
+
+ bool do_heap_region(G1HeapRegion* r) override {
+
+ if (!has_work(r)) {
+ return false;
+ }
+
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+ if (r->is_young()) {
+ if (_scan_state->claim_all_cards(r->hrm_index()) == 0) {
+ // Clear the pre-dirtying information.
+ r->clear_refinement_table();
+ }
+ return false;
+ }
+
+ G1CardTable* card_table = g1h->card_table();
+ G1CardTable* refinement_table = g1h->refinement_table();
+
+ G1CardTableChunkClaimer claim(_scan_state, r->hrm_index());
+
+ size_t const region_card_base_idx = (size_t)r->hrm_index() << G1HeapRegion::LogCardsPerRegion;
+
+ while (claim.has_next()) {
+ size_t const start_idx = region_card_base_idx + claim.value();
+ CardValue* const start_card = refinement_table->byte_for_index(start_idx);
+ CardValue* const end_card = start_card + claim.size();
+
+ CardValue* dest_card = card_table->byte_for_index(start_idx);
+
+ G1ChunkScanner scanner{start_card, end_card};
+
+ size_t num_dirty_cards = 0;
+ scanner.on_dirty_cards([&] (CardValue* dirty_l, CardValue* dirty_r) {
+ jlong refine_start = os::elapsed_counter();
+
+ do_claimed_block(dirty_l, dirty_r, dest_card + pointer_delta(dirty_l, start_card, sizeof(CardValue)));
+ num_dirty_cards += pointer_delta(dirty_r, dirty_l, sizeof(CardValue));
+
+ _refine_stats.inc_refine_duration(os::elapsed_counter() - refine_start);
+ });
+
+ if (VerifyDuringGC) {
+ for (CardValue* i = start_card; i < end_card; ++i) {
+ guarantee(*i == G1CardTable::clean_card_val(), "must be");
+ }
+ }
+
+ _refine_stats.inc_cards_scanned(claim.size());
+ _refine_stats.inc_cards_clean(claim.size() - num_dirty_cards);
+
+ if (SuspendibleThreadSet::should_yield()) {
+ _completed = false;
+ break;
+ }
+ }
+
+ return !_completed;
+ }
+};
+
+G1ConcurrentRefineSweepTask::G1ConcurrentRefineSweepTask(G1CardTableClaimTable* scan_state,
+ G1ConcurrentRefineStats* stats,
+ uint max_workers) :
+ WorkerTask("G1 Refine Task"),
+ _scan_state(scan_state),
+ _stats(stats),
+ _max_workers(max_workers),
+ _sweep_completed(true)
+{ }
+
+void G1ConcurrentRefineSweepTask::work(uint worker_id) {
+ jlong start = os::elapsed_counter();
+
+ G1RefineRegionClosure sweep_cl(worker_id, _scan_state);
+ _scan_state->heap_region_iterate_from_worker_offset(&sweep_cl, worker_id, _max_workers);
+
+ if (!sweep_cl._completed) {
+ _sweep_completed = false;
+ }
+
+ sweep_cl._refine_stats.inc_sweep_time(os::elapsed_counter() - start);
+ _stats->add_atomic(&sweep_cl._refine_stats);
+}
+
+bool G1ConcurrentRefineSweepTask::sweep_completed() const { return _sweep_completed; }
\ No newline at end of file
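The per-chunk work in G1RefineRegionClosure above boils down to: walk the claimed slice of the refinement table, refine every non-clean card, mirror the outcome onto the primary card table, and leave the refinement-table entry clean. A minimal, self-contained sketch of that loop (kCleanCard, kDirtyCard and sweep_chunk are stand-ins, not the patch's API):

    #include <cstddef>
    #include <cstdint>

    // Stand-in card values; the real ones live in G1CardTable.
    static const uint8_t kCleanCard = 0xff;
    static const uint8_t kDirtyCard = 0x00;

    // Walk one claimed chunk of the refinement table: every non-clean entry is
    // "refined" (here just counted), its result is mirrored to the primary card
    // table, and the refinement-table entry is cleaned, so the chunk ends up all
    // clean, mirroring the VerifyDuringGC guarantee in the closure above.
    size_t sweep_chunk(uint8_t* src, uint8_t* src_end, uint8_t* dst) {
      size_t dirty = 0;
      for (; src < src_end; ++src, ++dst) {
        if (*src != kCleanCard) {
          ++dirty;
          *dst = kDirtyCard;    // e.g. redirty or mark to-cset on the primary table
          *src = kCleanCard;    // clean the refinement-table entry
        }
      }
      return dirty;
    }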
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.hpp
new file mode 100644
index 00000000000..bf24c5ae850
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP
+#define SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP
+
+#include "gc/g1/g1ConcurrentRefineStats.hpp"
+#include "gc/shared/workerThread.hpp"
+
+class G1CardTableClaimTable;
+
+class G1ConcurrentRefineSweepTask : public WorkerTask {
+ G1CardTableClaimTable* _scan_state;
+ G1ConcurrentRefineStats* _stats;
+ uint _max_workers;
+ bool _sweep_completed;
+
+public:
+
+ G1ConcurrentRefineSweepTask(G1CardTableClaimTable* scan_state, G1ConcurrentRefineStats* stats, uint max_workers);
+
+ void work(uint worker_id) override;
+
+ bool sweep_completed() const;
+};
+
+#endif /* SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP */
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp
index 2fa19d46093..eccfe466d48 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp
@@ -23,10 +23,13 @@
*/
#include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1CardTableClaimTable.inline.hpp"
+#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
+#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "logging/log.hpp"
#include "runtime/cpuTimeCounters.hpp"
@@ -38,60 +41,61 @@
#include "utilities/globalDefinitions.hpp"
#include "utilities/ticks.hpp"
-G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
+G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr) :
ConcurrentGCThread(),
- _notifier(Mutex::nosafepoint, FormatBuffer<>("G1 Refine#%d", worker_id), true),
+ _notifier(Mutex::nosafepoint, "G1 Refine Control", true),
_requested_active(false),
- _refinement_stats(),
- _worker_id(worker_id),
_cr(cr)
{
- // set name
- set_name("G1 Refine#%d", worker_id);
+ set_name("G1 Refine Control");
}
void G1ConcurrentRefineThread::run_service() {
- while (wait_for_completed_buffers()) {
+ while (wait_for_work()) {
SuspendibleThreadSetJoiner sts_join;
- G1ConcurrentRefineStats active_stats_start = _refinement_stats;
report_active("Activated");
while (!should_terminate()) {
if (sts_join.should_yield()) {
- report_inactive("Paused", _refinement_stats - active_stats_start);
+ report_inactive("Paused");
sts_join.yield();
// Reset after yield rather than accumulating across yields, else a
// very long running thread could overflow.
- active_stats_start = _refinement_stats;
report_active("Resumed");
- } else if (maybe_deactivate()) {
- break;
+ }
+ // Check whether we want to do refinement. If not, don't do any refinement
+ // this round. This thread may have just woken up but no threads are currently
+ // needed, which is common. In this case we want to just go back to
+ // waiting, with a minimum of fuss; in particular, don't do any "premature"
+ // refinement. However, adjustment may be pending but temporarily
+ // blocked. In that case we wait for adjustment to succeed.
+ Ticks adjust_start = Ticks::now();
+ if (cr()->adjust_num_threads_periodically()) {
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine Cycle");
+ do_refinement();
} else {
- do_refinement_step();
+ log_debug(gc, refine)("Concurrent Refine Adjust Only (#threads wanted: %u adjustment_needed: %s wait_for_heap_lock: %s) %.2fms",
+ cr()->num_threads_wanted(),
+ BOOL_TO_STR(cr()->is_thread_adjustment_needed()),
+ BOOL_TO_STR(cr()->heap_was_locked()),
+ (Ticks::now() - adjust_start).seconds() * MILLIUNITS);
+
+ deactivate();
+ break;
}
}
- report_inactive("Deactivated", _refinement_stats - active_stats_start);
+ report_inactive("Deactivated");
update_perf_counter_cpu_time();
}
- log_debug(gc, refine)("Stopping %d", _worker_id);
+ log_debug(gc, refine)("Stopping %s", name());
}
void G1ConcurrentRefineThread::report_active(const char* reason) const {
- log_trace(gc, refine)("%s worker %u, current: %zu",
- reason,
- _worker_id,
- G1BarrierSet::dirty_card_queue_set().num_cards());
+ log_trace(gc, refine)("%s active (%s)", name(), reason);
}
-void G1ConcurrentRefineThread::report_inactive(const char* reason,
- const G1ConcurrentRefineStats& stats) const {
- log_trace(gc, refine)
- ("%s worker %u, cards: %zu, refined %zu, rate %1.2fc/ms",
- reason,
- _worker_id,
- G1BarrierSet::dirty_card_queue_set().num_cards(),
- stats.refined_cards(),
- stats.refinement_rate_ms());
+void G1ConcurrentRefineThread::report_inactive(const char* reason) const {
+ log_trace(gc, refine)("%s inactive (%s)", name(), reason);
}
void G1ConcurrentRefineThread::activate() {
@@ -103,21 +107,12 @@ void G1ConcurrentRefineThread::activate() {
}
}
-bool G1ConcurrentRefineThread::maybe_deactivate() {
+bool G1ConcurrentRefineThread::deactivate() {
assert(this == Thread::current(), "precondition");
- if (cr()->is_thread_wanted(_worker_id)) {
- return false;
- } else {
- MutexLocker ml(&_notifier, Mutex::_no_safepoint_check_flag);
- bool requested = _requested_active;
- _requested_active = false;
- return !requested; // Deactivate only if not recently requested active.
- }
-}
-
-bool G1ConcurrentRefineThread::try_refinement_step(size_t stop_at) {
- assert(this == Thread::current(), "precondition");
- return _cr->try_refinement_step(_worker_id, stop_at, &_refinement_stats);
+ MutexLocker ml(&_notifier, Mutex::_no_safepoint_check_flag);
+ bool requested = _requested_active;
+ _requested_active = false;
+ return !requested; // Deactivate only if not recently requested active.
}
void G1ConcurrentRefineThread::stop_service() {
@@ -128,23 +123,9 @@ jlong G1ConcurrentRefineThread::cpu_time() {
return os::thread_cpu_time(this);
}
-// The (single) primary thread drives the controller for the refinement threads.
-class G1PrimaryConcurrentRefineThread final : public G1ConcurrentRefineThread {
- bool wait_for_completed_buffers() override;
- bool maybe_deactivate() override;
- void do_refinement_step() override;
- // Updates jstat cpu usage for all refinement threads.
- void update_perf_counter_cpu_time() override;
-
-public:
- G1PrimaryConcurrentRefineThread(G1ConcurrentRefine* cr) :
- G1ConcurrentRefineThread(cr, 0)
- {}
-};
-
-// When inactive, the primary thread periodically wakes up and requests
-// adjustment of the number of active refinement threads.
-bool G1PrimaryConcurrentRefineThread::wait_for_completed_buffers() {
+// When inactive, the control thread periodically wakes up to check if there is
+// refinement work pending.
+bool G1ConcurrentRefineThread::wait_for_work() {
assert(this == Thread::current(), "precondition");
MonitorLocker ml(notifier(), Mutex::_no_safepoint_check_flag);
if (!requested_active() && !should_terminate()) {
@@ -157,78 +138,115 @@ bool G1PrimaryConcurrentRefineThread::wait_for_completed_buffers() {
return !should_terminate();
}
-bool G1PrimaryConcurrentRefineThread::maybe_deactivate() {
- // Don't deactivate while needing to adjust the number of active threads.
- return !cr()->is_thread_adjustment_needed() &&
- G1ConcurrentRefineThread::maybe_deactivate();
+void G1ConcurrentRefineThread::do_refinement() {
+ G1ConcurrentRefineSweepState& state = _cr->sweep_state();
+
+ state.start_work();
+
+ // Swap card tables.
+
+ // 1. Global card table
+ if (!state.swap_global_card_table()) {
+ log_debug(gc, refine)("GC pause after Global Card Table Swap");
+ return;
+ }
+
+ // 2. Java threads
+ if (!state.swap_java_threads_ct()) {
+ log_debug(gc, refine)("GC pause after Java Thread CT swap");
+ return;
+ }
+
+ // 3. GC threads
+ if (!state.swap_gc_threads_ct()) {
+ log_debug(gc, refine)("GC pause after GC Thread CT swap");
+ return;
+ }
+
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ jlong epoch_yield_duration = g1h->yield_duration_in_refinement_epoch();
+ jlong next_epoch_start = os::elapsed_counter();
+
+ jlong total_yield_during_sweep_duration = 0;
+
+ // 4. Snapshot heap.
+ state.snapshot_heap();
+
+ // 5. Sweep refinement table until done
+ bool interrupted_by_gc = false;
+
+ log_info(gc, task)("Concurrent Refine Sweep Using %u of %u Workers", _cr->num_threads_wanted(), _cr->max_num_threads());
+
+ state.sweep_refinement_table_start();
+ while (true) {
+ bool completed = state.sweep_refinement_table_step();
+
+ if (completed) {
+ break;
+ }
+
+ if (SuspendibleThreadSet::should_yield()) {
+ jlong yield_during_sweep_start = os::elapsed_counter();
+ SuspendibleThreadSet::yield();
+
+ // The yielding may have completed the task, check.
+ if (!state.is_in_progress()) {
+ log_debug(gc, refine)("GC completed sweeping, aborting concurrent operation");
+ interrupted_by_gc = true;
+ break;
+ } else {
+ jlong yield_during_sweep_duration = os::elapsed_counter() - yield_during_sweep_start;
+ log_debug(gc, refine)("Yielded from card table sweeping for %.2fms, no GC in between, continue",
+ TimeHelper::counter_to_millis(yield_during_sweep_duration));
+ total_yield_during_sweep_duration += yield_during_sweep_duration;
+ }
+ }
+ }
+
+ if (!interrupted_by_gc) {
+ GCTraceTime(Info, gc, refine) tm("Concurrent Refine Complete Work");
+
+ state.add_yield_during_sweep_duration(total_yield_during_sweep_duration);
+
+ state.complete_work(true);
+
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ G1Policy* policy = g1h->policy();
+ G1ConcurrentRefineStats* stats = state.stats();
+ policy->record_refinement_stats(stats);
+
+ {
+ // The young gen revising mechanism reads the predictor and the values set
+ // here. Avoid inconsistencies by locking.
+ MutexLocker x(G1ReviseYoungLength_lock, Mutex::_no_safepoint_check_flag);
+ policy->record_dirtying_stats(TimeHelper::counter_to_millis(G1CollectedHeap::heap()->last_refinement_epoch_start()),
+ TimeHelper::counter_to_millis(next_epoch_start),
+ stats->cards_pending(),
+ TimeHelper::counter_to_millis(epoch_yield_duration),
+ 0 /* pending_cards_from_gc */,
+ stats->cards_to_cset());
+ G1CollectedHeap::heap()->set_last_refinement_epoch_start(next_epoch_start, epoch_yield_duration);
+ }
+ stats->reset();
+ }
}
-void G1PrimaryConcurrentRefineThread::do_refinement_step() {
- // Try adjustment first. If it succeeds then don't do any refinement this
- // round. This thread may have just woken up but no threads are currently
- // needed, which is common. In this case we want to just go back to
- // waiting, with a minimum of fuss; in particular, don't do any "premature"
- // refinement. However, adjustment may be pending but temporarily
- // blocked. In that case we *do* try refinement, rather than possibly
- // uselessly spinning while waiting for adjustment to succeed.
- if (!cr()->adjust_threads_periodically()) {
- // No adjustment, so try refinement, with the target as a cuttoff.
- if (!try_refinement_step(cr()->pending_cards_target())) {
- // Refinement was cut off, so proceed with fewer threads.
- cr()->reduce_threads_wanted();
+void G1ConcurrentRefineThread::update_perf_counter_cpu_time() {
+ // The control thread is responsible for updating the CPU time for all workers.
+ if (UsePerfData) {
+ {
+ ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine);
+ cr()->worker_threads_do(&tttc);
+ }
+ {
+ ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine_control);
+ cr()->control_thread_do(&tttc);
}
}
}
-void G1PrimaryConcurrentRefineThread::update_perf_counter_cpu_time() {
- if (UsePerfData) {
- ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine);
- cr()->threads_do(&tttc);
- }
-}
-
-class G1SecondaryConcurrentRefineThread final : public G1ConcurrentRefineThread {
- bool wait_for_completed_buffers() override;
- void do_refinement_step() override;
- void update_perf_counter_cpu_time() override { /* Nothing to do. The primary thread does all the work. */ }
-
-public:
- G1SecondaryConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
- G1ConcurrentRefineThread(cr, worker_id)
- {
- assert(worker_id > 0, "precondition");
- }
-};
-
-bool G1SecondaryConcurrentRefineThread::wait_for_completed_buffers() {
- assert(this == Thread::current(), "precondition");
- MonitorLocker ml(notifier(), Mutex::_no_safepoint_check_flag);
- while (!requested_active() && !should_terminate()) {
- ml.wait();
- }
- return !should_terminate();
-}
-
-void G1SecondaryConcurrentRefineThread::do_refinement_step() {
- assert(this == Thread::current(), "precondition");
- // Secondary threads ignore the target and just drive the number of pending
- // dirty cards down. The primary thread is responsible for noticing the
- // target has been reached and reducing the number of wanted threads. This
- // makes the control of wanted threads all under the primary, while avoiding
- // useless spinning by secondary threads until the primary thread notices.
- // (Useless spinning is still possible if there are no pending cards, but
- // that should rarely happen.)
- try_refinement_step(0);
-}
-
-G1ConcurrentRefineThread*
-G1ConcurrentRefineThread::create(G1ConcurrentRefine* cr, uint worker_id) {
- G1ConcurrentRefineThread* crt;
- if (worker_id == 0) {
- crt = new (std::nothrow) G1PrimaryConcurrentRefineThread(cr);
- } else {
- crt = new (std::nothrow) G1SecondaryConcurrentRefineThread(cr, worker_id);
- }
+G1ConcurrentRefineThread* G1ConcurrentRefineThread::create(G1ConcurrentRefine* cr) {
+ G1ConcurrentRefineThread* crt = new (std::nothrow) G1ConcurrentRefineThread(cr);
if (crt != nullptr) {
crt->create_and_start();
}
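Taken together, do_refinement() above runs one cycle in fixed phases: swap the global card table, then the per-Java-thread and per-GC-thread tables, snapshot the heap, and sweep the refinement table in steps that yield to safepoints. A compilable sketch of that control flow (the helpers below are trivial stand-ins for the G1ConcurrentRefineSweepState and SuspendibleThreadSet calls, not the patch's API):

    #include <cstdio>

    // Trivial stand-ins so the sketch compiles and runs.
    static bool swap_global_card_table()  { return true; }
    static bool swap_java_thread_tables() { return true; }
    static bool swap_gc_thread_tables()   { return true; }
    static void snapshot_heap()           {}
    static bool sweep_step()              { return true; }   // true == sweep finished
    static bool should_yield()            { return false; }
    static void yield_to_safepoint()      {}
    static bool still_in_progress()       { return true; }
    static void complete_cycle()          { puts("refinement cycle complete"); }

    // One refinement cycle as driven by the control thread; returns false if a
    // GC pause took over at any point.
    static bool refinement_cycle() {
      if (!swap_global_card_table())  return false;  // 1. global card table
      if (!swap_java_thread_tables()) return false;  // 2. per-Java-thread tables
      if (!swap_gc_thread_tables())   return false;  // 3. per-GC-thread tables
      snapshot_heap();                               // 4. fix the set of regions to sweep
      while (!sweep_step()) {                        // 5. sweep until done ...
        if (should_yield()) {
          yield_to_safepoint();                      //    ... yielding to safepoints
          if (!still_in_progress()) return false;    // a GC finished the sweep for us
        }
      }
      complete_cycle();                              // publish stats to the policy
      return true;
    }

    int main() { return refinement_cycle() ? 0 : 1; }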
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp
index b1e34e4b78d..8e635247cd3 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp
@@ -33,8 +33,8 @@
// Forward Decl.
class G1ConcurrentRefine;
-// One or more G1 Concurrent Refinement Threads may be active if concurrent
-// refinement is in progress.
+// Concurrent refinement control thread watching card mark accrual on the card table
+// and starting refinement work.
class G1ConcurrentRefineThread: public ConcurrentGCThread {
friend class VMStructs;
friend class G1CollectedHeap;
@@ -42,43 +42,34 @@ class G1ConcurrentRefineThread: public ConcurrentGCThread {
Monitor _notifier;
bool _requested_active;
- G1ConcurrentRefineStats _refinement_stats;
-
uint _worker_id;
G1ConcurrentRefine* _cr;
NONCOPYABLE(G1ConcurrentRefineThread);
-protected:
- G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id);
+ G1ConcurrentRefineThread(G1ConcurrentRefine* cr);
Monitor* notifier() { return &_notifier; }
bool requested_active() const { return _requested_active; }
// Returns !should_terminate().
// precondition: this is the current thread.
- virtual bool wait_for_completed_buffers() = 0;
+ bool wait_for_work();
// Deactivate if appropriate. Returns true if deactivated.
// precondition: this is the current thread.
- virtual bool maybe_deactivate();
+ bool deactivate();
- // Attempt to do some refinement work.
- // precondition: this is the current thread.
- virtual void do_refinement_step() = 0;
+ // Swap card table and do a complete re-examination/refinement pass over the
+ // refinement table.
+ void do_refinement();
// Update concurrent refine threads cpu time stats.
- virtual void update_perf_counter_cpu_time() = 0;
-
- // Helper for do_refinement_step implementations. Try to perform some
- // refinement work, limited by stop_at. Returns true if any refinement work
- // was performed, false if no work available per stop_at.
- // precondition: this is the current thread.
- bool try_refinement_step(size_t stop_at);
+ void update_perf_counter_cpu_time();
void report_active(const char* reason) const;
- void report_inactive(const char* reason, const G1ConcurrentRefineStats& stats) const;
+ void report_inactive(const char* reason) const;
G1ConcurrentRefine* cr() const { return _cr; }
@@ -86,23 +77,12 @@ protected:
void stop_service() override;
public:
- static G1ConcurrentRefineThread* create(G1ConcurrentRefine* cr, uint worker_id);
- virtual ~G1ConcurrentRefineThread() = default;
-
- uint worker_id() const { return _worker_id; }
+ static G1ConcurrentRefineThread* create(G1ConcurrentRefine* cr);
// Activate this thread.
// precondition: this is not the current thread.
void activate();
- G1ConcurrentRefineStats* refinement_stats() {
- return &_refinement_stats;
- }
-
- const G1ConcurrentRefineStats* refinement_stats() const {
- return &_refinement_stats;
- }
-
// Total cpu time spent in this thread so far.
jlong cpu_time();
};
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineThreadsNeeded.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineThreadsNeeded.cpp
index d34229bd359..3ab26bd72af 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThreadsNeeded.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThreadsNeeded.cpp
@@ -45,48 +45,22 @@ G1ConcurrentRefineThreadsNeeded::G1ConcurrentRefineThreadsNeeded(G1Policy* polic
//
// 1. Minimize the number of refinement threads running at once.
//
-// 2. Minimize the number of activations and deactivations for the
-// refinement threads that run.
-//
-// 3. Delay performing refinement work. Having more dirty cards waiting to
+// 2. Delay performing refinement work. Having more dirty cards waiting to
// be refined can be beneficial, as further writes to the same card don't
// create more work.
void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
size_t available_bytes,
size_t num_cards,
size_t target_num_cards) {
+ _predicted_time_until_next_gc_ms = _policy->predict_time_to_next_gc_ms(available_bytes);
+
+ // Estimate number of cards that need to be processed before next GC.
const G1Analytics* analytics = _policy->analytics();
- // Estimate time until next GC, based on remaining bytes available for
- // allocation and the allocation rate.
- double alloc_region_rate = analytics->predict_alloc_rate_ms();
- double alloc_bytes_rate = alloc_region_rate * G1HeapRegion::GrainBytes;
- if (alloc_bytes_rate == 0.0) {
- // A zero rate indicates we don't yet have data to use for predictions.
- // Since we don't have any idea how long until the next GC, use a time of
- // zero.
- _predicted_time_until_next_gc_ms = 0.0;
- } else {
- // If the heap size is large and the allocation rate is small, we can get
- // a predicted time until next GC that is so large it can cause problems
- // (such as overflow) in other calculations. Limit the prediction to one
- // hour, which is still large in this context.
- const double one_hour_ms = 60.0 * 60.0 * MILLIUNITS;
- double raw_time_ms = available_bytes / alloc_bytes_rate;
- _predicted_time_until_next_gc_ms = MIN2(raw_time_ms, one_hour_ms);
- }
+ double incoming_rate = analytics->predict_dirtied_cards_rate_ms();
+ double raw_cards = incoming_rate * _predicted_time_until_next_gc_ms;
+ size_t incoming_cards = static_cast<size_t>(raw_cards);
- // Estimate number of cards that need to be processed before next GC. There
- // are no incoming cards when time is short, because in that case the
- // controller activates refinement by mutator threads to stay on target even
- // if threads deactivate in the meantime. This also covers the case of not
- // having a real prediction of time until GC.
- size_t incoming_cards = 0;
- if (_predicted_time_until_next_gc_ms > _update_period_ms) {
- double incoming_rate = analytics->predict_dirtied_cards_rate_ms();
- double raw_cards = incoming_rate * _predicted_time_until_next_gc_ms;
- incoming_cards = static_cast<size_t>(raw_cards);
- }
size_t total_cards = num_cards + incoming_cards;
_predicted_cards_at_next_gc = total_cards;
@@ -100,9 +74,8 @@ void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
// The calculation of the number of threads needed isn't very stable when
// time is short, and can lead to starting up lots of threads for not much
// profit. If we're in the last update period, don't change the number of
- // threads running, other than to treat the current thread as running. That
- // might not be sufficient, but hopefully we were already reasonably close.
- // We won't accumulate more because mutator refinement will be activated.
+ // threads needed. That might not be sufficient, but hopefully we were
+ // already reasonably close.
if (_predicted_time_until_next_gc_ms <= _update_period_ms) {
_threads_needed = MAX2(active_threads, 1u);
return;
@@ -133,11 +106,12 @@ void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
// close to the next GC we want to drive toward the target, so round up
// then. The rest of the time we round to nearest, trying to remain near
// the middle of the range.
+ double rthreads = nthreads;
if (_predicted_time_until_next_gc_ms <= _update_period_ms * 5.0) {
- nthreads = ::ceil(nthreads);
+ rthreads = ::ceil(nthreads);
} else {
- nthreads = ::round(nthreads);
+ rthreads = ::round(nthreads);
}
- _threads_needed = static_cast<uint>(MIN2<double>(nthreads, UINT_MAX));
+ _threads_needed = static_cast<uint>(MIN2<double>(rthreads, UINT_MAX));
}
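The estimate above is plain arithmetic: cards already pending plus the cards expected to be dirtied before the next GC (predicted rate times predicted time). A self-contained sketch with made-up rates (it does not reproduce the patch's rounding of thread counts):

    #include <cstddef>
    #include <cstdio>

    // Predicted refinement load at the next GC: cards already pending plus the
    // cards expected to be dirtied until then (rate * predicted time).
    static size_t predicted_cards_at_next_gc(size_t num_cards,
                                             double dirtied_cards_per_ms,
                                             double time_until_gc_ms) {
      return num_cards + static_cast<size_t>(dirtied_cards_per_ms * time_until_gc_ms);
    }

    int main() {
      // E.g. 50000 cards pending, 200 cards/ms dirtying rate, 2.5s until the next GC:
      size_t total = predicted_cards_at_next_gc(50000, 200.0, 2500.0);
      printf("%zu cards expected at next GC\n", total);  // 50000 + 500000 = 550000
      return 0;
    }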
diff --git a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
deleted file mode 100644
index ec9d68af3bb..00000000000
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
+++ /dev/null
@@ -1,599 +0,0 @@
-/*
- * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "gc/g1/g1BarrierSet.inline.hpp"
-#include "gc/g1/g1CardTableEntryClosure.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/g1ConcurrentRefineStats.hpp"
-#include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
-#include "gc/g1/g1FreeIdSet.hpp"
-#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
-#include "gc/g1/g1RedirtyCardsQueue.hpp"
-#include "gc/g1/g1RemSet.hpp"
-#include "gc/g1/g1ThreadLocalData.hpp"
-#include "gc/shared/bufferNode.hpp"
-#include "gc/shared/bufferNodeList.hpp"
-#include "gc/shared/suspendibleThreadSet.hpp"
-#include "memory/iterator.hpp"
-#include "runtime/atomicAccess.hpp"
-#include "runtime/javaThread.hpp"
-#include "runtime/mutex.hpp"
-#include "runtime/mutexLocker.hpp"
-#include "runtime/os.hpp"
-#include "runtime/safepoint.hpp"
-#include "runtime/threads.hpp"
-#include "runtime/threadSMR.hpp"
-#include "utilities/globalCounter.inline.hpp"
-#include "utilities/macros.hpp"
-#include "utilities/nonblockingQueue.inline.hpp"
-#include "utilities/pair.hpp"
-#include "utilities/quickSort.hpp"
-#include "utilities/ticks.hpp"
-
-G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
- PtrQueue(qset),
- _refinement_stats(new G1ConcurrentRefineStats())
-{ }
-
-G1DirtyCardQueue::~G1DirtyCardQueue() {
- delete _refinement_stats;
-}
-
-// Assumed to be zero by concurrent threads.
-static uint par_ids_start() { return 0; }
-
-G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :
- PtrQueueSet(allocator),
- _num_cards(0),
- _mutator_refinement_threshold(SIZE_MAX),
- _completed(),
- _paused(),
- _free_ids(par_ids_start(), num_par_ids()),
- _detached_refinement_stats()
-{}
-
-G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
- abandon_completed_buffers();
-}
-
-// Determines how many mutator threads can process the buffers in parallel.
-uint G1DirtyCardQueueSet::num_par_ids() {
- return (uint)os::initial_active_processor_count();
-}
-
-void G1DirtyCardQueueSet::flush_queue(G1DirtyCardQueue& queue) {
- if (queue.buffer() != nullptr) {
- G1ConcurrentRefineStats* stats = queue.refinement_stats();
- stats->inc_dirtied_cards(queue.size());
- }
- PtrQueueSet::flush_queue(queue);
-}
-
-void G1DirtyCardQueueSet::enqueue(G1DirtyCardQueue& queue,
- volatile CardValue* card_ptr) {
- CardValue* value = const_cast<CardValue*>(card_ptr);
- if (!try_enqueue(queue, value)) {
- handle_zero_index(queue);
- retry_enqueue(queue, value);
- }
-}
-
-void G1DirtyCardQueueSet::handle_zero_index(G1DirtyCardQueue& queue) {
- assert(queue.index() == 0, "precondition");
- BufferNode* old_node = exchange_buffer_with_new(queue);
- if (old_node != nullptr) {
- assert(old_node->index() == 0, "invariant");
- G1ConcurrentRefineStats* stats = queue.refinement_stats();
- stats->inc_dirtied_cards(old_node->capacity());
- handle_completed_buffer(old_node, stats);
- }
-}
-
-void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
- G1BarrierSet::dirty_card_queue_set().handle_zero_index(queue);
-}
-
-size_t G1DirtyCardQueueSet::num_cards() const {
- return AtomicAccess::load(&_num_cards);
-}
-
-void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
- assert(cbn != nullptr, "precondition");
- // Increment _num_cards before adding to queue, so queue removal doesn't
- // need to deal with _num_cards possibly going negative.
- AtomicAccess::add(&_num_cards, cbn->size());
- // Perform push in CS. The old tail may be popped while the push is
- // observing it (attaching it to the new buffer). We need to ensure it
- // can't be reused until the push completes, to avoid ABA problems.
- GlobalCounter::CriticalSection cs(Thread::current());
- _completed.push(*cbn);
-}
-
-// Thread-safe attempt to remove and return the first buffer from
-// the _completed queue, using the NonblockingQueue::try_pop() underneath.
-// It has a limitation that it may return null when there are objects
-// in the queue if there is a concurrent push/append operation.
-BufferNode* G1DirtyCardQueueSet::dequeue_completed_buffer() {
- Thread* current_thread = Thread::current();
- BufferNode* result = nullptr;
- while (true) {
- // Use GlobalCounter critical section to avoid ABA problem.
- // The release of a buffer to its allocator's free list uses
- // GlobalCounter::write_synchronize() to coordinate with this
- // dequeuing operation.
- // We use a CS per iteration, rather than over the whole loop,
- // because we're not guaranteed to make progress. Lingering in
- // one CS could defer releasing buffer to the free list for reuse,
- // leading to excessive allocations.
- GlobalCounter::CriticalSection cs(current_thread);
- if (_completed.try_pop(&result)) return result;
- }
-}
-
-BufferNode* G1DirtyCardQueueSet::get_completed_buffer() {
- BufferNode* result = dequeue_completed_buffer();
- if (result == nullptr) { // Unlikely if no paused buffers.
- enqueue_previous_paused_buffers();
- result = dequeue_completed_buffer();
- if (result == nullptr) return nullptr;
- }
- AtomicAccess::sub(&_num_cards, result->size());
- return result;
-}
-
-#ifdef ASSERT
-void G1DirtyCardQueueSet::verify_num_cards() const {
- size_t actual = 0;
- for (BufferNode* cur = _completed.first();
- !_completed.is_end(cur);
- cur = cur->next()) {
- actual += cur->size();
- }
- assert(actual == AtomicAccess::load(&_num_cards),
- "Num entries in completed buffers should be %zu but are %zu",
- AtomicAccess::load(&_num_cards), actual);
-}
-#endif // ASSERT
-
-G1DirtyCardQueueSet::PausedBuffers::PausedList::PausedList() :
- _head(nullptr), _tail(nullptr),
- _safepoint_id(SafepointSynchronize::safepoint_id())
-{}
-
-#ifdef ASSERT
-G1DirtyCardQueueSet::PausedBuffers::PausedList::~PausedList() {
- assert(AtomicAccess::load(&_head) == nullptr, "precondition");
- assert(_tail == nullptr, "precondition");
-}
-#endif // ASSERT
-
-bool G1DirtyCardQueueSet::PausedBuffers::PausedList::is_next() const {
- assert_not_at_safepoint();
- return _safepoint_id == SafepointSynchronize::safepoint_id();
-}
-
-void G1DirtyCardQueueSet::PausedBuffers::PausedList::add(BufferNode* node) {
- assert_not_at_safepoint();
- assert(is_next(), "precondition");
- BufferNode* old_head = AtomicAccess::xchg(&_head, node);
- if (old_head == nullptr) {
- assert(_tail == nullptr, "invariant");
- _tail = node;
- } else {
- node->set_next(old_head);
- }
-}
-
-G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::PausedList::take() {
- BufferNode* head = AtomicAccess::load(&_head);
- BufferNode* tail = _tail;
- AtomicAccess::store(&_head, (BufferNode*)nullptr);
- _tail = nullptr;
- return HeadTail(head, tail);
-}
-
-G1DirtyCardQueueSet::PausedBuffers::PausedBuffers() : _plist(nullptr) {}
-
-#ifdef ASSERT
-G1DirtyCardQueueSet::PausedBuffers::~PausedBuffers() {
- assert(AtomicAccess::load(&_plist) == nullptr, "invariant");
-}
-#endif // ASSERT
-
-void G1DirtyCardQueueSet::PausedBuffers::add(BufferNode* node) {
- assert_not_at_safepoint();
- PausedList* plist = AtomicAccess::load_acquire(&_plist);
- if (plist == nullptr) {
- // Try to install a new next list.
- plist = new PausedList();
- PausedList* old_plist = AtomicAccess::cmpxchg(&_plist, (PausedList*)nullptr, plist);
- if (old_plist != nullptr) {
- // Some other thread installed a new next list. Use it instead.
- delete plist;
- plist = old_plist;
- }
- }
- assert(plist->is_next(), "invariant");
- plist->add(node);
-}
-
-G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_previous() {
- assert_not_at_safepoint();
- PausedList* previous;
- {
- // Deal with plist in a critical section, to prevent it from being
- // deleted out from under us by a concurrent take_previous().
- GlobalCounter::CriticalSection cs(Thread::current());
- previous = AtomicAccess::load_acquire(&_plist);
- if ((previous == nullptr) || // Nothing to take.
- previous->is_next() || // Not from a previous safepoint.
- // Some other thread stole it.
- (AtomicAccess::cmpxchg(&_plist, previous, (PausedList*)nullptr) != previous)) {
- return HeadTail();
- }
- }
- // We now own previous.
- HeadTail result = previous->take();
- // There might be other threads examining previous (in concurrent
- // take_previous()). Synchronize to wait until any such threads are
- // done with such examination before deleting.
- GlobalCounter::write_synchronize();
- delete previous;
- return result;
-}
-
-G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_all() {
- assert_at_safepoint();
- HeadTail result;
- PausedList* plist = AtomicAccess::load(&_plist);
- if (plist != nullptr) {
- AtomicAccess::store(&_plist, (PausedList*)nullptr);
- result = plist->take();
- delete plist;
- }
- return result;
-}
-
-void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
- assert_not_at_safepoint();
- assert(node->next() == nullptr, "precondition");
- // Ensure there aren't any paused buffers from a previous safepoint.
- enqueue_previous_paused_buffers();
- // Cards for paused buffers are included in count, to contribute to
- // notification checking after the coming safepoint if it doesn't GC.
- // Note that this means the queue's _num_cards differs from the number
- // of cards in the queued buffers when there are paused buffers.
- AtomicAccess::add(&_num_cards, node->size());
- _paused.add(node);
-}
-
-void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(const HeadTail& paused) {
- if (paused._head != nullptr) {
- assert(paused._tail != nullptr, "invariant");
- // Cards from paused buffers are already recorded in the queue count.
- _completed.append(*paused._head, *paused._tail);
- }
-}
-
-void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
- assert_not_at_safepoint();
- enqueue_paused_buffers_aux(_paused.take_previous());
-}
-
-void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
- assert_at_safepoint();
- enqueue_paused_buffers_aux(_paused.take_all());
-}
-
-void G1DirtyCardQueueSet::abandon_completed_buffers() {
- BufferNodeList list = take_all_completed_buffers();
- BufferNode* buffers_to_delete = list._head;
- while (buffers_to_delete != nullptr) {
- BufferNode* bn = buffers_to_delete;
- buffers_to_delete = bn->next();
- bn->set_next(nullptr);
- deallocate_buffer(bn);
- }
-}
-
-// Merge lists of buffers. The source queue set is emptied as a
-// result. The queue sets must share the same allocator.
-void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
- assert(allocator() == src->allocator(), "precondition");
- const BufferNodeList from = src->take_all_completed_buffers();
- if (from._head != nullptr) {
- AtomicAccess::add(&_num_cards, from._entry_count);
- _completed.append(*from._head, *from._tail);
- }
-}
-
-BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
- enqueue_all_paused_buffers();
- verify_num_cards();
- Pair<BufferNode*, BufferNode*> pair = _completed.take_all();
- size_t num_cards = AtomicAccess::load(&_num_cards);
- AtomicAccess::store(&_num_cards, size_t(0));
- return BufferNodeList(pair.first, pair.second, num_cards);
-}
-
-class G1RefineBufferedCards : public StackObj {
- BufferNode* const _node;
- CardTable::CardValue** const _node_buffer;
- const size_t _node_buffer_capacity;
- const uint _worker_id;
- G1ConcurrentRefineStats* _stats;
- G1RemSet* const _g1rs;
-
- static inline ptrdiff_t compare_cards(const CardTable::CardValue* p1,
- const CardTable::CardValue* p2) {
- return p2 - p1;
- }
-
- // Sorts the cards from start_index to _node_buffer_capacity in *decreasing*
- // address order. Tests showed that this order is preferable to not sorting
- // or increasing address order.
- void sort_cards(size_t start_index) {
- QuickSort::sort(&_node_buffer[start_index],
- _node_buffer_capacity - start_index,
- compare_cards);
- }
-
- // Returns the index to the first clean card in the buffer.
- size_t clean_cards() {
- const size_t start = _node->index();
- assert(start <= _node_buffer_capacity, "invariant");
-
- // Two-fingered compaction algorithm similar to the filtering mechanism in
- // SATBMarkQueue. The main difference is that clean_card_before_refine()
- // could change the buffer element in-place.
- // We don't check for SuspendibleThreadSet::should_yield(), because
- // cleaning and redirtying the cards is fast.
- CardTable::CardValue** src = &_node_buffer[start];
- CardTable::CardValue** dst = &_node_buffer[_node_buffer_capacity];
- assert(src <= dst, "invariant");
- for ( ; src < dst; ++src) {
- // Search low to high for a card to keep.
- if (_g1rs->clean_card_before_refine(src)) {
- // Found keeper. Search high to low for a card to discard.
- while (src < --dst) {
- if (!_g1rs->clean_card_before_refine(dst)) {
- *dst = *src; // Replace discard with keeper.
- break;
- }
- }
- // If discard search failed (src == dst), the outer loop will also end.
- }
- }
-
- // dst points to the first retained clean card, or the end of the buffer
- // if all the cards were discarded.
- const size_t first_clean = dst - _node_buffer;
- assert(first_clean >= start && first_clean <= _node_buffer_capacity, "invariant");
- // Discarded cards are considered as refined.
- _stats->inc_refined_cards(first_clean - start);
- _stats->inc_precleaned_cards(first_clean - start);
- return first_clean;
- }
-
- bool refine_cleaned_cards(size_t start_index) {
- bool result = true;
- size_t i = start_index;
- for ( ; i < _node_buffer_capacity; ++i) {
- if (SuspendibleThreadSet::should_yield()) {
- redirty_unrefined_cards(i);
- result = false;
- break;
- }
- _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
- }
- _node->set_index(i);
- _stats->inc_refined_cards(i - start_index);
- return result;
- }
-
- void redirty_unrefined_cards(size_t start) {
- for ( ; start < _node_buffer_capacity; ++start) {
- *_node_buffer[start] = G1CardTable::dirty_card_val();
- }
- }
-
-public:
- G1RefineBufferedCards(BufferNode* node,
- uint worker_id,
- G1ConcurrentRefineStats* stats) :
- _node(node),
- _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
- _node_buffer_capacity(node->capacity()),
- _worker_id(worker_id),
- _stats(stats),
- _g1rs(G1CollectedHeap::heap()->rem_set()) {}
-
- bool refine() {
- size_t first_clean_index = clean_cards();
- if (first_clean_index == _node_buffer_capacity) {
- _node->set_index(first_clean_index);
- return true;
- }
- // This fence serves two purposes. First, the cards must be cleaned
- // before processing the contents. Second, we can't proceed with
- // processing a region until after the read of the region's top in
- // collect_and_clean_cards(), for synchronization with possibly concurrent
- // humongous object allocation (see comment at the StoreStore fence before
- // setting the regions' tops in humongous allocation path).
- // It's okay that reading region's top and reading region's type were racy
- // wrto each other. We need both set, in any order, to proceed.
- OrderAccess::fence();
- sort_cards(first_clean_index);
- return refine_cleaned_cards(first_clean_index);
- }
-};
-
-bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
- uint worker_id,
- G1ConcurrentRefineStats* stats) {
- Ticks start_time = Ticks::now();
- G1RefineBufferedCards buffered_cards(node, worker_id, stats);
- bool result = buffered_cards.refine();
- stats->inc_refinement_time(Ticks::now() - start_time);
- return result;
-}
-
-void G1DirtyCardQueueSet::handle_refined_buffer(BufferNode* node,
- bool fully_processed) {
- if (fully_processed) {
- assert(node->is_empty(), "Buffer not fully consumed: index: %zu, size: %zu",
- node->index(), node->capacity());
- deallocate_buffer(node);
- } else {
- assert(!node->is_empty(), "Buffer fully consumed.");
- // Buffer incompletely processed because there is a pending safepoint.
- // Record partially processed buffer, to be finished later.
- record_paused_buffer(node);
- }
-}
-
-void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node,
- G1ConcurrentRefineStats* stats) {
- enqueue_completed_buffer(new_node);
-
- // No need for mutator refinement if number of cards is below limit.
- if (AtomicAccess::load(&_num_cards) <= AtomicAccess::load(&_mutator_refinement_threshold)) {
- return;
- }
-
- // Don't try to process a buffer that will just get immediately paused.
- // When going into a safepoint it's just a waste of effort.
- // When coming out of a safepoint, Java threads may be running before the
- // yield request (for non-Java threads) has been cleared.
- if (SuspendibleThreadSet::should_yield()) {
- return;
- }
-
- // Only Java threads perform mutator refinement.
- if (!Thread::current()->is_Java_thread()) {
- return;
- }
-
- BufferNode* node = get_completed_buffer();
- if (node == nullptr) return; // Didn't get a buffer to process.
-
- // Refine cards in buffer.
-
- uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
- bool fully_processed = refine_buffer(node, worker_id, stats);
- _free_ids.release_par_id(worker_id); // release the id
-
- // Deal with buffer after releasing id, to let another thread use id.
- handle_refined_buffer(node, fully_processed);
-}
-
-bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
- size_t stop_at,
- G1ConcurrentRefineStats* stats) {
- // Not enough cards to trigger processing.
- if (AtomicAccess::load(&_num_cards) <= stop_at) return false;
-
- BufferNode* node = get_completed_buffer();
- if (node == nullptr) return false; // Didn't get a buffer to process.
-
- bool fully_processed = refine_buffer(node, worker_id, stats);
- handle_refined_buffer(node, fully_processed);
- return true;
-}
-
-void G1DirtyCardQueueSet::abandon_logs_and_stats() {
- assert_at_safepoint();
-
- // Disable mutator refinement until concurrent refinement decides otherwise.
- set_mutator_refinement_threshold(SIZE_MAX);
-
- // Iterate over all the threads, resetting per-thread queues and stats.
- struct AbandonThreadLogClosure : public ThreadClosure {
- G1DirtyCardQueueSet& _qset;
- AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
- virtual void do_thread(Thread* t) {
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
- _qset.reset_queue(queue);
- queue.refinement_stats()->reset();
- }
- } closure(*this);
- Threads::threads_do(&closure);
-
- enqueue_all_paused_buffers();
- abandon_completed_buffers();
-
- // Reset stats from detached threads.
- MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
- _detached_refinement_stats.reset();
-}
-
-void G1DirtyCardQueueSet::update_refinement_stats(G1ConcurrentRefineStats& stats) {
- assert_at_safepoint();
-
- _concatenated_refinement_stats = stats;
-
- enqueue_all_paused_buffers();
- verify_num_cards();
-
- // Collect and reset stats from detached threads.
- MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
- _concatenated_refinement_stats += _detached_refinement_stats;
- _detached_refinement_stats.reset();
-}
-
-G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenate_log_and_stats(Thread* thread) {
- assert_at_safepoint();
-
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
- // Flush the buffer if non-empty. Flush before accumulating and
- // resetting stats, since flushing may modify the stats.
- if (!queue.is_empty()) {
- flush_queue(queue);
- }
-
- G1ConcurrentRefineStats result = *queue.refinement_stats();
- queue.refinement_stats()->reset();
- return result;
-}
-
-G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
- assert_at_safepoint();
- return _concatenated_refinement_stats;
-}
-
-void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) {
- MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
- _detached_refinement_stats += *stats;
- stats->reset();
-}
-
-size_t G1DirtyCardQueueSet::mutator_refinement_threshold() const {
- return AtomicAccess::load(&_mutator_refinement_threshold);
-}
-
-void G1DirtyCardQueueSet::set_mutator_refinement_threshold(size_t value) {
- AtomicAccess::store(&_mutator_refinement_threshold, value);
-}
diff --git a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
deleted file mode 100644
index 6beb536df87..00000000000
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
-#define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
-
-#include "gc/g1/g1CardTable.hpp"
-#include "gc/g1/g1ConcurrentRefineStats.hpp"
-#include "gc/g1/g1FreeIdSet.hpp"
-#include "gc/shared/bufferNode.hpp"
-#include "gc/shared/bufferNodeList.hpp"
-#include "gc/shared/ptrQueue.hpp"
-#include "memory/allocation.hpp"
-#include "memory/padded.hpp"
-#include "utilities/nonblockingQueue.hpp"
-
-class G1PrimaryConcurrentRefineThread;
-class G1DirtyCardQueueSet;
-class G1RedirtyCardsQueueSet;
-class Thread;
-
-// A ptrQueue whose elements are "oops", pointers to object heads.
-class G1DirtyCardQueue: public PtrQueue {
- G1ConcurrentRefineStats* _refinement_stats;
-
-public:
- G1DirtyCardQueue(G1DirtyCardQueueSet* qset);
-
- // Flush before destroying; queue may be used to capture pending work while
- // doing something else, with auto-flush on completion.
- ~G1DirtyCardQueue();
-
- G1ConcurrentRefineStats* refinement_stats() const {
- return _refinement_stats;
- }
-
- // Compiler support.
- static ByteSize byte_offset_of_index() {
- return PtrQueue::byte_offset_of_index();
- }
- using PtrQueue::byte_width_of_index;
-
- static ByteSize byte_offset_of_buf() {
- return PtrQueue::byte_offset_of_buf();
- }
- using PtrQueue::byte_width_of_buf;
-
-};
-
-class G1DirtyCardQueueSet: public PtrQueueSet {
- // Head and tail of a list of BufferNodes, linked through their next()
- // fields. Similar to BufferNodeList, but without the _entry_count.
- struct HeadTail {
- BufferNode* _head;
- BufferNode* _tail;
- HeadTail() : _head(nullptr), _tail(nullptr) {}
- HeadTail(BufferNode* head, BufferNode* tail) : _head(head), _tail(tail) {}
- };
-
- // Concurrent refinement may stop processing in the middle of a buffer if
- // there is a pending safepoint, to avoid long delays to safepoint. A
- // partially processed buffer needs to be recorded for processing by the
- // safepoint if it's a GC safepoint; otherwise it needs to be recorded for
- // further concurrent refinement work after the safepoint. But if the
- // buffer was obtained from the completed buffer queue then it can't simply
- // be added back to the queue, as that would introduce a new source of ABA
- // for the queue.
- //
- // The PausedBuffer object is used to record such buffers for the upcoming
- // safepoint, and provides access to the buffers recorded for previous
- // safepoints. Before obtaining a buffer from the completed buffers queue,
- // we first transfer any buffers from previous safepoints to the queue.
- // This is ABA-safe because threads cannot be in the midst of a queue pop
- // across a safepoint.
- //
- // The paused buffers are conceptually an extension of the completed buffers
- // queue, and operations which need to deal with all of the queued buffers
- // (such as concatenating or abandoning logs) also need to deal with any
- // paused buffers. In general, if a safepoint performs a GC then the paused
- // buffers will be processed as part of it, and there won't be any paused
- // buffers after a GC safepoint.
- class PausedBuffers {
- class PausedList : public CHeapObj<mtGC> {
- BufferNode* volatile _head;
- BufferNode* _tail;
- size_t _safepoint_id;
-
- NONCOPYABLE(PausedList);
-
- public:
- PausedList();
- DEBUG_ONLY(~PausedList();)
-
- // Return true if this list was created to hold buffers for the
- // next safepoint.
- // precondition: not at safepoint.
- bool is_next() const;
-
- // Thread-safe add the buffer to the list.
- // precondition: not at safepoint.
- // precondition: is_next().
- void add(BufferNode* node);
-
- // Take all the buffers from the list. Not thread-safe.
- HeadTail take();
- };
-
- // The most recently created list, which might be for either the next or
- // a previous safepoint, or might be null if the next list hasn't been
- // created yet. We only need one list because of the requirement that
- // threads calling add() must first ensure there are no paused buffers
- // from a previous safepoint. There might be many list instances existing
- // at the same time though; there can be many threads competing to create
- // and install the next list, and meanwhile there can be a thread dealing
- // with the previous list.
- PausedList* volatile _plist;
- DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(PausedList*));
-
- NONCOPYABLE(PausedBuffers);
-
- public:
- PausedBuffers();
- DEBUG_ONLY(~PausedBuffers();)
-
- // Thread-safe add the buffer to paused list for next safepoint.
- // precondition: not at safepoint.
- // precondition: does not have paused buffers from a previous safepoint.
- void add(BufferNode* node);
-
- // Thread-safe take all paused buffers for previous safepoints.
- // precondition: not at safepoint.
- HeadTail take_previous();
-
- // Take all the paused buffers.
- // precondition: at safepoint.
- HeadTail take_all();
- };
-
- DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);
- // Upper bound on the number of cards in the completed and paused buffers.
- volatile size_t _num_cards;
- DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(size_t));
- // If the queue contains more cards than configured here, the
- // mutator must start doing some of the concurrent refinement work.
- volatile size_t _mutator_refinement_threshold;
- DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(size_t));
- // Buffers ready for refinement.
- // NonblockingQueue has inner padding of one cache line.
- NonblockingQueue<BufferNode, &BufferNode::next_ptr> _completed;
- // Add a trailer padding after NonblockingQueue.
- DEFINE_PAD_MINUS_SIZE(3, DEFAULT_PADDING_SIZE, sizeof(BufferNode*));
- // Buffers for which refinement is temporarily paused.
- // PausedBuffers has inner padding, including trailer.
- PausedBuffers _paused;
-
- G1FreeIdSet _free_ids;
-
- G1ConcurrentRefineStats _concatenated_refinement_stats;
- G1ConcurrentRefineStats _detached_refinement_stats;
-
- // Verify _num_cards == sum of cards in the completed queue.
- void verify_num_cards() const NOT_DEBUG_RETURN;
-
- // Thread-safe add a buffer to paused list for next safepoint.
- // precondition: not at safepoint.
- void record_paused_buffer(BufferNode* node);
- void enqueue_paused_buffers_aux(const HeadTail& paused);
- // Thread-safe transfer paused buffers for previous safepoints to the queue.
- // precondition: not at safepoint.
- void enqueue_previous_paused_buffers();
- // Transfer all paused buffers to the queue.
- // precondition: at safepoint.
- void enqueue_all_paused_buffers();
-
- void abandon_completed_buffers();
-
- // Refine the cards in "node" from its index to buffer_capacity.
- // Stops processing if SuspendibleThreadSet::should_yield() is true.
- // Returns true if the entire buffer was processed, false if there
- // is a pending yield request. The node's index is updated to exclude
- // the processed elements, e.g. up to the element before processing
- // stopped, or one past the last element if the entire buffer was
- // processed. Updates stats.
- bool refine_buffer(BufferNode* node,
- uint worker_id,
- G1ConcurrentRefineStats* stats);
-
- // Deal with buffer after a call to refine_buffer. If fully processed,
- // deallocate the buffer. Otherwise, record it as paused.
- void handle_refined_buffer(BufferNode* node, bool fully_processed);
-
- // Thread-safe attempt to remove and return the first buffer from
- // the _completed queue.
- // Returns null if the queue is empty, or if a concurrent push/append
- // interferes. It uses GlobalCounter critical section to avoid ABA problem.
- BufferNode* dequeue_completed_buffer();
- // Remove and return a completed buffer from the list, or return null
- // if none available.
- BufferNode* get_completed_buffer();
-
- // Called when queue is full or has no buffer.
- void handle_zero_index(G1DirtyCardQueue& queue);
-
- // Enqueue the buffer, and optionally perform refinement by the mutator.
- // Mutator refinement is only done by Java threads, and only if there
- // are more than mutator_refinement_threshold cards in the completed buffers.
- // Updates stats.
- //
- // Mutator refinement, if performed, stops processing a buffer if
- // SuspendibleThreadSet::should_yield(), recording the incompletely
- // processed buffer for later processing of the remainder.
- void handle_completed_buffer(BufferNode* node, G1ConcurrentRefineStats* stats);
-
-public:
- G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
- ~G1DirtyCardQueueSet();
-
- // The number of parallel ids that can be claimed to allow collector or
- // mutator threads to do card-processing work.
- static uint num_par_ids();
-
- static void handle_zero_index_for_thread(Thread* t);
-
- virtual void enqueue_completed_buffer(BufferNode* node);
-
- // Upper bound on the number of cards currently in this queue set.
- // Read without synchronization. The value may be high because there
- // is a concurrent modification of the set of buffers.
- size_t num_cards() const;
-
- void merge_bufferlists(G1RedirtyCardsQueueSet* src);
-
- BufferNodeList take_all_completed_buffers();
-
- void flush_queue(G1DirtyCardQueue& queue);
-
- using CardValue = G1CardTable::CardValue;
- void enqueue(G1DirtyCardQueue& queue, volatile CardValue* card_ptr);
-
- // If there are more than stop_at cards in the completed buffers, pop
- // a buffer, refine its contents, and return true. Otherwise return
- // false. Updates stats.
- //
- // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
- // recording the incompletely processed buffer for later processing of
- // the remainder.
- bool refine_completed_buffer_concurrently(uint worker_id,
- size_t stop_at,
- G1ConcurrentRefineStats* stats);
-
- // If a full collection is happening, reset per-thread refinement stats and
- // partial logs, and release completed logs. The full collection will make
- // them all irrelevant.
- // precondition: at safepoint.
- void abandon_logs_and_stats();
-
- // Update global refinement statistics with the ones given and the ones from
- // detached threads.
- // precondition: at safepoint.
- void update_refinement_stats(G1ConcurrentRefineStats& stats);
- // Add the given thread's partial logs to the global list and return and reset
- // its refinement stats.
- // precondition: at safepoint.
- G1ConcurrentRefineStats concatenate_log_and_stats(Thread* thread);
-
- // Return the total of mutator refinement stats for all threads.
- // precondition: at safepoint.
- // precondition: only call after concatenate_logs_and_stats.
- G1ConcurrentRefineStats concatenated_refinement_stats() const;
-
- // Accumulate refinement stats from threads that are detaching.
- void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);
-
- // Number of cards above which mutator threads should do refinement.
- size_t mutator_refinement_threshold() const;
-
- // Set number of cards above which mutator threads should do refinement.
- void set_mutator_refinement_threshold(size_t value);
-};
-
-#endif // SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
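The block comment in the deleted header above explains why partially refined buffers had to be parked ("paused") across a safepoint instead of being pushed back onto the completed-buffer queue, which would reintroduce ABA on the lock-free pop. A minimal standalone sketch of that idea, using a mutex and an explicit epoch counter instead of the original lock-free PausedList machinery; Buffer and the epoch parameter are simplified stand-ins, not HotSpot types:

#include <cstddef>
#include <mutex>
#include <vector>

struct Buffer { /* card buffer payload elided */ };

class PausedBuffers {
  std::mutex _lock;
  std::vector<Buffer*> _buffers;  // buffers recorded while '_epoch' was the current safepoint epoch
  std::size_t _epoch = 0;

public:
  // Record a partially refined buffer for processing at/after the next safepoint.
  // It must not go back onto the completed-buffer queue, or a concurrent pop
  // could observe ABA on the queue's head.
  void add(Buffer* b, std::size_t current_epoch) {
    std::lock_guard<std::mutex> g(_lock);
    _buffers.push_back(b);
    _epoch = current_epoch;
  }

  // Take buffers that were recorded before the most recent safepoint. Callers
  // re-enqueue them onto the completed-buffer queue before popping from it,
  // which is safe because no thread can be mid-pop across a safepoint.
  std::vector<Buffer*> take_previous(std::size_t current_epoch) {
    std::lock_guard<std::mutex> g(_lock);
    if (_epoch == current_epoch) {
      return {};  // still destined for the next safepoint
    }
    std::vector<Buffer*> out;
    out.swap(_buffers);
    return out;
  }
};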
diff --git a/src/hotspot/share/gc/g1/g1FromCardCache.cpp b/src/hotspot/share/gc/g1/g1FromCardCache.cpp
index 4a29bcbc6dc..8f5c84da0e3 100644
--- a/src/hotspot/share/gc/g1/g1FromCardCache.cpp
+++ b/src/hotspot/share/gc/g1/g1FromCardCache.cpp
@@ -22,8 +22,6 @@
*
*/
-#include "gc/g1/g1ConcurrentRefine.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1FromCardCache.hpp"
#include "gc/shared/gc_globals.hpp"
#include "memory/padded.inline.hpp"
@@ -80,7 +78,7 @@ void G1FromCardCache::print(outputStream* out) {
#endif
uint G1FromCardCache::num_par_rem_sets() {
- return G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads + MAX2(ConcGCThreads, ParallelGCThreads);
+ return G1ConcRefinementThreads + ConcGCThreads;
}
void G1FromCardCache::clear(uint region_idx) {
diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp
index cc71cf86172..5dbf70f36b3 100644
--- a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp
+++ b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp
@@ -147,6 +147,10 @@ void G1FullGCCompactTask::free_non_overlapping_regions(uint src_start_idx, uint
for (uint i = non_overlapping_start; i <= src_end_idx; ++i) {
G1HeapRegion* hr = _g1h->region_at(i);
+ if (VerifyDuringGC) {
+ // Satisfy some asserts in free_..._region
+ hr->clear_both_card_tables();
+ }
_g1h->free_humongous_region(hr, nullptr);
}
}
diff --git a/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp b/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp
index 25002186280..52b0d04a500 100644
--- a/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp
+++ b/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp
@@ -41,7 +41,7 @@ void G1FullGCMarkTask::work(uint worker_id) {
Ticks start = Ticks::now();
ResourceMark rm;
G1FullGCMarker* marker = collector()->marker(worker_id);
- MarkingNMethodClosure code_closure(marker->mark_closure(), !NMethodToOopClosure::FixRelocations, true /* keepalive nmethods */);
+ MarkingNMethodClosure code_closure(marker->mark_closure());
if (ClassUnloading) {
_root_processor.process_strong_roots(marker->mark_closure(),
diff --git a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.inline.hpp
index f9868bba678..64d85660ca7 100644
--- a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.inline.hpp
@@ -35,6 +35,10 @@
#include "gc/shared/fullGCForwarding.inline.hpp"
void G1DetermineCompactionQueueClosure::free_empty_humongous_region(G1HeapRegion* hr) {
+ if (VerifyDuringGC) {
+ // Satisfy some asserts in free_..._region.
+ hr->clear_both_card_tables();
+ }
_g1h->free_humongous_region(hr, nullptr);
_collector->set_free(hr->hrm_index());
add_to_compaction_queue(hr);
diff --git a/src/hotspot/share/gc/g1/g1FullGCResetMetadataTask.cpp b/src/hotspot/share/gc/g1/g1FullGCResetMetadataTask.cpp
index ae9a78a9cdf..02397392a6e 100644
--- a/src/hotspot/share/gc/g1/g1FullGCResetMetadataTask.cpp
+++ b/src/hotspot/share/gc/g1/g1FullGCResetMetadataTask.cpp
@@ -32,7 +32,7 @@ G1FullGCResetMetadataTask::G1ResetMetadataClosure::G1ResetMetadataClosure(G1Full
void G1FullGCResetMetadataTask::G1ResetMetadataClosure::reset_region_metadata(G1HeapRegion* hr) {
hr->rem_set()->clear();
- hr->clear_cardtable();
+ hr->clear_both_card_tables();
}
bool G1FullGCResetMetadataTask::G1ResetMetadataClosure::do_heap_region(G1HeapRegion* hr) {
diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
index 15fb65c5700..b211b1e32fb 100644
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
@@ -50,8 +50,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
{
assert(max_gc_threads > 0, "Must have some GC threads");
- _gc_par_phases[RetireTLABsAndFlushLogs] = new WorkerDataArray<double>("RetireTLABsAndFlushLogs", "JT Retire TLABs And Flush Logs (ms):", max_gc_threads);
- _gc_par_phases[NonJavaThreadFlushLogs] = new WorkerDataArray<double>("NonJavaThreadFlushLogs", "Non-JT Flush Logs (ms):", max_gc_threads);
+ _gc_par_phases[RetireTLABs] = new WorkerDataArray<double>("RetireTLABs", "JavaThread Retire TLABs (ms):", max_gc_threads);
_gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>("GCWorkerStart", "GC Worker Start (ms):", max_gc_threads);
_gc_par_phases[ExtRootScan] = new WorkerDataArray<double>("ExtRootScan", "Ext Root Scanning (ms):", max_gc_threads);
@@ -83,7 +82,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[OptMergeRS]->create_thread_work_items(GCMergeRSWorkItemsStrings[i], i);
}
- _gc_par_phases[MergeLB] = new WorkerDataArray<double>("MergeLB", "Log Buffers (ms):", max_gc_threads);
+ _gc_par_phases[SweepRT] = new WorkerDataArray<double>("SweepRT", "Sweep (ms):", max_gc_threads);
_gc_par_phases[ScanHR] = new WorkerDataArray<double>("ScanHR", "Scan Heap Roots (ms):", max_gc_threads);
_gc_par_phases[OptScanHR] = new WorkerDataArray<double>("OptScanHR", "Optional Scan Heap Roots (ms):", max_gc_threads);
_gc_par_phases[CodeRoots] = new WorkerDataArray<double>("CodeRoots", "Code Root Scan (ms):", max_gc_threads);
@@ -98,7 +97,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[MergePSS] = new WorkerDataArray<double>("MergePSS", "Merge Per-Thread State (ms):", max_gc_threads);
_gc_par_phases[RestoreEvacuationFailedRegions] = new WorkerDataArray<double>("RestoreEvacuationFailedRegions", "Restore Evacuation Failed Regions (ms):", max_gc_threads);
_gc_par_phases[RemoveSelfForwards] = new WorkerDataArray<double>("RemoveSelfForwards", "Remove Self Forwards (ms):", max_gc_threads);
- _gc_par_phases[ClearCardTable] = new WorkerDataArray<double>("ClearLoggedCards", "Clear Logged Cards (ms):", max_gc_threads);
+ _gc_par_phases[ClearCardTable] = new WorkerDataArray<double>("ClearPendingCards", "Clear Pending Cards (ms):", max_gc_threads);
_gc_par_phases[RecalculateUsed] = new WorkerDataArray<double>("RecalculateUsed", "Recalculate Used Memory (ms):", max_gc_threads);
#if COMPILER2_OR_JVMCI
_gc_par_phases[UpdateDerivedPointers] = new WorkerDataArray<double>("UpdateDerivedPointers", "Update Derived Pointers (ms):", max_gc_threads);
@@ -107,11 +106,15 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[ResetPartialArrayStateManager] = new WorkerDataArray<double>("ResetPartialArrayStateManager", "Reset Partial Array State Manager (ms):", max_gc_threads);
_gc_par_phases[ProcessEvacuationFailedRegions] = new WorkerDataArray<double>("ProcessEvacuationFailedRegions", "Process Evacuation Failed Regions (ms):", max_gc_threads);
+ _gc_par_phases[ScanHR]->create_thread_work_items("Pending Cards:", ScanHRPendingCards);
+ _gc_par_phases[ScanHR]->create_thread_work_items("Scanned Empty:", ScanHRScannedEmptyCards);
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Cards:", ScanHRScannedCards);
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Blocks:", ScanHRScannedBlocks);
_gc_par_phases[ScanHR]->create_thread_work_items("Claimed Chunks:", ScanHRClaimedChunks);
_gc_par_phases[ScanHR]->create_thread_work_items("Found Roots:", ScanHRFoundRoots);
+ _gc_par_phases[OptScanHR]->create_thread_work_items("Pending Cards:", ScanHRPendingCards);
+ _gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Empty:", ScanHRScannedEmptyCards);
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Cards:", ScanHRScannedCards);
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Blocks:", ScanHRScannedBlocks);
_gc_par_phases[OptScanHR]->create_thread_work_items("Claimed Chunks:", ScanHRClaimedChunks);
@@ -119,9 +122,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Refs:", ScanHRScannedOptRefs);
_gc_par_phases[OptScanHR]->create_thread_work_items("Used Memory:", ScanHRUsedMemory);
- _gc_par_phases[MergeLB]->create_thread_work_items("Dirty Cards:", MergeLBDirtyCards);
- _gc_par_phases[MergeLB]->create_thread_work_items("Skipped Cards:", MergeLBSkippedCards);
-
_gc_par_phases[CodeRoots]->create_thread_work_items("Scanned Nmethods:", CodeRootsScannedNMethods);
_gc_par_phases[OptCodeRoots]->create_thread_work_items("Scanned Nmethods:", CodeRootsScannedNMethods);
@@ -129,7 +129,10 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[MergePSS]->create_thread_work_items("Copied Bytes:", MergePSSCopiedBytes);
_gc_par_phases[MergePSS]->create_thread_work_items("LAB Waste:", MergePSSLABWasteBytes);
_gc_par_phases[MergePSS]->create_thread_work_items("LAB Undo Waste:", MergePSSLABUndoWasteBytes);
- _gc_par_phases[MergePSS]->create_thread_work_items("Evac Fail Extra Cards:", MergePSSEvacFailExtra);
+ _gc_par_phases[MergePSS]->create_thread_work_items("Pending Cards:", MergePSSPendingCards);
+ _gc_par_phases[MergePSS]->create_thread_work_items("To-Young-Gen Cards:", MergePSSToYoungGenCards);
+ _gc_par_phases[MergePSS]->create_thread_work_items("Evac-Fail Cards:", MergePSSEvacFail);
+ _gc_par_phases[MergePSS]->create_thread_work_items("Marked Cards:", MergePSSMarked);
_gc_par_phases[RestoreEvacuationFailedRegions]->create_thread_work_items("Evacuation Failed Regions:", RestoreEvacFailureRegionsEvacFailedNum);
_gc_par_phases[RestoreEvacuationFailedRegions]->create_thread_work_items("Pinned Regions:", RestoreEvacFailureRegionsPinnedNum);
@@ -150,9 +153,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[OptTermination]->create_thread_work_items("Optional Termination Attempts:");
- _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>("RedirtyCards", "Redirty Logged Cards (ms):", max_gc_threads);
- _gc_par_phases[RedirtyCards]->create_thread_work_items("Redirtied Cards:");
-
_gc_par_phases[ResizeThreadLABs] = new WorkerDataArray<double>("ResizeTLABs", "Resize TLABs (ms):", max_gc_threads);
_gc_par_phases[FreeCollectionSet] = new WorkerDataArray<double>("FreeCSet", "Free Collection Set (ms):", max_gc_threads);
@@ -171,9 +171,9 @@ void G1GCPhaseTimes::reset() {
_cur_optional_evac_time_ms = 0.0;
_cur_collection_nmethod_list_cleanup_time_ms = 0.0;
_cur_merge_heap_roots_time_ms = 0.0;
+ _cur_merge_refinement_table_time_ms = 0.0;
_cur_optional_merge_heap_roots_time_ms = 0.0;
_cur_prepare_merge_heap_roots_time_ms = 0.0;
- _cur_distribute_log_buffers_time_ms = 0.0;
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
_cur_pre_evacuate_prepare_time_ms = 0.0;
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
@@ -249,7 +249,7 @@ void G1GCPhaseTimes::record_gc_pause_end() {
ASSERT_PHASE_UNINITIALIZED(MergeER);
ASSERT_PHASE_UNINITIALIZED(MergeRS);
ASSERT_PHASE_UNINITIALIZED(OptMergeRS);
- ASSERT_PHASE_UNINITIALIZED(MergeLB);
+ ASSERT_PHASE_UNINITIALIZED(SweepRT);
ASSERT_PHASE_UNINITIALIZED(ScanHR);
ASSERT_PHASE_UNINITIALIZED(CodeRoots);
ASSERT_PHASE_UNINITIALIZED(OptCodeRoots);
@@ -425,8 +425,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
}
debug_time("Pre Evacuate Prepare", _cur_pre_evacuate_prepare_time_ms);
- debug_phase(_gc_par_phases[RetireTLABsAndFlushLogs], 1);
- debug_phase(_gc_par_phases[NonJavaThreadFlushLogs], 1);
+ debug_phase(_gc_par_phases[RetireTLABs], 1);
debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
debug_time("Region Register", _cur_region_register_time);
@@ -458,8 +457,8 @@ double G1GCPhaseTimes::print_evacuate_initial_collection_set() const {
debug_time("Prepare Merge Heap Roots", _cur_prepare_merge_heap_roots_time_ms);
debug_phase_merge_remset();
- debug_time("Distribute Log Buffers", _cur_distribute_log_buffers_time_ms);
- debug_phase(_gc_par_phases[MergeLB]);
+ debug_time("Merge Refinement Table", _cur_merge_refinement_table_time_ms);
+ debug_phase(_gc_par_phases[SweepRT], 1);
info_time("Evacuate Collection Set", _cur_collection_initial_evac_time_ms);
@@ -521,7 +520,6 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
if (G1CollectedHeap::heap()->should_sample_collection_set_candidates()) {
debug_phase(_gc_par_phases[SampleCollectionSetCandidates], 1);
}
- debug_phase(_gc_par_phases[RedirtyCards], 1);
if (UseTLAB && ResizeTLAB) {
debug_phase(_gc_par_phases[ResizeThreadLABs], 1);
}
diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
index 045160a6162..8223148b791 100644
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
@@ -46,8 +46,7 @@ class G1GCPhaseTimes : public CHeapObj {
public:
enum GCParPhases {
- RetireTLABsAndFlushLogs,
- NonJavaThreadFlushLogs,
+ RetireTLABs,
GCWorkerStart,
ExtRootScan,
ThreadRoots,
@@ -59,7 +58,7 @@ class G1GCPhaseTimes : public CHeapObj {
MergeER = StrongOopStorageSetRoots + EnumRange<OopStorageSet::StrongId>().size(),
MergeRS,
OptMergeRS,
- MergeLB,
+ SweepRT,
ScanHR,
OptScanHR,
CodeRoots,
@@ -71,7 +70,6 @@ class G1GCPhaseTimes : public CHeapObj {
Other,
GCWorkerTotal,
GCWorkerEnd,
- RedirtyCards,
FreeCollectionSet,
YoungFreeCSet,
NonYoungFreeCSet,
@@ -111,16 +109,19 @@ class G1GCPhaseTimes : public CHeapObj {
MergeRSHowlArrayOfCards,
MergeRSHowlBitmap,
MergeRSHowlFull,
- MergeRSCards,
+ MergeRSFromRemSetCards,
+ MergeRSTotalCards,
MergeRSContainersSentinel
};
static constexpr const char* GCMergeRSWorkItemsStrings[MergeRSContainersSentinel] =
{ "Merged Inline:", "Merged ArrayOfCards:", "Merged Howl:", "Merged Full:",
"Merged Howl Inline:", "Merged Howl ArrayOfCards:", "Merged Howl BitMap:", "Merged Howl Full:",
- "Merged Cards:" };
+ "Merged From RS Cards:", "Total Cards:" };
enum GCScanHRWorkItems {
+ ScanHRPendingCards,
+ ScanHRScannedEmptyCards,
ScanHRScannedCards,
ScanHRScannedBlocks,
ScanHRClaimedChunks,
@@ -129,11 +130,6 @@ class G1GCPhaseTimes : public CHeapObj {
ScanHRUsedMemory
};
- enum GCMergeLBWorkItems {
- MergeLBDirtyCards,
- MergeLBSkippedCards
- };
-
enum GCCodeRootsWorkItems {
CodeRootsScannedNMethods
};
@@ -143,7 +139,10 @@ class G1GCPhaseTimes : public CHeapObj {
MergePSSLABSize,
MergePSSLABWasteBytes,
MergePSSLABUndoWasteBytes,
- MergePSSEvacFailExtra
+ MergePSSPendingCards, // Cards still to be scanned that the GC generated (from cross-region references and evacuation failure).
+ MergePSSToYoungGenCards, // Cards for references into the young generation generated by the GC.
+ MergePSSEvacFail, // Dirty cards generated by the GC due to evacuation failure.
+ MergePSSMarked, // Total newly marked cards.
};
enum RestoreEvacFailureRegionsWorkItems {
@@ -176,9 +175,9 @@ class G1GCPhaseTimes : public CHeapObj {
double _cur_collection_nmethod_list_cleanup_time_ms;
double _cur_merge_heap_roots_time_ms;
+ // Merge refinement table time. Note that this time is included in _cur_merge_heap_roots_time_ms.
+ double _cur_merge_refinement_table_time_ms;
double _cur_optional_merge_heap_roots_time_ms;
- // Included in above merge and optional-merge time.
- double _cur_distribute_log_buffers_time_ms;
double _cur_prepare_merge_heap_roots_time_ms;
double _cur_optional_prepare_merge_heap_roots_time_ms;
@@ -302,6 +301,10 @@ class G1GCPhaseTimes : public CHeapObj {
_cur_merge_heap_roots_time_ms += ms;
}
+ void record_merge_refinement_table_time(double ms) {
+ _cur_merge_refinement_table_time_ms = ms;
+ }
+
void record_or_add_optional_merge_heap_roots_time(double ms) {
_cur_optional_merge_heap_roots_time_ms += ms;
}
@@ -310,10 +313,6 @@ class G1GCPhaseTimes : public CHeapObj {
_cur_prepare_merge_heap_roots_time_ms += ms;
}
- void record_distribute_log_buffers_time_ms(double ms) {
- _cur_distribute_log_buffers_time_ms += ms;
- }
-
void record_or_add_optional_prepare_merge_heap_roots_time(double ms) {
_cur_optional_prepare_merge_heap_roots_time_ms += ms;
}
@@ -382,10 +381,6 @@ class G1GCPhaseTimes : public CHeapObj {
_recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
}
- double cur_distribute_log_buffers_time_ms() {
- return _cur_distribute_log_buffers_time_ms;
- }
-
double cur_collection_par_time_ms() {
return _cur_collection_initial_evac_time_ms +
_cur_optional_evac_time_ms +
@@ -396,6 +391,10 @@ class G1GCPhaseTimes : public CHeapObj {
_cur_collection_nmethod_list_cleanup_time_ms;
}
+ double cur_merge_refinement_table_time() const {
+ return _cur_merge_refinement_table_time_ms;
+ }
+
double cur_resize_heap_time_ms() {
return _cur_resize_heap_time_ms;
}
diff --git a/src/hotspot/share/gc/g1/g1HeapRegion.cpp b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
index 09bdfefccb7..ca4359dcc24 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegion.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
@@ -39,6 +39,7 @@
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/iterator.inline.hpp"
+#include "memory/memRegion.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
@@ -137,11 +138,21 @@ void G1HeapRegion::hr_clear(bool clear_space) {
if (clear_space) clear(SpaceDecorator::Mangle);
}
-void G1HeapRegion::clear_cardtable() {
+void G1HeapRegion::clear_card_table() {
G1CardTable* ct = G1CollectedHeap::heap()->card_table();
ct->clear_MemRegion(MemRegion(bottom(), end()));
}
+void G1HeapRegion::clear_refinement_table() {
+ G1CardTable* ct = G1CollectedHeap::heap()->refinement_table();
+ ct->clear_MemRegion(MemRegion(bottom(), end()));
+}
+
+void G1HeapRegion::clear_both_card_tables() {
+ clear_card_table();
+ clear_refinement_table();
+}
+
void G1HeapRegion::set_free() {
if (!is_free()) {
report_region_type_change(G1HeapRegionTraceType::Free);
@@ -591,8 +602,12 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
G1HeapRegion* _from;
G1HeapRegion* _to;
- CardValue _cv_obj;
- CardValue _cv_field;
+
+ CardValue _cv_obj_ct; // In card table.
+ CardValue _cv_field_ct;
+
+ CardValue _cv_obj_rt; // In refinement table.
+ CardValue _cv_field_rt;
RemSetChecker(G1VerifyFailureCounter* failures, oop containing_obj, T* p, oop obj)
: Checker(failures, containing_obj, p, obj) {
@@ -600,19 +615,23 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
_to = this->_g1h->heap_region_containing(obj);
CardTable* ct = this->_g1h->card_table();
- _cv_obj = *ct->byte_for_const(this->_containing_obj);
- _cv_field = *ct->byte_for_const(p);
+ _cv_obj_ct = *ct->byte_for_const(this->_containing_obj);
+ _cv_field_ct = *ct->byte_for_const(p);
+
+ ct = this->_g1h->refinement_table();
+ _cv_obj_rt = *ct->byte_for_const(this->_containing_obj);
+ _cv_field_rt = *ct->byte_for_const(p);
}
bool failed() const {
if (_from != _to && !_from->is_young() &&
_to->rem_set()->is_complete() &&
_from->rem_set()->cset_group() != _to->rem_set()->cset_group()) {
- const CardValue dirty = G1CardTable::dirty_card_val();
+ const CardValue clean = G1CardTable::clean_card_val();
return !(_to->rem_set()->contains_reference(this->_p) ||
(this->_containing_obj->is_objArray() ?
- _cv_field == dirty :
- _cv_obj == dirty || _cv_field == dirty));
+ (_cv_field_ct != clean || _cv_field_rt != clean) :
+ (_cv_obj_ct != clean || _cv_field_ct != clean || _cv_obj_rt != clean || _cv_field_rt != clean)));
}
return false;
}
@@ -630,7 +649,8 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
log.error("Missing rem set entry:");
this->print_containing_obj(&ls, _from);
this->print_referenced_obj(&ls, _to, "");
- log.error("Obj head CV = %d, field CV = %d.", _cv_obj, _cv_field);
+ log.error("CT obj head CV = %d, field CV = %d.", _cv_obj_ct, _cv_field_ct);
+ log.error("RT Obj head CV = %d, field CV = %d.", _cv_obj_rt, _cv_field_rt);
log.error("----------");
}
};
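With a refinement table alongside the card table, the verification code above accepts a cross-region reference if the remembered set contains it or if any relevant card is still non-clean in either table. A hedged restatement of that predicate as a standalone function; CardState and clean_card are illustrative names, not part of the patch:

#include <cstdint>

using CardValue = std::uint8_t;
constexpr CardValue clean_card = 0;  // assumed encoding of "clean", for illustration only

struct CardState {
  CardValue obj_ct;    // card of the object header in the card table
  CardValue field_ct;  // card of the field in the card table
  CardValue obj_rt;    // card of the object header in the refinement table
  CardValue field_rt;  // card of the field in the refinement table
};

// Mirrors the failed() check in the diff: a reference is only reported as a
// missing remembered-set entry if no card in either table can still cover it.
bool missing_remset_entry(bool remset_contains, bool is_obj_array, const CardState& cs) {
  if (remset_contains) {
    return false;
  }
  bool covered_by_cards = is_obj_array
      ? (cs.field_ct != clean_card || cs.field_rt != clean_card)
      : (cs.obj_ct != clean_card || cs.field_ct != clean_card ||
         cs.obj_rt != clean_card || cs.field_rt != clean_card);
  return !covered_by_cards;
}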
diff --git a/src/hotspot/share/gc/g1/g1HeapRegion.hpp b/src/hotspot/share/gc/g1/g1HeapRegion.hpp
index 71584ffb24d..17ec3055b52 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegion.hpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegion.hpp
@@ -42,7 +42,6 @@ class G1CollectedHeap;
class G1CMBitMap;
class G1CSetCandidateGroup;
class G1Predictions;
-class G1HeapRegion;
class G1HeapRegionRemSet;
class G1HeapRegionSetBase;
class nmethod;
@@ -478,7 +477,10 @@ public:
// Callers must ensure this is not called by multiple threads at the same time.
void hr_clear(bool clear_space);
// Clear the card table corresponding to this region.
- void clear_cardtable();
+ void clear_card_table();
+ void clear_refinement_table();
+
+ void clear_both_card_tables();
// Notify the region that an evacuation failure occurred for an object within this
// region.
diff --git a/src/hotspot/share/gc/g1/g1HeapRegionManager.cpp b/src/hotspot/share/gc/g1/g1HeapRegionManager.cpp
index d4286a1caeb..795b6543bae 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegionManager.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegionManager.cpp
@@ -63,7 +63,8 @@ public:
G1HeapRegionManager::G1HeapRegionManager() :
_bot_mapper(nullptr),
- _cardtable_mapper(nullptr),
+ _card_table_mapper(nullptr),
+ _refinement_table_mapper(nullptr),
_committed_map(),
_next_highest_used_hrm_index(0),
_regions(), _heap_mapper(nullptr),
@@ -74,7 +75,8 @@ G1HeapRegionManager::G1HeapRegionManager() :
void G1HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage,
G1RegionToSpaceMapper* bitmap,
G1RegionToSpaceMapper* bot,
- G1RegionToSpaceMapper* cardtable) {
+ G1RegionToSpaceMapper* card_table,
+ G1RegionToSpaceMapper* refinement_table) {
_next_highest_used_hrm_index = 0;
_heap_mapper = heap_storage;
@@ -82,7 +84,8 @@ void G1HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage,
_bitmap_mapper = bitmap;
_bot_mapper = bot;
- _cardtable_mapper = cardtable;
+ _card_table_mapper = card_table;
+ _refinement_table_mapper = refinement_table;
_regions.initialize(heap_storage->reserved(), G1HeapRegion::GrainBytes);
@@ -186,7 +189,8 @@ void G1HeapRegionManager::commit_regions(uint index, size_t num_regions, WorkerT
_bitmap_mapper->commit_regions(index, num_regions, pretouch_workers);
_bot_mapper->commit_regions(index, num_regions, pretouch_workers);
- _cardtable_mapper->commit_regions(index, num_regions, pretouch_workers);
+ _card_table_mapper->commit_regions(index, num_regions, pretouch_workers);
+ _refinement_table_mapper->commit_regions(index, num_regions, pretouch_workers);
}
void G1HeapRegionManager::uncommit_regions(uint start, uint num_regions) {
@@ -209,7 +213,8 @@ void G1HeapRegionManager::uncommit_regions(uint start, uint num_regions) {
_bitmap_mapper->uncommit_regions(start, num_regions);
_bot_mapper->uncommit_regions(start, num_regions);
- _cardtable_mapper->uncommit_regions(start, num_regions);
+ _card_table_mapper->uncommit_regions(start, num_regions);
+ _refinement_table_mapper->uncommit_regions(start, num_regions);
_committed_map.uncommit(start, end);
}
@@ -261,19 +266,23 @@ void G1HeapRegionManager::clear_auxiliary_data_structures(uint start, uint num_r
// Signal G1BlockOffsetTable to clear the given regions.
_bot_mapper->signal_mapping_changed(start, num_regions);
// Signal G1CardTable to clear the given regions.
- _cardtable_mapper->signal_mapping_changed(start, num_regions);
+ _card_table_mapper->signal_mapping_changed(start, num_regions);
+ // Signal refinement table to clear the given regions.
+ _refinement_table_mapper->signal_mapping_changed(start, num_regions);
}
MemoryUsage G1HeapRegionManager::get_auxiliary_data_memory_usage() const {
size_t used_sz =
_bitmap_mapper->committed_size() +
_bot_mapper->committed_size() +
- _cardtable_mapper->committed_size();
+ _card_table_mapper->committed_size() +
+ _refinement_table_mapper->committed_size();
size_t committed_sz =
_bitmap_mapper->reserved_size() +
_bot_mapper->reserved_size() +
- _cardtable_mapper->reserved_size();
+ _card_table_mapper->reserved_size() +
+ _refinement_table_mapper->reserved_size();
return MemoryUsage(0, used_sz, committed_sz, committed_sz);
}
diff --git a/src/hotspot/share/gc/g1/g1HeapRegionManager.hpp b/src/hotspot/share/gc/g1/g1HeapRegionManager.hpp
index 19ae9887e94..b4ce3b0a8be 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegionManager.hpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegionManager.hpp
@@ -74,7 +74,8 @@ class G1HeapRegionManager: public CHeapObj {
friend class G1HeapRegionClaimer;
G1RegionToSpaceMapper* _bot_mapper;
- G1RegionToSpaceMapper* _cardtable_mapper;
+ G1RegionToSpaceMapper* _card_table_mapper;
+ G1RegionToSpaceMapper* _refinement_table_mapper;
// Keeps track of the currently committed regions in the heap. The committed regions
// can either be active (ready for use) or inactive (ready for uncommit).
@@ -161,7 +162,8 @@ public:
void initialize(G1RegionToSpaceMapper* heap_storage,
G1RegionToSpaceMapper* bitmap,
G1RegionToSpaceMapper* bot,
- G1RegionToSpaceMapper* cardtable);
+ G1RegionToSpaceMapper* card_table,
+ G1RegionToSpaceMapper* refinement_table);
// Return the "dummy" region used for G1AllocRegion. This is currently a hardwired
// new G1HeapRegion that owns G1HeapRegion at index 0. Since at the moment we commit
diff --git a/src/hotspot/share/gc/g1/g1HeapVerifier.cpp b/src/hotspot/share/gc/g1/g1HeapVerifier.cpp
index c5af7e34dd9..21b3545f7e0 100644
--- a/src/hotspot/share/gc/g1/g1HeapVerifier.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapVerifier.cpp
@@ -42,6 +42,7 @@
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
+#include "runtime/threads.hpp"
int G1HeapVerifier::_enabled_verification_types = G1HeapVerifier::G1VerifyAll;
@@ -528,6 +529,7 @@ void G1HeapVerifier::verify_before_gc() {
void G1HeapVerifier::verify_after_gc() {
verify(VerifyOption::G1UseConcMarking, "After GC");
+ verify_card_tables_in_sync();
}
void G1HeapVerifier::verify_bitmap_clear(bool from_tams) {
@@ -556,17 +558,17 @@ void G1HeapVerifier::verify_bitmap_clear(bool from_tams) {
G1CollectedHeap::heap()->heap_region_iterate(&cl);
}
-#ifndef PRODUCT
class G1VerifyCardTableCleanup: public G1HeapRegionClosure {
G1HeapVerifier* _verifier;
public:
G1VerifyCardTableCleanup(G1HeapVerifier* verifier)
: _verifier(verifier) { }
virtual bool do_heap_region(G1HeapRegion* r) {
+ _verifier->verify_ct_clean_region(r);
if (r->is_survivor()) {
- _verifier->verify_dirty_region(r);
+ _verifier->verify_rt_clean_region(r);
} else {
- _verifier->verify_not_dirty_region(r);
+ _verifier->verify_rt_clean_from_top(r);
}
return false;
}
@@ -579,14 +581,35 @@ void G1HeapVerifier::verify_card_table_cleanup() {
}
}
-void G1HeapVerifier::verify_not_dirty_region(G1HeapRegion* hr) {
- // All of the region should be clean.
- G1CardTable* ct = _g1h->card_table();
- MemRegion mr(hr->bottom(), hr->end());
- ct->verify_not_dirty_region(mr);
+class G1VerifyCardTablesClean: public G1HeapRegionClosure {
+ G1HeapVerifier* _verifier;
+ bool _both_card_tables;
+
+public:
+ G1VerifyCardTablesClean(G1HeapVerifier* verifier, bool both_card_tables = true)
+ : _verifier(verifier), _both_card_tables(both_card_tables) { }
+
+ virtual bool do_heap_region(G1HeapRegion* r) {
+ _verifier->verify_rt_clean_region(r); // Must be all Clean from bottom -> end.
+ if (_both_card_tables) {
+ _verifier->verify_ct_clean_region(r);
+ }
+ return false;
+ }
+};
+
+void G1HeapVerifier::verify_card_tables_clean(bool both_card_tables) {
+ G1VerifyCardTablesClean cl(this, both_card_tables);
+ _g1h->heap_region_iterate(&cl);
}
-void G1HeapVerifier::verify_dirty_region(G1HeapRegion* hr) {
+void G1HeapVerifier::verify_rt_clean_from_top(G1HeapRegion* hr) {
+ G1CardTable* ct = _g1h->refinement_table();
+ MemRegion mr(align_up(hr->top(), G1CardTable::card_size()), hr->end());
+ ct->verify_region(mr, G1CardTable::clean_card_val(), true);
+}
+
+void G1HeapVerifier::verify_rt_dirty_to_dummy_top(G1HeapRegion* hr) {
// We cannot guarantee that [bottom(),end()] is dirty. Threads
// dirty allocated blocks as they allocate them. The thread that
// retires each region and replaces it with a new one will do a
@@ -594,29 +617,56 @@ void G1HeapVerifier::verify_dirty_region(G1HeapRegion* hr) {
// not dirty that area (one less thing to have to do while holding
// a lock). So we can only verify that [bottom(),pre_dummy_top()]
// is dirty.
- G1CardTable* ct = _g1h->card_table();
+ G1CardTable* ct = _g1h->refinement_table();
MemRegion mr(hr->bottom(), hr->pre_dummy_top());
- if (hr->is_young()) {
- ct->verify_g1_young_region(mr);
- } else {
- ct->verify_dirty_region(mr);
- }
+ ct->verify_dirty_region(mr);
}
-class G1VerifyDirtyYoungListClosure : public G1HeapRegionClosure {
-private:
- G1HeapVerifier* _verifier;
-public:
- G1VerifyDirtyYoungListClosure(G1HeapVerifier* verifier) : G1HeapRegionClosure(), _verifier(verifier) { }
- virtual bool do_heap_region(G1HeapRegion* r) {
- _verifier->verify_dirty_region(r);
- return false;
- }
-};
+void G1HeapVerifier::verify_ct_clean_region(G1HeapRegion* hr) {
+ G1CardTable* ct = _g1h->card_table();
+ MemRegion mr(hr->bottom(), hr->end());
+ ct->verify_region(mr, G1CardTable::clean_card_val(), true);
+}
-void G1HeapVerifier::verify_dirty_young_regions() {
- G1VerifyDirtyYoungListClosure cl(this);
- _g1h->collection_set()->iterate(&cl);
+void G1HeapVerifier::verify_rt_clean_region(G1HeapRegion* hr) {
+ G1CardTable* ct = _g1h->refinement_table();
+ MemRegion mr(hr->bottom(), hr->end());
+ ct->verify_region(mr, G1CardTable::clean_card_val(), true);
+}
+
+#ifndef PRODUCT
+
+void G1HeapVerifier::verify_card_tables_in_sync() {
+
+ // Non-Java thread card tables must be null.
+ class AssertCardTableBaseNull : public ThreadClosure {
+ public:
+
+ void do_thread(Thread* thread) {
+ ResourceMark rm;
+ assert(G1ThreadLocalData::get_byte_map_base(thread) == nullptr, "thread " PTR_FORMAT " (%s) has non-null card table base",
+ p2i(thread), thread->name());
+ }
+ } check_null_cl;
+
+ Threads::non_java_threads_do(&check_null_cl);
+
+ // Java thread card tables must be the same as the global card table.
+ class AssertSameCardTableClosure : public ThreadClosure {
+ public:
+
+ void do_thread(Thread* thread) {
+ G1CardTable::CardValue* global_ct_base = G1CollectedHeap::heap()->card_table_base();
+ G1CardTable::CardValue* cur_ct_base = G1ThreadLocalData::get_byte_map_base(thread);
+
+ ResourceMark rm;
+ assert(cur_ct_base == global_ct_base,
+ "thread " PTR_FORMAT " (%s) has wrong card table base, should be " PTR_FORMAT " is " PTR_FORMAT,
+ p2i(thread), thread->name(), p2i(global_ct_base), p2i(cur_ct_base));
+ }
+ } check_same_cl;
+
+ Threads::java_threads_do(&check_same_cl);
}
class G1CheckRegionAttrTableClosure : public G1HeapRegionClosure {
diff --git a/src/hotspot/share/gc/g1/g1HeapVerifier.hpp b/src/hotspot/share/gc/g1/g1HeapVerifier.hpp
index d4ab4c60214..6a26c77ec0d 100644
--- a/src/hotspot/share/gc/g1/g1HeapVerifier.hpp
+++ b/src/hotspot/share/gc/g1/g1HeapVerifier.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -78,11 +78,16 @@ public:
// Do sanity check on the contents of the in-cset fast test table.
bool check_region_attr_table() PRODUCT_RETURN_( return true; );
- void verify_card_table_cleanup() PRODUCT_RETURN;
+ void verify_card_table_cleanup();
+ void verify_card_tables_clean(bool both_card_tables);
- void verify_not_dirty_region(G1HeapRegion* hr) PRODUCT_RETURN;
- void verify_dirty_region(G1HeapRegion* hr) PRODUCT_RETURN;
- void verify_dirty_young_regions() PRODUCT_RETURN;
+ void verify_ct_clean_region(G1HeapRegion* hr);
+ void verify_rt_dirty_to_dummy_top(G1HeapRegion* hr);
+ void verify_rt_clean_from_top(G1HeapRegion* hr);
+ void verify_rt_clean_region(G1HeapRegion* hr);
+
+ // Verify that the global card table and the thread's card tables are in sync.
+ void verify_card_tables_in_sync() PRODUCT_RETURN;
};
#endif // SHARE_GC_G1_G1HEAPVERIFIER_HPP
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.cpp b/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
index 1ee299e2ee4..a9c6462680f 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
@@ -34,7 +34,7 @@ G1MonotonicArena::Segment::Segment(uint slot_size, uint num_slots, Segment* next
_next(next),
_next_allocate(0),
_mem_tag(mem_tag) {
- _bottom = ((char*) this) + header_size();
+ guarantee(is_aligned(this, SegmentPayloadMaxAlignment), "Make sure Segments are always created at correctly aligned memory");
}
G1MonotonicArena::Segment* G1MonotonicArena::Segment::create_segment(uint slot_size,
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.hpp b/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
index 0434a222b21..211820c5254 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
@@ -110,9 +110,10 @@ protected:
void deallocate(void* slot) override { ShouldNotReachHere(); }
};
+static constexpr uint SegmentPayloadMaxAlignment = 8;
// A single segment/arena containing _num_slots blocks of memory of _slot_size.
// Segments can be linked together using a singly linked list.
-class G1MonotonicArena::Segment {
+class alignas(SegmentPayloadMaxAlignment) G1MonotonicArena::Segment {
const uint _slot_size;
const uint _num_slots;
Segment* volatile _next;
@@ -122,16 +123,15 @@ class G1MonotonicArena::Segment {
uint volatile _next_allocate;
const MemTag _mem_tag;
- char* _bottom; // Actual data.
- // Do not add class member variables beyond this point
-
- static size_t header_size() { return align_up(sizeof(Segment), DEFAULT_PADDING_SIZE); }
+ static size_t header_size() { return align_up(sizeof(Segment), SegmentPayloadMaxAlignment); }
static size_t payload_size(uint slot_size, uint num_slots) {
- // The cast (size_t) is required to guard against overflow wrap around.
- return (size_t)slot_size * num_slots;
+ // The cast is required to guard against overflow wrap around.
+ return static_cast<size_t>(slot_size) * num_slots;
}
+ void* payload(size_t octet) { return &reinterpret_cast<char*>(this)[header_size() + octet]; }
+
size_t payload_size() const { return payload_size(_slot_size, _num_slots); }
NONCOPYABLE(Segment);
@@ -156,7 +156,7 @@ public:
_next_allocate = 0;
assert(next != this, " loop condition");
set_next(next);
- memset((void*)_bottom, 0, payload_size());
+ memset(payload(0), 0, payload_size());
}
uint slot_size() const { return _slot_size; }
@@ -176,14 +176,10 @@ public:
static Segment* create_segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag);
static void delete_segment(Segment* segment);
- // Copies the contents of this segment into the destination.
- void copy_to(void* dest) const {
- ::memcpy(dest, _bottom, length() * _slot_size);
- }
-
bool is_full() const { return _next_allocate >= _num_slots; }
};
+static_assert(alignof(G1MonotonicArena::Segment) >= SegmentPayloadMaxAlignment, "Segment (and hence its payload) must be aligned to at least SegmentPayloadMaxAlignment");
// Set of (free) Segments. The assumed usage is that allocation
// to it and removal of segments is strictly separate, but every action may be
@@ -240,6 +236,7 @@ public:
assert(_initial_num_slots > 0, "Must be");
assert(_max_num_slots > 0, "Must be");
assert(_slot_alignment > 0, "Must be");
+ assert(SegmentPayloadMaxAlignment % _slot_alignment == 0, "ensure that _slot_alignment is a divisor of SegmentPayloadMaxAlignment");
}
virtual uint next_num_slots(uint prev_num_slots) const {
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.inline.hpp b/src/hotspot/share/gc/g1/g1MonotonicArena.inline.hpp
index d4c1aa8c4e3..dd9ccae1849 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.inline.hpp
@@ -39,7 +39,7 @@ inline void* G1MonotonicArena::Segment::allocate_slot() {
if (result >= _num_slots) {
return nullptr;
}
- void* r = _bottom + (size_t)result * _slot_size;
+ void* r = payload(static_cast<size_t>(result) * _slot_size);
return r;
}
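The Segment changes above drop the explicit _bottom pointer and instead compute the payload address from the header size, relying on alignas plus an aligned-up header so every slot stays suitably aligned. A self-contained sketch of that layout rule under the same 8-byte SegmentPayloadMaxAlignment assumption; the class body and allocation helpers are simplified, not the HotSpot implementation:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <new>

static constexpr std::size_t SegmentPayloadMaxAlignment = 8;

static std::size_t align_up(std::size_t value, std::size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

class alignas(SegmentPayloadMaxAlignment) Segment {
  std::uint32_t _slot_size;
  std::uint32_t _num_slots;

  // Private: a Segment is only valid at the start of a block that also holds its payload.
  Segment(std::uint32_t slot_size, std::uint32_t num_slots)
    : _slot_size(slot_size), _num_slots(num_slots) {
    std::memset(payload(0), 0, payload_size());
  }

  static std::size_t header_size() {
    return align_up(sizeof(Segment), SegmentPayloadMaxAlignment);
  }

public:
  std::size_t payload_size() const {
    return static_cast<std::size_t>(_slot_size) * _num_slots;  // widen before multiplying
  }

  // The payload starts right after the aligned-up header, so every slot whose
  // alignment divides SegmentPayloadMaxAlignment is correctly aligned.
  void* payload(std::size_t offset) {
    return &reinterpret_cast<char*>(this)[header_size() + offset];
  }

  void* slot(std::uint32_t index) {
    assert(index < _num_slots);
    return payload(static_cast<std::size_t>(index) * _slot_size);
  }

  // Header and payload live in one allocation (error handling elided).
  static Segment* create(std::uint32_t slot_size, std::uint32_t num_slots) {
    std::size_t bytes = header_size() + static_cast<std::size_t>(slot_size) * num_slots;
    return new (std::malloc(bytes)) Segment(slot_size, num_slots);
  }
  static void destroy(Segment* s) {
    s->~Segment();
    std::free(s);
  }
};

static_assert(alignof(Segment) >= SegmentPayloadMaxAlignment,
              "payload offset must preserve the maximum slot alignment");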
diff --git a/src/hotspot/share/gc/g1/g1OopClosures.hpp b/src/hotspot/share/gc/g1/g1OopClosures.hpp
index 3bff668bcec..a61c9d17f70 100644
--- a/src/hotspot/share/gc/g1/g1OopClosures.hpp
+++ b/src/hotspot/share/gc/g1/g1OopClosures.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -86,19 +86,19 @@ public:
// This closure is applied to the fields of the objects that have just been copied during evacuation.
class G1ScanEvacuatedObjClosure : public G1ScanClosureBase {
- friend class G1SkipCardEnqueueSetter;
+ friend class G1SkipCardMarkSetter;
- enum SkipCardEnqueueTristate {
+ enum SkipCardMarkTristate {
False = 0,
True,
Uninitialized
};
- SkipCardEnqueueTristate _skip_card_enqueue;
+ SkipCardMarkTristate _skip_card_mark;
public:
G1ScanEvacuatedObjClosure(G1CollectedHeap* g1h, G1ParScanThreadState* par_scan_state) :
- G1ScanClosureBase(g1h, par_scan_state), _skip_card_enqueue(Uninitialized) { }
+ G1ScanClosureBase(g1h, par_scan_state), _skip_card_mark(Uninitialized) { }
template <class T> void do_oop_work(T* p);
virtual void do_oop(oop* p) { do_oop_work(p); }
@@ -109,22 +109,22 @@ public:
}
#ifdef ASSERT
- bool skip_card_enqueue_set() const { return _skip_card_enqueue != Uninitialized; }
+ bool skip_card_mark_set() const { return _skip_card_mark != Uninitialized; }
#endif
};
-// RAII object to properly set the _skip_card_enqueue field in G1ScanEvacuatedObjClosure.
-class G1SkipCardEnqueueSetter : public StackObj {
+// RAII object to properly set the _skip_card_mark field in G1ScanEvacuatedObjClosure.
+class G1SkipCardMarkSetter : public StackObj {
G1ScanEvacuatedObjClosure* _closure;
public:
- G1SkipCardEnqueueSetter(G1ScanEvacuatedObjClosure* closure, bool skip_card_enqueue) : _closure(closure) {
- assert(_closure->_skip_card_enqueue == G1ScanEvacuatedObjClosure::Uninitialized, "Must not be set");
- _closure->_skip_card_enqueue = skip_card_enqueue ? G1ScanEvacuatedObjClosure::True : G1ScanEvacuatedObjClosure::False;
+ G1SkipCardMarkSetter(G1ScanEvacuatedObjClosure* closure, bool skip_card_mark) : _closure(closure) {
+ assert(_closure->_skip_card_mark == G1ScanEvacuatedObjClosure::Uninitialized, "Must not be set");
+ _closure->_skip_card_mark = skip_card_mark ? G1ScanEvacuatedObjClosure::True : G1ScanEvacuatedObjClosure::False;
}
- ~G1SkipCardEnqueueSetter() {
- DEBUG_ONLY(_closure->_skip_card_enqueue = G1ScanEvacuatedObjClosure::Uninitialized;)
+ ~G1SkipCardMarkSetter() {
+ DEBUG_ONLY(_closure->_skip_card_mark = G1ScanEvacuatedObjClosure::Uninitialized;)
}
};
@@ -206,13 +206,20 @@ public:
class G1ConcurrentRefineOopClosure: public BasicOopIterateClosure {
G1CollectedHeap* _g1h;
uint _worker_id;
+ bool _has_ref_to_cset;
+ bool _has_ref_to_old;
public:
G1ConcurrentRefineOopClosure(G1CollectedHeap* g1h, uint worker_id) :
_g1h(g1h),
- _worker_id(worker_id) {
+ _worker_id(worker_id),
+ _has_ref_to_cset(false),
+ _has_ref_to_old(false) {
}
+ bool has_ref_to_cset() const { return _has_ref_to_cset; }
+ bool has_ref_to_old() const { return _has_ref_to_old; }
+
virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS; }
template <class T> void do_oop_work(T* p);
@@ -223,6 +230,7 @@ public:
class G1RebuildRemSetClosure : public BasicOopIterateClosure {
G1CollectedHeap* _g1h;
uint _worker_id;
+
public:
G1RebuildRemSetClosure(G1CollectedHeap* g1h, uint worker_id) : _g1h(g1h), _worker_id(worker_id) {
}
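G1SkipCardMarkSetter above follows the common RAII pattern for a tristate flag that must be set explicitly before each scan and is reset afterwards in debug builds. A small generic sketch of the same pattern; ScanClosure and SkipCardMarkSetter here are illustrative stand-ins, not the HotSpot classes:

#include <cassert>

enum class Tristate { False, True, Uninitialized };

struct ScanClosure {
  Tristate skip_card_mark = Tristate::Uninitialized;
};

class SkipCardMarkSetter {
  ScanClosure* _closure;
public:
  SkipCardMarkSetter(ScanClosure* closure, bool skip) : _closure(closure) {
    assert(_closure->skip_card_mark == Tristate::Uninitialized && "must not be set yet");
    _closure->skip_card_mark = skip ? Tristate::True : Tristate::False;
  }
  ~SkipCardMarkSetter() {
#ifndef NDEBUG
    // Mirrors the DEBUG_ONLY reset: a stale value cannot leak into the next scan.
    _closure->skip_card_mark = Tristate::Uninitialized;
#endif
  }
};

// Usage: choose the flag for exactly the duration of one object scan.
void scan_object(ScanClosure& cl, bool copied_into_survivor) {
  SkipCardMarkSetter x(&cl, /* skip card marking */ copied_into_survivor);
  assert(cl.skip_card_mark != Tristate::Uninitialized);
  // ... iterate the object's fields here ...
}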
diff --git a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
index c0c67fda949..87e3a1cc7c4 100644
--- a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
@@ -90,11 +90,11 @@ inline void G1ScanEvacuatedObjClosure::do_oop_work(T* p) {
prefetch_and_push(p, obj);
} else if (!G1HeapRegion::is_in_same_region(p, obj)) {
handle_non_cset_obj_common(region_attr, p, obj);
- assert(_skip_card_enqueue != Uninitialized, "Scan location has not been initialized.");
- if (_skip_card_enqueue == True) {
+ assert(_skip_card_mark != Uninitialized, "Scan location has not been initialized.");
+ if (_skip_card_mark == True) {
return;
}
- _par_scan_state->enqueue_card_if_tracked(region_attr, p, obj);
+ _par_scan_state->mark_card_if_tracked(region_attr, p, obj);
}
}
@@ -127,6 +127,11 @@ inline static void check_obj_during_refinement(T* p, oop const obj) {
template <class T>
inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
+ // Early out if we already found a to-young reference.
+ if (_has_ref_to_cset) {
+ return;
+ }
+
T o = RawAccess<MO_RELAXED>::oop_load(p);
if (CompressedOops::is_null(o)) {
return;
@@ -146,7 +151,12 @@ inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
return;
}
- G1HeapRegionRemSet* to_rem_set = _g1h->heap_region_containing(obj)->rem_set();
+ G1HeapRegion* to_region = _g1h->heap_region_containing(obj);
+ if (to_region->is_young()) {
+ _has_ref_to_cset = true;
+ return;
+ }
+ G1HeapRegionRemSet* to_rem_set = to_region->rem_set();
assert(to_rem_set != nullptr, "Need per-region 'into' remsets.");
if (to_rem_set->is_tracked()) {
@@ -154,6 +164,7 @@ inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
if (from->rem_set()->cset_group() != to_rem_set->cset_group()) {
to_rem_set->add_reference(p, _worker_id);
+ _has_ref_to_old = true;
}
}
}
@@ -180,7 +191,7 @@ inline void G1ScanCardClosure::do_oop_work(T* p) {
_heap_roots_found++;
} else if (!G1HeapRegion::is_in_same_region(p, obj)) {
handle_non_cset_obj_common(region_attr, p, obj);
- _par_scan_state->enqueue_card_if_tracked(region_attr, p, obj);
+ _par_scan_state->mark_card_if_tracked(region_attr, p, obj);
}
}
@@ -272,10 +283,14 @@ template void G1RebuildRemSetClosure::do_oop_work(T* p) {
G1HeapRegion* to = _g1h->heap_region_containing(obj);
G1HeapRegionRemSet* rem_set = to->rem_set();
if (rem_set->is_tracked()) {
- G1HeapRegion* from = _g1h->heap_region_containing(p);
+ if (to->is_young()) {
+ G1BarrierSet::g1_barrier_set()->write_ref_field_post(p);
+ } else {
+ G1HeapRegion* from = _g1h->heap_region_containing(p);
- if (from->rem_set()->cset_group() != rem_set->cset_group()) {
- rem_set->add_reference(p, _worker_id);
+ if (from->rem_set()->cset_group() != rem_set->cset_group()) {
+ rem_set->add_reference(p, _worker_id);
+ }
}
}
}
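The refinement closure above now classifies what a scanned card refers to: scanning stops as soon as a reference into a young (collection set) region is found, and otherwise it records whether anything was added to an old region's remembered set. A hedged sketch of that early-out control flow over a simplified region classification; RegionKind and classify_card are invented for illustration:

#include <vector>

enum class RegionKind { Young, TrackedOld, Other };

struct ScanResult {
  bool has_ref_to_cset = false;
  bool has_ref_to_old  = false;
};

// Classify one card's outgoing references. A reference into young gen ends the
// scan immediately (the card has to remain interesting anyway); references to
// tracked old regions are where add_reference() would be called.
ScanResult classify_card(const std::vector<RegionKind>& referenced_regions) {
  ScanResult result;
  for (RegionKind to : referenced_regions) {
    if (to == RegionKind::Young) {
      result.has_ref_to_cset = true;  // early out
      break;
    }
    if (to == RegionKind::TrackedOld) {
      result.has_ref_to_old = true;
    }
  }
  return result;
}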
diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp
index 42c3a872e6b..80e5fd44fcd 100644
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp
@@ -57,22 +57,21 @@
#define MAYBE_INLINE_EVACUATION NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE)
G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
- G1RedirtyCardsQueueSet* rdcqs,
uint worker_id,
uint num_workers,
G1CollectionSet* collection_set,
G1EvacFailureRegions* evac_failure_regions)
: _g1h(g1h),
_task_queue(g1h->task_queue(worker_id)),
- _rdc_local_qset(rdcqs),
- _ct(g1h->card_table()),
+ _ct(g1h->refinement_table()),
_closures(nullptr),
_plab_allocator(nullptr),
_age_table(false),
_tenuring_threshold(g1h->policy()->tenuring_threshold()),
_scanner(g1h, this),
_worker_id(worker_id),
- _last_enqueued_card(SIZE_MAX),
+ _num_cards_marked_dirty(0),
+ _num_cards_marked_to_cset(0),
_stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
_stack_trim_lower_threshold(GCDrainStackTargetSize),
_trim_ticks(),
@@ -88,7 +87,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
ALLOCATION_FAILURE_INJECTOR_ONLY(_allocation_failure_inject_counter(0) COMMA)
_evacuation_failed_info(),
_evac_failure_regions(evac_failure_regions),
- _evac_failure_enqueued_cards(0)
+ _num_cards_from_evac_failure(0)
{
// We allocate number of young gen regions in the collection set plus one
// entries, since entry 0 keeps track of surviving bytes for non-young regions.
@@ -112,8 +111,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
initialize_numa_stats();
}
-size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* rdc_buffers) {
- *rdc_buffers = _rdc_local_qset.flush();
+size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers) {
flush_numa_stats();
// Update allocation statistics.
_plab_allocator->flush_and_retire_stats(num_workers);
@@ -147,8 +145,16 @@ size_t G1ParScanThreadState::lab_undo_waste_words() const {
return _plab_allocator->undo_waste();
}
-size_t G1ParScanThreadState::evac_failure_enqueued_cards() const {
- return _evac_failure_enqueued_cards;
+size_t G1ParScanThreadState::num_cards_pending() const {
+ return _num_cards_marked_dirty + _num_cards_from_evac_failure;
+}
+
+size_t G1ParScanThreadState::num_cards_marked() const {
+ return num_cards_pending() + _num_cards_marked_to_cset;
+}
+
+size_t G1ParScanThreadState::num_cards_from_evac_failure() const {
+ return _num_cards_from_evac_failure;
}
#ifdef ASSERT
@@ -230,7 +236,7 @@ void G1ParScanThreadState::do_partial_array(PartialArrayState* state, bool stole
PartialArraySplitter::Claim claim =
_partial_array_splitter.claim(state, _task_queue, stolen);
G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
- G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
+ G1SkipCardMarkSetter x(&_scanner, dest_attr.is_new_survivor());
// Process claimed task.
to_array->oop_iterate_range(&_scanner,
checked_cast<int>(claim._start),
@@ -250,7 +256,7 @@ void G1ParScanThreadState::start_partial_objarray(oop from_obj,
// The source array is unused when processing states.
_partial_array_splitter.start(_task_queue, nullptr, to_array, array_length);
- assert(_scanner.skip_card_enqueue_set(), "must be");
+ assert(_scanner.skip_card_mark_set(), "must be");
// Process the initial chunk. No need to process the type in the
// klass, as it will already be handled by processing the built-in
// module.
@@ -451,7 +457,7 @@ void G1ParScanThreadState::do_iterate_object(oop const obj,
_string_dedup_requests.add(old);
}
- assert(_scanner.skip_card_enqueue_set(), "must be");
+ assert(_scanner.skip_card_mark_set(), "must be");
obj->oop_iterate_backwards(&_scanner, klass);
}
@@ -546,7 +552,7 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
// Instead, we use dest_attr.is_young() because the two values are always
// equal: successfully allocated young regions must be survivor regions.
assert(dest_attr.is_young() == _g1h->heap_region_containing(obj)->is_survivor(), "must be");
- G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_young());
+ G1SkipCardMarkSetter x(&_scanner, dest_attr.is_young());
do_iterate_object(obj, old, klass, region_attr, dest_attr, age);
}
@@ -569,7 +575,7 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
assert(worker_id < _num_workers, "out of bounds access");
if (_states[worker_id] == nullptr) {
_states[worker_id] =
- new G1ParScanThreadState(_g1h, rdcqs(),
+ new G1ParScanThreadState(_g1h,
worker_id,
_num_workers,
_collection_set,
@@ -595,22 +601,24 @@ void G1ParScanThreadStateSet::flush_stats() {
// because it resets the PLAB allocator where we get this info from.
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
- size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers, &_rdc_buffers[worker_id]) * HeapWordSize;
- size_t evac_fail_enqueued_cards = pss->evac_failure_enqueued_cards();
+ size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers) * HeapWordSize;
+ size_t pending_cards = pss->num_cards_pending();
+ size_t to_young_gen_cards = pss->num_cards_marked() - pss->num_cards_pending();
+ size_t evac_failure_cards = pss->num_cards_from_evac_failure();
+ size_t marked_cards = pss->num_cards_marked();
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_undo_waste_bytes, G1GCPhaseTimes::MergePSSLABUndoWasteBytes);
- p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, evac_fail_enqueued_cards, G1GCPhaseTimes::MergePSSEvacFailExtra);
+ p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, pending_cards, G1GCPhaseTimes::MergePSSPendingCards);
+ p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, to_young_gen_cards, G1GCPhaseTimes::MergePSSToYoungGenCards);
+ p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, evac_failure_cards, G1GCPhaseTimes::MergePSSEvacFail);
+ p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, marked_cards, G1GCPhaseTimes::MergePSSMarked);
delete pss;
_states[worker_id] = nullptr;
}
- G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
- dcq.merge_bufferlists(rdcqs());
- rdcqs()->verify_empty();
-
_flushed = true;
}
@@ -652,7 +660,7 @@ oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, Kla
// existing closure to scan evacuated objects; since we are iterating from a
// collection set region (i.e. never a Survivor region), we always need to
// gather cards for this case.
- G1SkipCardEnqueueSetter x(&_scanner, false /* skip_card_enqueue */);
+ G1SkipCardMarkSetter x(&_scanner, false /* skip_card_mark */);
do_iterate_object(old, old, klass, attr, attr, m.age());
}
@@ -709,9 +717,7 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
G1EvacFailureRegions* evac_failure_regions) :
_g1h(g1h),
_collection_set(collection_set),
- _rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
_states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
- _rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
_num_workers(num_workers),
_flushed(false),
@@ -719,7 +725,6 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
{
for (uint i = 0; i < num_workers; ++i) {
_states[i] = nullptr;
- _rdc_buffers[i] = BufferNodeList();
}
memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
}
@@ -728,7 +733,6 @@ G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
assert(_flushed, "thread local state from the per thread states should have been flushed");
FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
- FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
}
#if TASKQUEUE_STATS
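The four per-worker MergePSS work items recorded above are related as documented on the new accessors in g1ParScanThreadState.hpp below; a worked example with made-up numbers: if a worker marked 1000 cards during evacuation, 200 of them for references into newly allocated survivor regions and 50 of them for references into evacuation-failed regions, then

  marked_cards       = num_cards_marked()                        = 1000
  to_young_gen_cards = num_cards_marked() - num_cards_pending()  =  200
  pending_cards      = num_cards_pending()                       =  800   (includes the 50 evac-failure cards)
  evac_failure_cards = num_cards_from_evac_failure()             =   50

Only the pending cards are candidates for later concurrent refinement; the to-young-gen cards are never refined concurrently.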
diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
index 4d569622238..3fb080d40be 100644
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,6 @@
#include "gc/g1/g1CollectedHeap.hpp"
#include "gc/g1/g1OopClosures.hpp"
-#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1YoungGCAllocationFailureInjector.hpp"
#include "gc/shared/ageTable.hpp"
#include "gc/shared/copyFailedInfo.hpp"
@@ -52,7 +51,6 @@ class outputStream;
class G1ParScanThreadState : public CHeapObj<mtGC> {
G1CollectedHeap* _g1h;
G1ScannerTasksQueue* _task_queue;
- G1RedirtyCardsLocalQueueSet _rdc_local_qset;
G1CardTable* _ct;
G1EvacuationRootClosures* _closures;
@@ -65,9 +63,8 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
uint _worker_id;
- // Remember the last enqueued card to avoid enqueuing the same card over and over;
- // since we only ever scan a card once, this is sufficient.
- size_t _last_enqueued_card;
+ size_t _num_cards_marked_dirty;
+ size_t _num_cards_marked_to_cset;
// Upper and lower threshold to start and end work queue draining.
uint const _stack_trim_upper_threshold;
@@ -104,22 +101,19 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
EvacuationFailedInfo _evacuation_failed_info;
G1EvacFailureRegions* _evac_failure_regions;
- // Number of additional cards into evacuation failed regions enqueued into
- // the local DCQS. This is an approximation, as cards that would be added later
- // outside of evacuation failure will not be subtracted again.
- size_t _evac_failure_enqueued_cards;
+ // Number of additional cards into evacuation failed regions.
+ size_t _num_cards_from_evac_failure;
- // Enqueue the card if not already in the set; this is a best-effort attempt on
+ // Mark the card if not already in the set; this is a best-effort attempt on
// detecting duplicates.
- template <class T> bool enqueue_if_new(T* p);
- // Enqueue the card of p into the (evacuation failed) region.
- template <class T> void enqueue_card_into_evac_fail_region(T* p, oop obj);
+ template <class T> bool mark_if_new(T* p, bool into_survivor);
+ // Mark the card of p into the (evacuation failed) region.
+ template <class T> void mark_card_into_evac_fail_region(T* p, oop obj);
bool inject_allocation_failure(uint region_idx) ALLOCATION_FAILURE_INJECTOR_RETURN_( return false; );
public:
G1ParScanThreadState(G1CollectedHeap* g1h,
- G1RedirtyCardsQueueSet* rdcqs,
uint worker_id,
uint num_workers,
G1CollectionSet* collection_set,
@@ -139,16 +133,16 @@ public:
void push_on_queue(ScannerTask task);
- // Apply the post barrier to the given reference field. Enqueues the card of p
+ // Apply the post barrier to the given reference field. Marks the card of p
// if the barrier does not filter out the reference for some reason (e.g.
// p and q are in the same region, p is in survivor, p is in collection set)
// To be called during GC if nothing particular about p and obj are known.
template <class T> void write_ref_field_post(T* p, oop obj);
- // Enqueue the card if the reference's target region's remembered set is tracked.
+ // Mark the card if the reference's target region's remembered set is tracked.
// Assumes that a significant amount of pre-filtering (like done by
// write_ref_field_post() above) has already been performed.
- template <class T> void enqueue_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o);
+ template <class T> void mark_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o);
G1EvacuationRootClosures* closures() { return _closures; }
uint worker_id() { return _worker_id; }
@@ -156,11 +150,22 @@ public:
size_t lab_waste_words() const;
size_t lab_undo_waste_words() const;
- size_t evac_failure_enqueued_cards() const;
+ // Newly marked cards during this garbage collection, to be refined concurrently
+ // later. Contains both marks generated by new cross-region references as well
+ // as cards generated from regions into evacuation failed regions.
+ // Does not contain cards into the next collection set (e.g. survivors) - they will not
+ // be refined concurrently. Calculation is done on a best-effort basis.
+ size_t num_cards_pending() const;
+ // Number of cards newly generated by references into evacuation failed regions.
+ // Calculation is done on a best-effort basis.
+ size_t num_cards_from_evac_failure() const;
+ // Sum of cards marked by evacuation. Contains both pending cards as well as cards
+ // into the next collection set (e.g. survivors).
+ size_t num_cards_marked() const;
// Pass locally gathered statistics to global state. Returns the total number of
// HeapWords copied.
- size_t flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* buffer_log);
+ size_t flush_stats(size_t* surviving_young_words, uint num_workers);
#if TASKQUEUE_STATS
PartialArrayTaskStats* partial_array_task_stats();
@@ -249,9 +254,7 @@ public:
class G1ParScanThreadStateSet : public StackObj {
G1CollectedHeap* _g1h;
G1CollectionSet* _collection_set;
- G1RedirtyCardsQueueSet _rdcqs;
G1ParScanThreadState** _states;
- BufferNodeList* _rdc_buffers;
size_t* _surviving_young_words_total;
uint _num_workers;
bool _flushed;
@@ -264,9 +267,6 @@ class G1ParScanThreadStateSet : public StackObj {
G1EvacFailureRegions* evac_failure_regions);
~G1ParScanThreadStateSet();
- G1RedirtyCardsQueueSet* rdcqs() { return &_rdcqs; }
- BufferNodeList* rdc_buffers() { return _rdc_buffers; }
-
void flush_stats();
void record_unused_optional_region(G1HeapRegion* hr);
#if TASKQUEUE_STATS
diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp
index 148284e7ef7..ee5bc93290e 100644
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp
@@ -96,25 +96,24 @@ G1OopStarChunkedList* G1ParScanThreadState::oops_into_optional_region(const G1He
return &_oops_into_optional_regions[hr->index_in_opt_cset()];
}
-template <class T> bool G1ParScanThreadState::enqueue_if_new(T* p) {
- size_t card_index = ct()->index_for(p);
- // If the card hasn't been added to the buffer, do it.
- if (_last_enqueued_card != card_index) {
- _rdc_local_qset.enqueue(ct()->byte_for_index(card_index));
- _last_enqueued_card = card_index;
+template <class T> bool G1ParScanThreadState::mark_if_new(T* p, bool into_new_survivor) {
+ G1CardTable::CardValue* card = ct()->byte_for(p);
+ G1CardTable::CardValue value = *card;
+ if (value == G1CardTable::clean_card_val()) {
+ *card = into_new_survivor ? G1CardTable::g1_to_cset_card : G1CardTable::g1_dirty_card;
return true;
} else {
return false;
}
}
-template <class T> void G1ParScanThreadState::enqueue_card_into_evac_fail_region(T* p, oop obj) {
+template <class T> void G1ParScanThreadState::mark_card_into_evac_fail_region(T* p, oop obj) {
assert(!G1HeapRegion::is_in_same_region(p, obj), "Should have filtered out cross-region references already.");
assert(!_g1h->heap_region_containing(p)->is_survivor(), "Should have filtered out from-newly allocated survivor references already.");
assert(_g1h->heap_region_containing(obj)->in_collection_set(), "Only for enqeueing reference into collection set region");
- if (enqueue_if_new(p)) {
- _evac_failure_enqueued_cards++;
+ if (mark_if_new(p, false /* into_new_survivor */)) { // The reference is never into survivor regions.
+ _num_cards_from_evac_failure++;
}
}
@@ -137,18 +136,18 @@ template <class T> void G1ParScanThreadState::write_ref_field_post(T* p, oop obj
if (dest_attr.is_in_cset()) {
assert(obj->is_forwarded(), "evac-failed but not forwarded: " PTR_FORMAT, p2i(obj));
assert(obj->forwardee() == obj, "evac-failed but not self-forwarded: " PTR_FORMAT, p2i(obj));
- enqueue_card_into_evac_fail_region(p, obj);
+ mark_card_into_evac_fail_region(p, obj);
return;
}
- enqueue_card_if_tracked(dest_attr, p, obj);
+ mark_card_if_tracked(dest_attr, p, obj);
}
-template <class T> void G1ParScanThreadState::enqueue_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o) {
+template <class T> void G1ParScanThreadState::mark_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o) {
assert(!G1HeapRegion::is_in_same_region(p, o), "Should have filtered out cross-region references already.");
assert(!_g1h->heap_region_containing(p)->is_survivor(), "Should have filtered out from-newly allocated survivor references already.");
// We relabel all regions that failed evacuation as old gen without remembered,
// and so pre-filter them out in the caller.
- assert(!_g1h->heap_region_containing(o)->in_collection_set(), "Should not try to enqueue reference into collection set region");
+ assert(!_g1h->heap_region_containing(o)->in_collection_set(), "Should not try to mark reference into collection set region");
#ifdef ASSERT
G1HeapRegion* const hr_obj = _g1h->heap_region_containing(o);
@@ -161,7 +160,14 @@ template <class T> void G1ParScanThreadState::enqueue_card_if_tracked(G1HeapRegi
if (!region_attr.remset_is_tracked()) {
return;
}
- enqueue_if_new(p);
+ bool into_survivor = region_attr.is_new_survivor();
+ if (mark_if_new(p, into_survivor)) {
+ if (into_survivor) {
+ _num_cards_marked_to_cset++;
+ } else {
+ _num_cards_marked_dirty++;
+ }
+ }
}
#endif // SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP
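The marking path above replaces the old redirty-card enqueue with direct card table writes, relying on the card still being clean for best-effort duplicate filtering. A minimal, self-contained toy model of that idea (not HotSpot code; the card values are arbitrary stand-ins for G1CardTable's clean/dirty/to-cset values):

  #include <cstdint>
  #include <cstdio>

  enum CardValue : uint8_t { clean_card = 0xff, dirty_card = 0x00, to_cset_card = 0x02 };

  // Mark the card only if it is still clean; duplicates on the same card are filtered.
  static bool mark_if_new(CardValue* card, bool into_new_survivor) {
    if (*card != clean_card) {
      return false;
    }
    *card = into_new_survivor ? to_cset_card : dirty_card;
    return true;
  }

  int main() {
    CardValue table[4] = { clean_card, clean_card, clean_card, clean_card };
    size_t pending = 0, to_cset = 0;
    if (mark_if_new(&table[1], false)) pending++;  // first reference on card 1: counted
    if (mark_if_new(&table[1], false)) pending++;  // second reference on card 1: filtered
    if (mark_if_new(&table[2], true))  to_cset++;  // reference into a newly allocated survivor
    printf("pending=%zu to_cset=%zu\n", pending, to_cset);  // prints pending=1 to_cset=1
    return 0;
  }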
diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp
index 9f872aa6ccd..754cc502031 100644
--- a/src/hotspot/share/gc/g1/g1Policy.cpp
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp
@@ -67,8 +67,7 @@ G1Policy::G1Policy(STWGCTimer* gc_timer) :
_reserve_regions(0),
_young_gen_sizer(),
_free_regions_at_end_of_collection(0),
- _card_rs_length(0),
- _pending_cards_at_gc_start(0),
+ _pending_cards_from_gc(0),
_concurrent_start_to_mixed(),
_collection_set(nullptr),
_g1h(nullptr),
@@ -553,12 +552,9 @@ G1GCPhaseTimes* G1Policy::phase_times() const {
return _phase_times;
}
-void G1Policy::revise_young_list_target_length(size_t card_rs_length, size_t code_root_rs_length) {
+void G1Policy::revise_young_list_target_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length) {
guarantee(use_adaptive_young_list_length(), "should not call this otherwise" );
- size_t thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
- G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
- size_t pending_cards = dcqs.num_cards() + thread_buffer_cards;
update_young_length_bounds(pending_cards, card_rs_length, code_root_rs_length);
}
@@ -567,7 +563,7 @@ void G1Policy::record_full_collection_start() {
// Release the future to-space so that it is available for compaction into.
collector_state()->set_in_young_only_phase(false);
collector_state()->set_in_full_gc(true);
- _pending_cards_at_gc_start = 0;
+ _collection_set->abandon_all_candidates();
}
void G1Policy::record_full_collection_end() {
@@ -600,59 +596,70 @@ void G1Policy::record_full_collection_end() {
record_pause(G1GCPauseType::FullGC, start_time_sec, end_sec);
}
-static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats& stats) {
+static void log_refinement_stats(const G1ConcurrentRefineStats& stats) {
log_debug(gc, refine, stats)
- ("%s refinement: %.2fms, refined: %zu"
- ", precleaned: %zu, dirtied: %zu",
- kind,
- stats.refinement_time().seconds() * MILLIUNITS,
+ ("Refinement: sweep: %.2fms, yield: %.2fms refined: %zu, dirtied: %zu",
+ TimeHelper::counter_to_millis(stats.sweep_duration()),
+ TimeHelper::counter_to_millis(stats.yield_during_sweep_duration()),
stats.refined_cards(),
- stats.precleaned_cards(),
- stats.dirtied_cards());
+ stats.cards_pending());
}
-void G1Policy::record_concurrent_refinement_stats(size_t pending_cards,
- size_t thread_buffer_cards) {
- _pending_cards_at_gc_start = pending_cards;
- _analytics->report_dirtied_cards_in_thread_buffers(thread_buffer_cards);
-
- // Collect per-thread stats, mostly from mutator activity.
- G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
- G1ConcurrentRefineStats mut_stats = dcqs.concatenated_refinement_stats();
-
- // Collect specialized concurrent refinement thread stats.
- G1ConcurrentRefine* cr = _g1h->concurrent_refine();
- G1ConcurrentRefineStats cr_stats = cr->get_and_reset_refinement_stats();
-
- G1ConcurrentRefineStats total_stats = mut_stats + cr_stats;
-
- log_refinement_stats("Mutator", mut_stats);
- log_refinement_stats("Concurrent", cr_stats);
- log_refinement_stats("Total", total_stats);
+void G1Policy::record_refinement_stats(G1ConcurrentRefineStats* refine_stats) {
+ log_refinement_stats(*refine_stats);
// Record the rate at which cards were refined.
- // Don't update the rate if the current sample is empty or time is zero.
- Tickspan refinement_time = total_stats.refinement_time();
- size_t refined_cards = total_stats.refined_cards();
- if ((refined_cards > 0) && (refinement_time > Tickspan())) {
- double rate = refined_cards / (refinement_time.seconds() * MILLIUNITS);
+ // Don't update the rate if the current sample is empty or time is zero (which is
+ // the case during GC).
+ double refinement_time = TimeHelper::counter_to_millis(refine_stats->sweep_duration());
+ size_t refined_cards = refine_stats->refined_cards();
+ if ((refined_cards > 0) && (refinement_time > 0)) {
+ double rate = refined_cards / refinement_time;
_analytics->report_concurrent_refine_rate_ms(rate);
- log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms", rate);
+ log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms predicted: %.2f cards/ms", rate, _analytics->predict_concurrent_refine_rate_ms());
}
+}
+template <typename T>
+static T saturated_sub(T x, T y) {
+ return (x < y) ? T() : (x - y);
+}
+
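The helper above clamps at zero instead of wrapping around: for example, saturated_sub<size_t>(5, 8) == 0 while saturated_sub<size_t>(8, 5) == 3.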
+void G1Policy::record_dirtying_stats(double last_mutator_start_dirty_ms,
+ double last_mutator_end_dirty_ms,
+ size_t pending_cards,
+ double yield_duration_ms,
+ size_t next_pending_cards_from_gc,
+ size_t next_to_collection_set_cards) {
+ assert(SafepointSynchronize::is_at_safepoint() || G1ReviseYoungLength_lock->is_locked(),
+ "must be (at safepoint %s locked %s)",
+ BOOL_TO_STR(SafepointSynchronize::is_at_safepoint()), BOOL_TO_STR(G1ReviseYoungLength_lock->is_locked()));
// Record mutator's card logging rate.
- double mut_start_time = _analytics->prev_collection_pause_end_ms();
- double mut_end_time = cur_pause_start_sec() * MILLIUNITS;
- double mut_time = mut_end_time - mut_start_time;
+
// Unlike above for conc-refine rate, here we should not require a
// non-empty sample, since an application could go some time with only
// young-gen or filtered out writes. But we'll ignore unusually short
// sample periods, as they may just pollute the predictions.
- if (mut_time > 1.0) { // Require > 1ms sample time.
- double dirtied_rate = total_stats.dirtied_cards() / mut_time;
+ double const mutator_dirty_time_ms = (last_mutator_end_dirty_ms - last_mutator_start_dirty_ms) - yield_duration_ms;
+ assert(mutator_dirty_time_ms >= 0.0,
+ "must be (start: %.2f end: %.2f yield: %.2f)",
+ last_mutator_start_dirty_ms, last_mutator_end_dirty_ms, yield_duration_ms);
+
+ if (mutator_dirty_time_ms > 1.0) { // Require > 1ms sample time.
+ // The subtractive term is pending_cards_from_gc() which includes both dirtied and dirty-as-young cards,
+ // which can be larger than what is actually considered as "pending" (dirty cards only).
+ size_t dirtied_cards = saturated_sub(pending_cards, pending_cards_from_gc());
+ double dirtied_rate = dirtied_cards / mutator_dirty_time_ms;
_analytics->report_dirtied_cards_rate_ms(dirtied_rate);
- log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms", dirtied_rate);
+ log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms dirtying time %.2f (start %.2f end %.2f yield %.2f) dirtied %zu (pending %zu during_gc %zu)",
+ dirtied_rate,
+ mutator_dirty_time_ms,
+ last_mutator_start_dirty_ms, last_mutator_end_dirty_ms, yield_duration_ms,
+ dirtied_cards, pending_cards, pending_cards_from_gc());
}
+
+ _pending_cards_from_gc = next_pending_cards_from_gc;
+ _to_collection_set_cards = next_to_collection_set_cards;
}
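A worked example of the rate computation above, with made-up numbers: suppose the last refinement epoch started at 1000.0 ms, the pause starts at 1500.0 ms, refinement yielded for 100.0 ms in between, 5000 cards are pending at the pause, and pending_cards_from_gc() is 800:

  mutator_dirty_time_ms = (1500.0 - 1000.0) - 100.0 = 400.0 ms
  dirtied_cards         = saturated_sub(5000, 800)  = 4200 cards
  dirtied_rate          = 4200 / 400.0              = 10.5 cards/ms

Because the subtractive term may overcount (it also includes dirty-as-young cards), saturated_sub() clamps the difference at zero rather than letting it underflow.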
bool G1Policy::should_retain_evac_failed_region(uint index) const {
@@ -761,27 +768,27 @@ bool G1Policy::concurrent_operation_is_full_mark(const char* msg) {
((_g1h->gc_cause() != GCCause::_g1_humongous_allocation) || need_to_start_conc_mark(msg));
}
-double G1Policy::logged_cards_processing_time() const {
+double G1Policy::pending_cards_processing_time() const {
double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR);
- size_t logged_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+ size_t pending_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
+ phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
- double merge_logged_cards_time = average_time_ms(G1GCPhaseTimes::MergeLB) +
- phase_times()->cur_distribute_log_buffers_time_ms();
+ double merge_pending_cards_time = phase_times()->cur_merge_refinement_table_time();
- // Approximate the time spent processing cards from log buffers by scaling
- // the total processing time by the ratio of logged cards to total cards
+ // Approximate the time spent processing cards from pending cards by scaling
+ // the total processing time by the ratio of pending cards to total cards
// processed. There might be duplicate cards in different log buffers,
// leading to an overestimate. That effect should be relatively small
// unless there are few cards to process, because cards in buffers are
// dirtied to limit duplication. Also need to avoid scaling when both
// counts are zero, which happens especially during early GCs. So ascribe
- // all of the time to the logged cards unless there are more total cards.
- if (logged_dirty_cards >= scan_heap_roots_cards) {
- return all_cards_processing_time + merge_logged_cards_time;
+ // all of the time to the pending cards unless there are more total cards.
+ if (pending_cards >= scan_heap_roots_cards) {
+ return all_cards_processing_time + merge_pending_cards_time;
}
- return (all_cards_processing_time * logged_dirty_cards / scan_heap_roots_cards) + merge_logged_cards_time;
+ return (all_cards_processing_time * pending_cards / scan_heap_roots_cards) + merge_pending_cards_time;
}
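For illustration (made-up numbers): if the combined average ScanHR/OptScanHR time is 20.0 ms, 100000 cards were scanned of which 25000 were pending cards from the refinement table, and merging the refinement table took 2.0 ms, the time ascribed to pending cards is

  (20.0 * 25000 / 100000) + 2.0 = 7.0 ms

whereas if the pending cards were >= the scanned cards (e.g. very few cards overall), the full 20.0 + 2.0 ms would be ascribed to them.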
// Anything below that is considered to be zero
@@ -815,6 +822,22 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
// We make the assumption that these are rare.
bool update_stats = !allocation_failure;
+ size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
+
+ // Number of scanned cards with "Dirty" value (and nothing else).
+ size_t const pending_cards_from_refinement_table = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
+ // Number of cards actually merged in the Merge RS phase. MergeRSCards below includes the cards from the Eager Reclaim phase.
+ size_t const merged_cards_from_card_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSFromRemSetCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSFromRemSetCards);
+ // Number of cards attempted to merge in the Merge RS phase.
+ size_t const total_cards_from_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSTotalCards) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSTotalCards);
+
+ // Cards marked as being to collection set. May be inaccurate due to races.
+ size_t const total_non_young_rs_cards = MIN2(pending_cards_from_refinement_table + merged_cards_from_card_rs, total_cards_scanned);
+
if (update_stats) {
// We maintain the invariant that all objects allocated by mutator
// threads will be allocated out of eden regions. So, we can use
@@ -827,6 +850,98 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
uint regions_allocated = _collection_set->eden_region_length();
double alloc_rate_ms = (double) regions_allocated / app_time_ms;
_analytics->report_alloc_rate_ms(alloc_rate_ms);
+
+ double merge_refinement_table_time = p->cur_merge_refinement_table_time();
+ if (merge_refinement_table_time != 0.0) {
+ _analytics->report_merge_refinement_table_time_ms(merge_refinement_table_time);
+ }
+ if (merged_cards_from_card_rs >= G1NumCardsCostSampleThreshold) {
+ double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
+ average_time_ms(G1GCPhaseTimes::MergeRS) +
+ average_time_ms(G1GCPhaseTimes::OptMergeRS);
+ _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / merged_cards_from_card_rs, is_young_only_pause);
+ log_debug(gc, ergo, cset)("cost per card merge (young %s): avg time %.2f merged cards %zu cost(1m) %.2f pred_cost(1m-yo) %.2f pred_cost(1m-old) %.2f",
+ BOOL_TO_STR(is_young_only_pause),
+ avg_time_merge_cards, merged_cards_from_card_rs, 1e6 * avg_time_merge_cards / merged_cards_from_card_rs, _analytics->predict_card_merge_time_ms(1e6, true), _analytics->predict_card_merge_time_ms(1e6, false));
+ } else {
+ log_debug(gc, ergo, cset)("cost per card merge (young: %s): skipped, total cards %zu", BOOL_TO_STR(is_young_only_pause), total_non_young_rs_cards);
+ }
+
+ // Update prediction for card scan
+
+ if (total_cards_scanned >= G1NumCardsCostSampleThreshold) {
+ double avg_card_scan_time = average_time_ms(G1GCPhaseTimes::ScanHR) +
+ average_time_ms(G1GCPhaseTimes::OptScanHR);
+
+ _analytics->report_cost_per_card_scan_ms(avg_card_scan_time / total_cards_scanned, is_young_only_pause);
+
+ log_debug(gc, ergo, cset)("cost per card scan (young: %s): avg time %.2f total cards %zu cost(1m) %.2f pred_cost(1m-yo) %.2f pred_cost(1m-old) %.2f",
+ BOOL_TO_STR(is_young_only_pause),
+ avg_card_scan_time, total_cards_scanned, 1e6 * avg_card_scan_time / total_cards_scanned, _analytics->predict_card_scan_time_ms(1e6, true), _analytics->predict_card_scan_time_ms(1e6, false));
+ } else {
+ log_debug(gc, ergo, cset)("cost per card scan (young: %s): skipped, total cards %zu", BOOL_TO_STR(is_young_only_pause), total_cards_scanned);
+ }
+
+ // Update prediction for the ratio between cards actually merged onto the card
+ // table from the remembered sets and the total number of cards attempted to
+ // merge.
+ double merge_to_scan_ratio = 1.0;
+ if (total_cards_from_rs > 0) {
+ merge_to_scan_ratio = (double)merged_cards_from_card_rs / total_cards_from_rs;
+ }
+ _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, is_young_only_pause);
+
+ // Update prediction for code root scan
+ size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
+ p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
+
+ if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
+ double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
+ average_time_ms(G1GCPhaseTimes::OptCodeRoots);
+
+ _analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
+ }
+
+ // Update prediction for copy cost per byte
+ size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
+
+ if (copied_bytes > 0) {
+ double avg_copy_time = average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy);
+ double cost_per_byte_ms = avg_copy_time / copied_bytes;
+ _analytics->report_cost_per_byte_ms(cost_per_byte_ms, is_young_only_pause);
+ }
+
+ if (_collection_set->young_region_length() > 0) {
+ _analytics->report_young_other_cost_per_region_ms(young_other_time_ms() /
+ _collection_set->young_region_length());
+ }
+
+ if (_collection_set->initial_old_region_length() > 0) {
+ _analytics->report_non_young_other_cost_per_region_ms(non_young_other_time_ms() /
+ _collection_set->initial_old_region_length());
+ }
+
+ _analytics->report_constant_other_time_ms(constant_other_time_ms(pause_time_ms));
+
+ _analytics->report_pending_cards(pending_cards_from_refinement_table, is_young_only_pause);
+
+ _analytics->report_card_rs_length(total_cards_scanned - total_non_young_rs_cards, is_young_only_pause);
+ _analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
+ }
+
+ {
+ double mutator_end_time = cur_pause_start_sec() * MILLIUNITS;
+ G1ConcurrentRefineStats* stats = _g1h->concurrent_refine()->sweep_state().stats();
+ // Record any available refinement statistics.
+ record_refinement_stats(stats);
+
+ double yield_duration_ms = TimeHelper::counter_to_millis(_g1h->yield_duration_in_refinement_epoch());
+ record_dirtying_stats(TimeHelper::counter_to_millis(_g1h->last_refinement_epoch_start()),
+ mutator_end_time,
+ pending_cards_from_refinement_table,
+ yield_duration_ms,
+ phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSPendingCards),
+ phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSToYoungGenCards));
}
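A worked example of how the new counters above feed the predictors (made-up numbers): with 25000 pending cards found on the refinement table, 60000 cards merged from the card-based remembered sets, 70000 cards attempted to merge, and 100000 cards scanned in total:

  total_non_young_rs_cards = MIN2(25000 + 60000, 100000) = 85000
  merge_to_scan_ratio      = 60000 / 70000               = ~0.86
  reported card_rs_length  = 100000 - 85000              = 15000

i.e. report_pending_cards() receives the 25000 refinement-table cards and report_card_rs_length() the remainder of the scanned cards.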
record_pause(this_pause, start_time_sec, end_time_sec, allocation_failure);
@@ -857,82 +972,6 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
_eden_surv_rate_group->start_adding_regions();
- if (update_stats) {
- // Update prediction for card merge.
- size_t const merged_cards_from_log_buffers = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
- // MergeRSCards includes the cards from the Eager Reclaim phase.
- size_t const merged_cards_from_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSCards) +
- p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSCards);
- size_t const total_cards_merged = merged_cards_from_rs +
- merged_cards_from_log_buffers;
-
- if (total_cards_merged >= G1NumCardsCostSampleThreshold) {
- double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
- average_time_ms(G1GCPhaseTimes::MergeRS) +
- average_time_ms(G1GCPhaseTimes::MergeLB) +
- p->cur_distribute_log_buffers_time_ms() +
- average_time_ms(G1GCPhaseTimes::OptMergeRS);
- _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, is_young_only_pause);
- }
-
- // Update prediction for card scan
- size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
- p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
-
- if (total_cards_scanned >= G1NumCardsCostSampleThreshold) {
- double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
- average_time_ms(G1GCPhaseTimes::OptScanHR);
-
- _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, is_young_only_pause);
- }
-
- // Update prediction for the ratio between cards from the remembered
- // sets and actually scanned cards from the remembered sets.
- // Due to duplicates in the log buffers, the number of scanned cards
- // can be smaller than the cards in the log buffers.
- const size_t scanned_cards_from_rs = (total_cards_scanned > merged_cards_from_log_buffers) ? total_cards_scanned - merged_cards_from_log_buffers : 0;
- double scan_to_merge_ratio = 0.0;
- if (merged_cards_from_rs > 0) {
- scan_to_merge_ratio = (double)scanned_cards_from_rs / merged_cards_from_rs;
- }
- _analytics->report_card_scan_to_merge_ratio(scan_to_merge_ratio, is_young_only_pause);
-
- // Update prediction for code root scan
- size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
- p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
-
- if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
- double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
- average_time_ms(G1GCPhaseTimes::OptCodeRoots);
-
- _analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
- }
-
- // Update prediction for copy cost per byte
- size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
-
- if (copied_bytes > 0) {
- double cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / copied_bytes;
- _analytics->report_cost_per_byte_ms(cost_per_byte_ms, is_young_only_pause);
- }
-
- if (_collection_set->young_region_length() > 0) {
- _analytics->report_young_other_cost_per_region_ms(young_other_time_ms() /
- _collection_set->young_region_length());
- }
-
- if (_collection_set->initial_old_region_length() > 0) {
- _analytics->report_non_young_other_cost_per_region_ms(non_young_other_time_ms() /
- _collection_set->initial_old_region_length());
- }
-
- _analytics->report_constant_other_time_ms(constant_other_time_ms(pause_time_ms));
-
- _analytics->report_pending_cards((double)pending_cards_at_gc_start(), is_young_only_pause);
- _analytics->report_card_rs_length((double)_card_rs_length, is_young_only_pause);
- _analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
- }
-
assert(!(G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause) && collector_state()->mark_or_rebuild_in_progress()),
"If the last pause has been concurrent start, we should not have been in the marking window");
if (G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause)) {
@@ -963,29 +1002,26 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
}
// Note that _mmu_tracker->max_gc_time() returns the time in seconds.
- double logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
+ double pending_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
- double const logged_cards_time_ms = logged_cards_processing_time();
- size_t logged_cards =
- phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB,
- G1GCPhaseTimes::MergeLBDirtyCards);
- bool exceeded_goal = logged_cards_time_goal_ms < logged_cards_time_ms;
- size_t predicted_thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
+ double const pending_cards_time_ms = pending_cards_processing_time();
+ size_t pending_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
+ phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
+
+ bool exceeded_goal = pending_cards_time_goal_ms < pending_cards_time_ms;
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
log_debug(gc, ergo, refine)
- ("GC refinement: goal: %zu + %zu / %1.2fms, actual: %zu / %1.2fms, %s",
+ ("GC refinement: goal: %zu / %1.2fms, actual: %zu / %1.2fms, %s",
cr->pending_cards_target(),
- predicted_thread_buffer_cards,
- logged_cards_time_goal_ms,
- logged_cards,
- logged_cards_time_ms,
+ pending_cards_time_goal_ms,
+ pending_cards,
+ pending_cards_time_ms,
(exceeded_goal ? " (exceeded goal)" : ""));
- cr->adjust_after_gc(logged_cards_time_ms,
- logged_cards,
- predicted_thread_buffer_cards,
- logged_cards_time_goal_ms);
+ cr->adjust_after_gc(pending_cards_time_ms,
+ pending_cards,
+ pending_cards_time_goal_ms);
}
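A worked example of the goal check above (made-up numbers): with _mmu_tracker->max_gc_time() at 0.2 s and G1RSetUpdatingPauseTimePercent at 10, the goal is 0.2 * 1000 * 10 / 100 = 20.0 ms. If pending_cards_processing_time() then comes out at 26.4 ms for 50000 pending cards, the log line includes "(exceeded goal)" and those values are handed to G1ConcurrentRefine::adjust_after_gc().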
G1IHOPControl* G1Policy::create_ihop_control(const G1OldGenAllocationTracker* old_gen_alloc_tracker,
@@ -1057,34 +1093,27 @@ double G1Policy::predict_base_time_ms(size_t pending_cards,
size_t code_root_rs_length) const {
bool in_young_only_phase = collector_state()->in_young_only_phase();
- size_t unique_cards_from_rs = _analytics->predict_scan_card_num(card_rs_length, in_young_only_phase);
- // Assume that all cards from the log buffers will be scanned, i.e. there are no
- // duplicates in that set.
- size_t effective_scanned_cards = unique_cards_from_rs + pending_cards;
+ // Cards from the refinement table and the cards from the young gen remset are
+ // unique to each other as they are located on the card table.
+ size_t effective_scanned_cards = card_rs_length + pending_cards;
- double card_merge_time = _analytics->predict_card_merge_time_ms(pending_cards + card_rs_length, in_young_only_phase);
+ double refinement_table_merge_time = _analytics->predict_merge_refinement_table_time_ms();
double card_scan_time = _analytics->predict_card_scan_time_ms(effective_scanned_cards, in_young_only_phase);
double code_root_scan_time = _analytics->predict_code_root_scan_time_ms(code_root_rs_length, in_young_only_phase);
double constant_other_time = _analytics->predict_constant_other_time_ms();
double survivor_evac_time = predict_survivor_regions_evac_time();
- double total_time = card_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
+ double total_time = refinement_table_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
log_trace(gc, ergo, heap)("Predicted base time: total %f lb_cards %zu card_rs_length %zu effective_scanned_cards %zu "
- "card_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
+ "refinement_table_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
"constant_other_time %f survivor_evac_time %f",
total_time, pending_cards, card_rs_length, effective_scanned_cards,
- card_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
+ refinement_table_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
constant_other_time, survivor_evac_time);
return total_time;
}
-double G1Policy::predict_base_time_ms(size_t pending_cards) const {
- bool for_young_only_phase = collector_state()->in_young_only_phase();
- size_t card_rs_length = _analytics->predict_card_rs_length(for_young_only_phase);
- return predict_base_time_ms(pending_cards, card_rs_length);
-}
-
double G1Policy::predict_base_time_ms(size_t pending_cards, size_t card_rs_length) const {
bool for_young_only_phase = collector_state()->in_young_only_phase();
size_t code_root_rs_length = _analytics->predict_code_root_rs_length(for_young_only_phase);
@@ -1428,6 +1457,64 @@ size_t G1Policy::allowed_waste_in_collection_set() const {
return G1HeapWastePercent * _g1h->capacity() / 100;
}
+bool G1Policy::try_get_available_bytes_estimate(size_t& available_bytes) const {
+ // Getting used young bytes requires holding Heap_lock. But we can't use
+ // normal lock and block until available. Blocking on the lock could
+ // deadlock with a GC VMOp that is holding the lock and requesting a
+ // safepoint. Instead try to lock, and return the result of that attempt,
+ // and the estimate if successful.
+ if (Heap_lock->try_lock()) {
+ size_t used_bytes = estimate_used_young_bytes_locked();
+ Heap_lock->unlock();
+
+ size_t young_bytes = young_list_target_length() * G1HeapRegion::GrainBytes;
+ available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
+ return true;
+ } else {
+ available_bytes = 0;
+ return false;
+ }
+}
+
+double G1Policy::predict_time_to_next_gc_ms(size_t available_bytes) const {
+ double alloc_region_rate = _analytics->predict_alloc_rate_ms();
+ double alloc_bytes_rate = alloc_region_rate * G1HeapRegion::GrainBytes;
+ if (alloc_bytes_rate == 0.0) {
+ // A zero rate indicates we don't yet have data to use for predictions.
+ // Since we don't have any idea how long until the next GC, use a time of
+ // zero.
+ return 0.0;
+ } else {
+ // If the heap size is large and the allocation rate is small, we can get
+ // a predicted time until next GC that is so large it can cause problems
+ // (such as overflow) in other calculations. Limit the prediction to one
+ // hour, which is still large in this context.
+ const double one_hour_ms = 60.0 * 60.0 * MILLIUNITS;
+ double raw_time_ms = available_bytes / alloc_bytes_rate;
+ return MIN2(raw_time_ms, one_hour_ms);
+ }
+}
+
+uint64_t G1Policy::adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms) {
+ return MAX2(static_cast<uint64_t>(sqrt(wait_time_ms) * 4.0), min_time_ms);
+}
+
+double G1Policy::last_mutator_dirty_start_time_ms() {
+ return TimeHelper::counter_to_millis(_g1h->last_refinement_epoch_start());
+}
+
+size_t G1Policy::current_pending_cards() {
+ double now = os::elapsedTime() * MILLIUNITS;
+ return _pending_cards_from_gc + _analytics->predict_dirtied_cards_rate_ms() * (now - last_mutator_dirty_start_time_ms());
+}
+
+size_t G1Policy::current_to_collection_set_cards() {
+ // The incremental part is covered by the dirtied_cards_rate, i.e. pending cards
+ // cover both to collection set cards and other interesting cards because we do not
+ // know which is which until we look.
+ return _to_collection_set_cards;
+}
+
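A worked example for current_pending_cards() (made-up numbers): with 800 cards left over from the last GC, a predicted dirtying rate of 10.5 cards/ms and 300 ms elapsed since the last refinement epoch start, the estimate is 800 + 10.5 * 300 = 3950 cards currently sitting on the card table.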
uint G1Policy::min_retained_old_cset_length() const {
// Guarantee some progress with retained regions regardless of available time by
// taking at least one region.
diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp
index e9f7529e509..01bad97ab84 100644
--- a/src/hotspot/share/gc/g1/g1Policy.hpp
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp
@@ -48,6 +48,7 @@ class G1HeapRegion;
class G1CollectionSet;
class G1CollectionSetCandidates;
class G1CollectionSetChooser;
+class G1ConcurrentRefineStats;
class G1IHOPControl;
class G1Analytics;
class G1SurvivorRegions;
@@ -101,9 +102,18 @@ class G1Policy: public CHeapObj<mtGC> {
uint _free_regions_at_end_of_collection;
- size_t _card_rs_length;
-
- size_t _pending_cards_at_gc_start;
+ // Tracks the number of cards marked as dirty (only) during garbage collection
+ // (evacuation) on the card table.
+ // This is needed to properly account for those cards in the heuristics that start
+ // refinement at the correct time, which need to know approximately how many cards
+ // are currently on the card table.
+ // After the first completed refinement sweep of the refinement table between two
+ // garbage collections this value is reset to zero as that refinement processed all
+ // those cards.
+ size_t _pending_cards_from_gc;
+ // Tracks the approximate number of cards found as to-collection-set by either the
+ // garbage collection or the most recent refinement sweep.
+ size_t _to_collection_set_cards;
G1ConcurrentStartToMixedTimeTracker _concurrent_start_to_mixed;
@@ -111,7 +121,7 @@ class G1Policy: public CHeapObj {
return collector_state()->in_young_only_phase() && !collector_state()->mark_or_rebuild_in_progress();
}
- double logged_cards_processing_time() const;
+ double pending_cards_processing_time() const;
public:
const G1Predictions& predictor() const { return _predictor; }
const G1Analytics* analytics() const { return const_cast<const G1Analytics*>(_analytics); }
@@ -129,16 +139,10 @@ public:
hr->install_surv_rate_group(_survivor_surv_rate_group);
}
- void record_card_rs_length(size_t num_cards) {
- _card_rs_length = num_cards;
- }
-
double cur_pause_start_sec() const {
return _cur_pause_start_sec;
}
- double predict_base_time_ms(size_t pending_cards) const;
-
double predict_base_time_ms(size_t pending_cards, size_t card_rs_length) const;
// Base time contains handling remembered sets and constant other time of the
@@ -239,7 +243,13 @@ private:
public:
size_t predict_bytes_to_copy(G1HeapRegion* hr) const;
- size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; }
+
+ double last_mutator_dirty_start_time_ms();
+ size_t pending_cards_from_gc() const { return _pending_cards_from_gc; }
+
+ size_t current_pending_cards();
+
+ size_t current_to_collection_set_cards();
// GC efficiency for collecting the region based on the time estimate for
// merging and scanning incoming references.
@@ -286,7 +296,7 @@ public:
// Check the current value of the young list RSet length and
// compare it against the last prediction. If the current value is
// higher, recalculate the young list target length prediction.
- void revise_young_list_target_length(size_t card_rs_length, size_t code_root_rs_length);
+ void revise_young_list_target_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length);
// This should be called after the heap is resized.
void record_new_heap_size(uint new_number_of_regions);
@@ -325,7 +335,6 @@ public:
// Amount of allowed waste in bytes in the collection set.
size_t allowed_waste_in_collection_set() const;
-
private:
// Predict the number of bytes of surviving objects from survivor and old
@@ -359,17 +368,39 @@ public:
bool use_adaptive_young_list_length() const;
+ // Try to get an estimate of the currently available bytes in the young gen. This
+ // operation considers itself low-priority: if other threads need the resources
+ // required to get the information, return false to indicate that the caller
+ // should retry "soon".
+ bool try_get_available_bytes_estimate(size_t& bytes) const;
+ // Estimate time until next GC, based on remaining bytes available for
+ // allocation and the allocation rate.
+ double predict_time_to_next_gc_ms(size_t available_bytes) const;
+
+ // Adjust wait times so that wakeups become less frequent the longer the next GC is away.
+ // But don't increase the wait time too rapidly, and bound it from below by min_time_ms.
+ // This reduces the number of thread wakeups that just immediately
+ // go back to waiting, while still being responsive to behavior changes.
+ uint64_t adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms);
+
+private:
// Return an estimate of the number of bytes used in young gen.
// precondition: holding Heap_lock
size_t estimate_used_young_bytes_locked() const;
+public:
+
void transfer_survivors_to_cset(const G1SurvivorRegions* survivors);
- // Record and log stats and pending cards before not-full collection.
- // thread_buffer_cards is the number of cards that were in per-thread
- // buffers. pending_cards includes thread_buffer_cards.
- void record_concurrent_refinement_stats(size_t pending_cards,
- size_t thread_buffer_cards);
+ // Record and log stats and pending cards to update predictors.
+ void record_refinement_stats(G1ConcurrentRefineStats* stats);
+
+ void record_dirtying_stats(double last_mutator_start_dirty_ms,
+ double last_mutator_end_dirty_ms,
+ size_t pending_cards,
+ double yield_duration,
+ size_t next_pending_cards_from_gc,
+ size_t next_to_collection_set_cards);
bool should_retain_evac_failed_region(G1HeapRegion* r) const {
return should_retain_evac_failed_region(r->hrm_index());
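The new helpers above (try_get_available_bytes_estimate(), predict_time_to_next_gc_ms(), adjust_wait_time_ms()) are meant to be combined by whatever schedules refinement wakeups; the following caller is purely hypothetical (policy and min_wait_ms are assumed names) and only illustrates the intended flow:

  size_t available_bytes;
  uint64_t wait_ms;
  if (policy->try_get_available_bytes_estimate(available_bytes)) {
    double time_to_gc_ms = policy->predict_time_to_next_gc_ms(available_bytes);
    // E.g. a 10000 ms prediction gives sqrt(10000) * 4 = 400 ms between wakeups;
    // short predictions are bounded from below by min_wait_ms.
    wait_ms = policy->adjust_wait_time_ms(time_to_gc_ms, min_wait_ms);
  } else {
    wait_ms = min_wait_ms;  // Heap_lock was contended; retry "soon".
  }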
diff --git a/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp b/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp
deleted file mode 100644
index 45e262c440a..00000000000
--- a/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "gc/g1/g1RedirtyCardsQueue.hpp"
-#include "gc/shared/bufferNode.hpp"
-#include "runtime/atomicAccess.hpp"
-#include "utilities/debug.hpp"
-#include "utilities/macros.hpp"
-
-// G1RedirtyCardsLocalQueueSet
-
-G1RedirtyCardsLocalQueueSet::G1RedirtyCardsLocalQueueSet(G1RedirtyCardsQueueSet* shared_qset) :
- PtrQueueSet(shared_qset->allocator()),
- _shared_qset(shared_qset),
- _buffers(),
- _queue(this)
-{}
-
-#ifdef ASSERT
-G1RedirtyCardsLocalQueueSet::~G1RedirtyCardsLocalQueueSet() {
- assert(_buffers._head == nullptr, "unflushed qset");
- assert(_buffers._tail == nullptr, "invariant");
- assert(_buffers._entry_count == 0, "invariant");
-}
-#endif // ASSERT
-
-void G1RedirtyCardsLocalQueueSet::enqueue_completed_buffer(BufferNode* node) {
- _buffers._entry_count += node->size();
- node->set_next(_buffers._head);
- _buffers._head = node;
- if (_buffers._tail == nullptr) {
- _buffers._tail = node;
- }
-}
-
-void G1RedirtyCardsLocalQueueSet::enqueue(void* value) {
- if (!try_enqueue(_queue, value)) {
- BufferNode* old_node = exchange_buffer_with_new(_queue);
- if (old_node != nullptr) {
- enqueue_completed_buffer(old_node);
- }
- retry_enqueue(_queue, value);
- }
-}
-
-BufferNodeList G1RedirtyCardsLocalQueueSet::flush() {
- flush_queue(_queue);
- BufferNodeList cur_buffers = _buffers;
- _shared_qset->add_bufferlist(_buffers);
- _buffers = BufferNodeList();
- return cur_buffers;
-}
-
-// G1RedirtyCardsLocalQueueSet::Queue
-
-G1RedirtyCardsLocalQueueSet::Queue::Queue(G1RedirtyCardsLocalQueueSet* qset) :
- PtrQueue(qset)
-{}
-
-#ifdef ASSERT
-G1RedirtyCardsLocalQueueSet::Queue::~Queue() {
- assert(buffer() == nullptr, "unflushed queue");
-}
-#endif // ASSERT
-
-// G1RedirtyCardsQueueSet
-
-G1RedirtyCardsQueueSet::G1RedirtyCardsQueueSet(BufferNode::Allocator* allocator) :
- PtrQueueSet(allocator),
- _list(),
- _entry_count(0),
- _tail(nullptr)
- DEBUG_ONLY(COMMA _collecting(true))
-{}
-
-G1RedirtyCardsQueueSet::~G1RedirtyCardsQueueSet() {
- verify_empty();
-}
-
-#ifdef ASSERT
-void G1RedirtyCardsQueueSet::verify_empty() const {
- assert(_list.empty(), "precondition");
- assert(_tail == nullptr, "invariant");
- assert(_entry_count == 0, "invariant");
-}
-#endif // ASSERT
-
-BufferNode* G1RedirtyCardsQueueSet::all_completed_buffers() const {
- DEBUG_ONLY(_collecting = false;)
- return _list.top();
-}
-
-BufferNodeList G1RedirtyCardsQueueSet::take_all_completed_buffers() {
- DEBUG_ONLY(_collecting = false;)
- BufferNodeList result(_list.pop_all(), _tail, _entry_count);
- _tail = nullptr;
- _entry_count = 0;
- DEBUG_ONLY(_collecting = true;)
- return result;
-}
-
-void G1RedirtyCardsQueueSet::update_tail(BufferNode* node) {
- // Node is the tail of a (possibly single element) list just prepended to
- // _list. If, after that prepend, node's follower is null, then node is
- // also the tail of _list, so record it as such.
- if (node->next() == nullptr) {
- assert(_tail == nullptr, "invariant");
- _tail = node;
- }
-}
-
-void G1RedirtyCardsQueueSet::enqueue_completed_buffer(BufferNode* node) {
- assert(_collecting, "precondition");
- AtomicAccess::add(&_entry_count, node->size());
- _list.push(*node);
- update_tail(node);
-}
-
-void G1RedirtyCardsQueueSet::add_bufferlist(const BufferNodeList& buffers) {
- assert(_collecting, "precondition");
- if (buffers._head != nullptr) {
- assert(buffers._tail != nullptr, "invariant");
- AtomicAccess::add(&_entry_count, buffers._entry_count);
- _list.prepend(*buffers._head, *buffers._tail);
- update_tail(buffers._tail);
- }
-}
diff --git a/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp b/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp
deleted file mode 100644
index add66f24cca..00000000000
--- a/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
-#define SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
-
-#include "gc/shared/bufferNode.hpp"
-#include "gc/shared/bufferNodeList.hpp"
-#include "gc/shared/ptrQueue.hpp"
-#include "memory/padded.hpp"
-#include "utilities/macros.hpp"
-
-class G1RedirtyCardsQueueSet;
-
-// A thread-local qset and queue. It provides an uncontended staging
-// area for completed buffers, to be flushed to the shared qset en masse.
-class G1RedirtyCardsLocalQueueSet : private PtrQueueSet {
- class Queue : public PtrQueue {
- public:
- Queue(G1RedirtyCardsLocalQueueSet* qset);
- ~Queue() NOT_DEBUG(= default);
- };
-
- G1RedirtyCardsQueueSet* _shared_qset;
- BufferNodeList _buffers;
- Queue _queue;
-
- // Add the buffer to the local list.
- virtual void enqueue_completed_buffer(BufferNode* node);
-
-public:
- G1RedirtyCardsLocalQueueSet(G1RedirtyCardsQueueSet* shared_qset);
- ~G1RedirtyCardsLocalQueueSet() NOT_DEBUG(= default);
-
- void enqueue(void* value);
-
- // Transfer all completed buffers to the shared qset.
- // Returns the flushed BufferNodeList which is later used
- // as a shortcut into the shared qset.
- BufferNodeList flush();
-};
-
-// Card table entries to be redirtied and the cards reprocessed later.
-// Has two phases, collecting and processing. During the collecting
-// phase buffers are added to the set. Once collecting is complete and
-// processing starts, buffers can no longer be added. Taking all the
-// collected (and processed) buffers reverts back to collecting, allowing
-// the set to be reused for another round of redirtying.
-class G1RedirtyCardsQueueSet : public PtrQueueSet {
- DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
- BufferNode::Stack _list;
- DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(size_t));
- volatile size_t _entry_count;
- DEFINE_PAD_MINUS_SIZE(3, DEFAULT_PADDING_SIZE, sizeof(BufferNode*));
- BufferNode* _tail;
- DEBUG_ONLY(mutable bool _collecting;)
-
- void update_tail(BufferNode* node);
-
-public:
- G1RedirtyCardsQueueSet(BufferNode::Allocator* allocator);
- ~G1RedirtyCardsQueueSet();
-
- void verify_empty() const NOT_DEBUG_RETURN;
-
- // Collect buffers. These functions are thread-safe.
- // precondition: Must not be concurrent with buffer processing.
- virtual void enqueue_completed_buffer(BufferNode* node);
- void add_bufferlist(const BufferNodeList& buffers);
-
- // Processing phase operations.
- // precondition: Must not be concurrent with buffer collection.
- BufferNode* all_completed_buffers() const;
- BufferNodeList take_all_completed_buffers();
-};
-
-#endif // SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
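With the redirty-card queue machinery above removed, cards are no longer staged in buffers and redirtied after the pause; evacuation marks the card table entry in place and the concurrent refinement sweep picks those marks up later. Condensed from the hunks earlier in this change (simplified, not literal code):

  // Before: buffer the card for post-GC redirtying.
  _rdc_local_qset.enqueue(ct()->byte_for_index(card_index));
  // After: mark the card table entry directly.
  *ct()->byte_for(p) = into_new_survivor ? G1CardTable::g1_to_cset_card : G1CardTable::g1_dirty_card;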
diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp
index 2a09512730c..d2df416edc2 100644
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp
@@ -27,11 +27,12 @@
#include "gc/g1/g1BlockOffsetTable.inline.hpp"
#include "gc/g1/g1CardSet.inline.hpp"
#include "gc/g1/g1CardTable.inline.hpp"
+#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CardTableEntryClosure.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSet.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
#include "gc/g1/g1FromCardCache.hpp"
#include "gc/g1/g1GCParPhaseTimesTracker.hpp"
#include "gc/g1/g1GCPhaseTimes.hpp"
@@ -42,8 +43,6 @@
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1RootClosures.hpp"
-#include "gc/shared/bufferNode.hpp"
-#include "gc/shared/bufferNodeList.hpp"
#include "gc/shared/gc_globals.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "jfr/jfrEvents.hpp"
@@ -63,7 +62,7 @@
// Collects information about the overall heap root scan progress during an evacuation.
//
// Scanning the remembered sets works by first merging all sources of cards to be
-// scanned (log buffers, remembered sets) into a single data structure to remove
+// scanned (refinement table, remembered sets) into a single data structure to remove
// duplicates and simplify work distribution.
//
// During the following card scanning we not only scan this combined set of cards, but
@@ -89,37 +88,13 @@
class G1RemSetScanState : public CHeapObj<mtGC> {
class G1DirtyRegions;
- size_t _max_reserved_regions;
-
- // Card table iteration claim for each heap region, from 0 (completely unscanned)
- // to (>=) G1HeapRegion::CardsPerRegion (completely scanned).
- uint volatile* _card_table_scan_state;
-
- uint _scan_chunks_per_region; // Number of chunks per region.
- uint8_t _log_scan_chunks_per_region; // Log of number of chunks per region.
- bool* _region_scan_chunks;
- size_t _num_total_scan_chunks; // Total number of elements in _region_scan_chunks.
- uint8_t _scan_chunks_shift; // For conversion between card index and chunk index.
-public:
- uint scan_chunk_size_in_cards() const { return (uint)1 << _scan_chunks_shift; }
-
- // Returns whether the chunk corresponding to the given region/card in region contain a
- // dirty card, i.e. actually needs scanning.
- bool chunk_needs_scan(uint const region_idx, uint const card_in_region) const {
- size_t const idx = ((size_t)region_idx << _log_scan_chunks_per_region) + (card_in_region >> _scan_chunks_shift);
- assert(idx < _num_total_scan_chunks, "Index %zu out of bounds %zu",
- idx, _num_total_scan_chunks);
- return _region_scan_chunks[idx];
- }
-
-private:
+ G1CardTableClaimTable _card_claim_table;
// The complete set of regions which card table needs to be cleared at the end
- // of GC because we scribbled over these card tables.
+ // of GC because we scribbled over these card table entries.
//
// Regions may be added for two reasons:
- // - they were part of the collection set: they may contain g1_young_card_val
- // or regular card marks that we never scan so we must always clear their card
- // table
+ // - they were part of the collection set: they may contain regular card marks
+ // that we never scan so we must always clear their card table.
// - or in case g1 does an optional evacuation pass, g1 marks the cards in there
// as g1_scanned_card_val. If G1 only did an initial evacuation pass, the
// scanning already cleared these cards. In that case they are not in this set
@@ -129,7 +104,7 @@ private:
// in the current evacuation pass.
G1DirtyRegions* _next_dirty_regions;
- // Set of (unique) regions that can be added to concurrently.
+// Set of (unique) regions that can be added to concurrently.
class G1DirtyRegions : public CHeapObj<mtGC> {
uint* _buffer;
uint _cur_idx;
@@ -147,8 +122,6 @@ private:
reset();
}
- static size_t chunk_size() { return M; }
-
~G1DirtyRegions() {
FREE_C_HEAP_ARRAY(uint, _buffer);
FREE_C_HEAP_ARRAY(bool, _contains);
@@ -197,7 +170,7 @@ private:
// entries from free regions.
HeapWord** _scan_top;
class G1ClearCardTableTask : public G1AbstractSubTask {
G1CollectedHeap* _g1h;
G1DirtyRegions* _regions;
uint volatile _cur_dirty_regions;
@@ -229,9 +202,9 @@ private:
virtual ~G1ClearCardTableTask() {
_scan_state->cleanup();
-#ifndef PRODUCT
- G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup();
-#endif
+ if (VerifyDuringGC) {
+ G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup();
+ }
}
void do_work(uint worker_id) override {
@@ -243,7 +216,15 @@ private:
for (uint i = next; i < max; i++) {
G1HeapRegion* r = _g1h->region_at(_regions->at(i));
- r->clear_cardtable();
+ // The card table contains "dirty" card marks. Clear unconditionally.
+ //
+ // Humongous reclaim candidates are not in the dirty set. This is fine because
+ // their card and refinement tables should always be clear: they are typeArrays.
+ r->clear_card_table();
+ // There is no need to clear the refinement table here: the refinement table of
+ // collection set regions has already been cleared at the start of the collection,
+ // and old regions still use it to track old->collection set candidates, so it
+ // must not be cleared for them.
}
}
}
@@ -251,56 +232,41 @@ private:
public:
G1RemSetScanState() :
- _max_reserved_regions(0),
- _card_table_scan_state(nullptr),
- _scan_chunks_per_region(G1CollectedHeap::get_chunks_per_region()),
- _log_scan_chunks_per_region(log2i(_scan_chunks_per_region)),
- _region_scan_chunks(nullptr),
- _num_total_scan_chunks(0),
- _scan_chunks_shift(0),
+ _card_claim_table(G1CollectedHeap::get_chunks_per_region_for_scan()),
_all_dirty_regions(nullptr),
_next_dirty_regions(nullptr),
- _scan_top(nullptr) {
- }
+ _scan_top(nullptr) { }
~G1RemSetScanState() {
- FREE_C_HEAP_ARRAY(uint, _card_table_scan_state);
- FREE_C_HEAP_ARRAY(bool, _region_scan_chunks);
FREE_C_HEAP_ARRAY(HeapWord*, _scan_top);
}
- void initialize(size_t max_reserved_regions) {
- assert(_card_table_scan_state == nullptr, "Must not be initialized twice");
- _max_reserved_regions = max_reserved_regions;
- _card_table_scan_state = NEW_C_HEAP_ARRAY(uint, max_reserved_regions, mtGC);
- _num_total_scan_chunks = max_reserved_regions * _scan_chunks_per_region;
- _region_scan_chunks = NEW_C_HEAP_ARRAY(bool, _num_total_scan_chunks, mtGC);
-
- _scan_chunks_shift = (uint8_t)log2i(G1HeapRegion::CardsPerRegion / _scan_chunks_per_region);
+ void initialize(uint max_reserved_regions) {
+ _card_claim_table.initialize(max_reserved_regions);
_scan_top = NEW_C_HEAP_ARRAY(HeapWord*, max_reserved_regions, mtGC);
}
+ // Reset the claim and clear scan top for all regions, including
+ // regions currently not available or free. Since regions might
+ // become used during the collection these values must be valid
+ // for those regions as well.
void prepare() {
- // Reset the claim and clear scan top for all regions, including
- // regions currently not available or free. Since regions might
- // become used during the collection these values must be valid
- // for those regions as well.
- for (size_t i = 0; i < _max_reserved_regions; i++) {
+ size_t max_reserved_regions = _card_claim_table.max_reserved_regions();
+
+ for (size_t i = 0; i < max_reserved_regions; i++) {
clear_scan_top((uint)i);
}
- _all_dirty_regions = new G1DirtyRegions(_max_reserved_regions);
- _next_dirty_regions = new G1DirtyRegions(_max_reserved_regions);
+ _all_dirty_regions = new G1DirtyRegions(max_reserved_regions);
+ _next_dirty_regions = new G1DirtyRegions(max_reserved_regions);
}
void prepare_for_merge_heap_roots() {
- assert(_next_dirty_regions->size() == 0, "next dirty regions must be empty");
+ // The next dirty regions are already populated at the start of the GC with all
+ // old/humongous regions, so they are not necessarily empty here.
- for (size_t i = 0; i < _max_reserved_regions; i++) {
- _card_table_scan_state[i] = 0;
- }
-
- ::memset(_region_scan_chunks, false, _num_total_scan_chunks * sizeof(*_region_scan_chunks));
+ _card_claim_table.reset_all_to_unclaimed();
}
void complete_evac_phase(bool merge_dirty_regions) {
@@ -321,38 +287,10 @@ public:
return (hr != nullptr && !hr->in_collection_set() && hr->is_old_or_humongous());
}
- size_t num_visited_cards() const {
- size_t result = 0;
- for (uint i = 0; i < _num_total_scan_chunks; i++) {
- if (_region_scan_chunks[i]) {
- result++;
- }
- }
- return result * (G1HeapRegion::CardsPerRegion / _scan_chunks_per_region);
- }
-
size_t num_cards_in_dirty_regions() const {
return _next_dirty_regions->size() * G1HeapRegion::CardsPerRegion;
}
- void set_chunk_range_dirty(size_t const region_card_idx, size_t const card_length) {
- size_t chunk_idx = region_card_idx >> _scan_chunks_shift;
- // Make sure that all chunks that contain the range are marked. Calculate the
- // chunk of the last card that is actually marked.
- size_t const end_chunk = (region_card_idx + card_length - 1) >> _scan_chunks_shift;
- for (; chunk_idx <= end_chunk; chunk_idx++) {
- _region_scan_chunks[chunk_idx] = true;
- }
- }
-
- void set_chunk_dirty(size_t const card_idx) {
- assert((card_idx >> _scan_chunks_shift) < _num_total_scan_chunks,
- "Trying to access index %zu out of bounds %zu",
- card_idx >> _scan_chunks_shift, _num_total_scan_chunks);
- size_t const chunk_idx = card_idx >> _scan_chunks_shift;
- _region_scan_chunks[chunk_idx] = true;
- }
-
G1AbstractSubTask* create_cleanup_after_scan_heap_roots_task() {
return new G1ClearCardTableTask(G1CollectedHeap::heap(), _all_dirty_regions, this);
}
@@ -391,22 +329,16 @@ public:
}
bool has_cards_to_scan(uint region) {
- assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
- return _card_table_scan_state[region] < G1HeapRegion::CardsPerRegion;
- }
-
- uint claim_cards_to_scan(uint region, uint increment) {
- assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
- return AtomicAccess::fetch_then_add(&_card_table_scan_state[region], increment, memory_order_relaxed);
+ return _card_claim_table.has_unclaimed_cards(region);
}
void add_dirty_region(uint const region) {
#ifdef ASSERT
G1HeapRegion* hr = G1CollectedHeap::heap()->region_at(region);
assert(!hr->in_collection_set() && hr->is_old_or_humongous(),
"Region %u is not suitable for scanning, is %sin collection set or %s",
hr->hrm_index(), hr->in_collection_set() ? "" : "not ", hr->get_short_type_str());
#endif
_next_dirty_regions->add_dirty_region(region);
}
@@ -431,14 +363,16 @@ public:
void clear_scan_top(uint region_idx) {
set_scan_top(region_idx, nullptr);
}
+
+ G1CardTableChunkClaimer claimer(uint region_idx) {
+ return G1CardTableChunkClaimer(&_card_claim_table, region_idx);
+ }
};
-G1RemSet::G1RemSet(G1CollectedHeap* g1h,
- G1CardTable* ct) :
+G1RemSet::G1RemSet(G1CollectedHeap* g1h) :
_scan_state(new G1RemSetScanState()),
_prev_period_summary(false),
_g1h(g1h),
- _ct(ct),
_g1p(_g1h->policy()) {
}
@@ -450,36 +384,6 @@ void G1RemSet::initialize(uint max_reserved_regions) {
_scan_state->initialize(max_reserved_regions);
}
-// Helper class to claim dirty chunks within the card table.
-class G1CardTableChunkClaimer {
- G1RemSetScanState* _scan_state;
- uint _region_idx;
- uint _cur_claim;
-
-public:
- G1CardTableChunkClaimer(G1RemSetScanState* scan_state, uint region_idx) :
- _scan_state(scan_state),
- _region_idx(region_idx),
- _cur_claim(0) {
- guarantee(size() <= G1HeapRegion::CardsPerRegion, "Should not claim more space than possible.");
- }
-
- bool has_next() {
- while (true) {
- _cur_claim = _scan_state->claim_cards_to_scan(_region_idx, size());
- if (_cur_claim >= G1HeapRegion::CardsPerRegion) {
- return false;
- }
- if (_scan_state->chunk_needs_scan(_region_idx, _cur_claim)) {
- return true;
- }
- }
- }
-
- uint value() const { return _cur_claim; }
- uint size() const { return _scan_state->scan_chunk_size_in_cards(); }
-};
-
// Scans a heap region for dirty cards.
class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
using CardValue = CardTable::CardValue;
@@ -495,6 +399,8 @@ class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
uint _worker_id;
+ size_t _cards_pending;
+ size_t _cards_empty;
size_t _cards_scanned;
size_t _blocks_scanned;
size_t _chunks_claimed;
@@ -508,9 +414,9 @@ class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
HeapWord* _scanned_to;
CardValue _scanned_card_value;
- HeapWord* scan_memregion(uint region_idx_for_card, MemRegion mr) {
+ HeapWord* scan_memregion(uint region_idx_for_card, MemRegion mr, size_t& roots_found) {
G1HeapRegion* const card_region = _g1h->region_at(region_idx_for_card);
- G1ScanCardClosure card_cl(_g1h, _pss, _heap_roots_found);
+ G1ScanCardClosure card_cl(_g1h, _pss, roots_found);
HeapWord* const scanned_to = card_region->oops_on_memregion_seq_iterate_careful(mr, &card_cl);
assert(scanned_to != nullptr, "Should be able to scan range");
@@ -520,8 +426,8 @@ class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
return scanned_to;
}
- void do_claimed_block(uint const region_idx, CardValue* const dirty_l, CardValue* const dirty_r) {
- _ct->change_dirty_cards_to(dirty_l, dirty_r, _scanned_card_value);
+ void do_claimed_block(uint const region_idx, CardValue* const dirty_l, CardValue* const dirty_r, size_t& pending_cards) {
+ pending_cards += _ct->change_dirty_cards_to(dirty_l, dirty_r, _scanned_card_value);
size_t num_cards = pointer_delta(dirty_r, dirty_l, sizeof(CardValue));
_blocks_scanned++;
@@ -536,115 +442,22 @@ class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
return;
}
MemRegion mr(MAX2(card_start, _scanned_to), scan_end);
- _scanned_to = scan_memregion(region_idx, mr);
+ size_t roots_found = 0;
+ _scanned_to = scan_memregion(region_idx, mr, roots_found);
+ if (roots_found == 0) {
+ _cards_empty += num_cards;
+ }
_cards_scanned += num_cards;
+ _heap_roots_found += roots_found;
}
- // To locate consecutive dirty cards inside a chunk.
- class ChunkScanner {
- using Word = size_t;
-
- CardValue* const _start_card;
- CardValue* const _end_card;
-
- static const size_t ExpandedToScanMask = G1CardTable::WordAlreadyScanned;
- static const size_t ToScanMask = G1CardTable::g1_card_already_scanned;
-
- static bool is_card_dirty(const CardValue* const card) {
- return (*card & ToScanMask) == 0;
- }
-
- static bool is_word_aligned(const void* const addr) {
- return ((uintptr_t)addr) % sizeof(Word) == 0;
- }
-
- CardValue* find_first_dirty_card(CardValue* i_card) const {
- while (!is_word_aligned(i_card)) {
- if (is_card_dirty(i_card)) {
- return i_card;
- }
- i_card++;
- }
-
- for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
- Word word_value = *reinterpret_cast<Word*>(i_card);
- bool has_dirty_cards_in_word = (~word_value & ExpandedToScanMask) != 0;
-
- if (has_dirty_cards_in_word) {
- for (uint i = 0; i < sizeof(Word); ++i) {
- if (is_card_dirty(i_card)) {
- return i_card;
- }
- i_card++;
- }
- assert(false, "should have early-returned");
- }
- }
-
- return _end_card;
- }
-
- CardValue* find_first_non_dirty_card(CardValue* i_card) const {
- while (!is_word_aligned(i_card)) {
- if (!is_card_dirty(i_card)) {
- return i_card;
- }
- i_card++;
- }
-
- for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
- Word word_value = *reinterpret_cast<Word*>(i_card);
- bool all_cards_dirty = (word_value == G1CardTable::WordAllDirty);
-
- if (!all_cards_dirty) {
- for (uint i = 0; i < sizeof(Word); ++i) {
- if (!is_card_dirty(i_card)) {
- return i_card;
- }
- i_card++;
- }
- assert(false, "should have early-returned");
- }
- }
-
- return _end_card;
- }
-
- public:
- ChunkScanner(CardValue* const start_card, CardValue* const end_card) :
- _start_card(start_card),
- _end_card(end_card) {
- assert(is_word_aligned(start_card), "precondition");
- assert(is_word_aligned(end_card), "precondition");
- }
-
- template <typename Func>
- void on_dirty_cards(Func&& f) {
- for (CardValue* cur_card = _start_card; cur_card < _end_card; /* empty */) {
- CardValue* dirty_l = find_first_dirty_card(cur_card);
- CardValue* dirty_r = find_first_non_dirty_card(dirty_l);
-
- assert(dirty_l <= dirty_r, "inv");
-
- if (dirty_l == dirty_r) {
- assert(dirty_r == _end_card, "finished the entire chunk");
- return;
- }
-
- f(dirty_l, dirty_r);
-
- cur_card = dirty_r + 1;
- }
- }
- };
-
void scan_heap_roots(G1HeapRegion* r) {
uint const region_idx = r->hrm_index();
ResourceMark rm;
- G1CardTableChunkClaimer claim(_scan_state, region_idx);
+ G1CardTableChunkClaimer claim = _scan_state->claimer(region_idx);
// Set the current scan "finger" to null for every heap region to scan. Since
// the claim value is monotonically increasing, the check to not scan below this
@@ -652,6 +465,8 @@ class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
// to resetting this value for every claim.
_scanned_to = nullptr;
+ size_t pending_cards = 0;
+
while (claim.has_next()) {
_chunks_claimed++;
@@ -660,11 +475,12 @@ class G1ScanHRForRegionClosure : public G1HeapRegionClosure {
CardValue* const start_card = _ct->byte_for_index(region_card_base_idx);
CardValue* const end_card = start_card + claim.size();
- ChunkScanner chunk_scanner{start_card, end_card};
+ G1ChunkScanner chunk_scanner{start_card, end_card};
chunk_scanner.on_dirty_cards([&] (CardValue* dirty_l, CardValue* dirty_r) {
- do_claimed_block(region_idx, dirty_l, dirty_r);
+ do_claimed_block(region_idx, dirty_l, dirty_r, pending_cards);
});
}
+ _cards_pending += pending_cards;
}
public:
@@ -679,6 +495,8 @@ public:
_scan_state(scan_state),
_phase(phase),
_worker_id(worker_id),
+ _cards_pending(0),
+ _cards_empty(0),
_cards_scanned(0),
_blocks_scanned(0),
_chunks_claimed(0),
@@ -706,6 +524,8 @@ public:
Tickspan rem_set_root_scan_time() const { return _rem_set_root_scan_time; }
Tickspan rem_set_trim_partially_time() const { return _rem_set_trim_partially_time; }
+ size_t cards_pending() const { return _cards_pending; }
+ size_t cards_scanned_empty() const { return _cards_empty; }
size_t cards_scanned() const { return _cards_scanned; }
size_t blocks_scanned() const { return _blocks_scanned; }
size_t chunks_claimed() const { return _chunks_claimed; }
@@ -728,6 +548,9 @@ void G1RemSet::scan_heap_roots(G1ParScanThreadState* pss,
p->record_or_add_time_secs(objcopy_phase, worker_id, cl.rem_set_trim_partially_time().seconds());
p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_root_scan_time().seconds());
+
+ p->record_or_add_thread_work_item(scan_phase, worker_id, cl.cards_pending(), G1GCPhaseTimes::ScanHRPendingCards);
+ p->record_or_add_thread_work_item(scan_phase, worker_id, cl.cards_scanned_empty(), G1GCPhaseTimes::ScanHRScannedEmptyCards);
p->record_or_add_thread_work_item(scan_phase, worker_id, cl.cards_scanned(), G1GCPhaseTimes::ScanHRScannedCards);
p->record_or_add_thread_work_item(scan_phase, worker_id, cl.blocks_scanned(), G1GCPhaseTimes::ScanHRScannedBlocks);
p->record_or_add_thread_work_item(scan_phase, worker_id, cl.chunks_claimed(), G1GCPhaseTimes::ScanHRClaimedChunks);
@@ -901,6 +724,7 @@ void G1RemSet::prepare_region_for_scan(G1HeapRegion* r) {
assert_scan_top_is_null(hrm_index);
} else if (r->is_old_or_humongous()) {
_scan_state->set_scan_top(hrm_index, r->top());
+ _scan_state->add_dirty_region(hrm_index);
} else {
assert_scan_top_is_null(hrm_index);
assert(r->is_free(),
@@ -956,6 +780,90 @@ public:
}
};
+// Task to merge a non-dirty refinement table into the (primary) card table.
+class MergeRefinementTableTask : public WorkerTask {
+
+ G1CardTableClaimTable* _scan_state;
+ uint _max_workers;
+
+ class G1MergeRefinementTableRegionClosure : public G1HeapRegionClosure {
+ G1CardTableClaimTable* _scan_state;
+
+ bool do_heap_region(G1HeapRegion* r) override {
+ if (!_scan_state->has_unclaimed_cards(r->hrm_index())) {
+ return false;
+ }
+
+ // We can blindly clear the refinement tables of all collection set regions: these
+ // regions will be evacuated and need their refinement table reset in case
+ // of evacuation failure.
+ // Young regions contain random marks, which we can simply clear. The card
+ // marks in the refinement tables of other collection set regions are also
+ // uninteresting.
+ if (r->in_collection_set()) {
+ uint claim = _scan_state->claim_all_cards(r->hrm_index());
+ // Concurrent refinement may have started merging this region (we also
+ // get here for non-young regions), so the claim may be non-zero for those.
+ // We could get away with clearing only the area from the current claim
+ // to the last card in the region, but for now just clear it all.
+ if (claim < G1HeapRegion::CardsPerRegion) {
+ r->clear_refinement_table();
+ }
+ return false;
+ }
+
+ assert(r->is_old_or_humongous(), "must be");
+
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ G1CardTable* card_table = g1h->card_table();
+ G1CardTable* refinement_table = g1h->refinement_table();
+
+ size_t const region_card_base_idx = (size_t)r->hrm_index() << G1HeapRegion::LogCardsPerRegion;
+
+ G1CardTableChunkClaimer claim(_scan_state, r->hrm_index());
+
+ while (claim.has_next()) {
+ size_t const start_idx = region_card_base_idx + claim.value();
+
+ size_t* card_cur_word = (size_t*)card_table->byte_for_index(start_idx);
+
+ size_t* refinement_cur_word = (size_t*)refinement_table->byte_for_index(start_idx);
+ size_t* const refinement_end_word = refinement_cur_word + claim.size() / (sizeof(size_t) / sizeof(G1CardTable::CardValue));
+
+ for (; refinement_cur_word < refinement_end_word; ++refinement_cur_word, ++card_cur_word) {
+ size_t value = *refinement_cur_word;
+ *refinement_cur_word = G1CardTable::WordAllClean;
+ // Dirty is "0", so a logical AND merges the two tables. This is also safe
+ // for all other possible values in the card table; at this point these
+ // can only be g1_dirty_card or g1_to_cset_card, both of which will be
+ // scanned.
+ size_t new_value = *card_cur_word & value;
+ *card_cur_word = new_value;
+ }
+ }
+
+ return false;
+ }
+
+ public:
+ G1MergeRefinementTableRegionClosure(G1CardTableClaimTable* scan_state) : G1HeapRegionClosure(), _scan_state(scan_state) {
+ }
+ };
+
+public:
+ MergeRefinementTableTask(G1CardTableClaimTable* scan_state, uint max_workers) :
+ WorkerTask("Merge Refinement Table"), _scan_state(scan_state), _max_workers(max_workers) { guarantee(_scan_state != nullptr, "must be"); }
+
+ void work(uint worker_id) override {
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+ G1GCParPhaseTimesTracker x(g1h->phase_times(), G1GCPhaseTimes::SweepRT, worker_id, false /* allow multiple invocation */);
+
+ G1MergeRefinementTableRegionClosure cl(_scan_state);
+ _scan_state->heap_region_iterate_from_worker_offset(&cl, worker_id, _max_workers);
+ }
+};
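The word-wise AND above works because a dirty card is encoded as 0 and a clean card as all-ones, so AND-ing a refinement word into a card table word keeps every card that is dirty in either table dirty in the result. A minimal standalone sketch of that invariant; the constants below are illustrative stand-ins, not the actual G1CardTable values:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const uint8_t clean = 0xff; // stand-in for the clean card value
      const uint8_t dirty = 0x00; // stand-in for the dirty card value

      // One 64-bit word's worth of cards (8 cards) from each table.
      uint8_t card[8]       = {clean, dirty, clean, clean, dirty, clean, clean, clean};
      uint8_t refinement[8] = {clean, clean, dirty, clean, clean, clean, dirty, clean};

      uint64_t card_word, refinement_word;
      std::memcpy(&card_word, card, sizeof(card_word));
      std::memcpy(&refinement_word, refinement, sizeof(refinement_word));

      // Dirty is 0, so the AND leaves a card dirty whenever either table had it dirty.
      uint64_t merged = card_word & refinement_word;
      std::memcpy(card, &merged, sizeof(merged));

      for (int i = 0; i < 8; i++) {
        bool expect_dirty = (i == 1 || i == 2 || i == 4 || i == 6);
        assert((card[i] == dirty) == expect_dirty);
      }
      return 0;
    }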
+
class G1MergeHeapRootsTask : public WorkerTask {
class G1MergeCardSetStats {
@@ -973,12 +881,16 @@ class G1MergeHeapRootsTask : public WorkerTask {
_merged[tag]++;
}
- void inc_remset_cards(size_t increment = 1) {
- _merged[G1GCPhaseTimes::MergeRSCards] += increment;
+ void inc_merged_cards(size_t increment = 1) {
+ _merged[G1GCPhaseTimes::MergeRSFromRemSetCards] += increment;
+ }
+
+ void inc_total_cards(size_t increment = 1) {
+ _merged[G1GCPhaseTimes::MergeRSTotalCards] += increment;
}
void dec_remset_cards(size_t decrement) {
- _merged[G1GCPhaseTimes::MergeRSCards] -= decrement;
+ _merged[G1GCPhaseTimes::MergeRSTotalCards] -= decrement;
}
size_t merged(uint i) const { return _merged[i]; }
@@ -1031,10 +943,10 @@ class G1MergeHeapRootsTask : public WorkerTask {
}
void mark_card(G1CardTable::CardValue* value) {
- if (_ct->mark_clean_as_dirty(value)) {
- _scan_state->set_chunk_dirty(_ct->index_for_cardvalue(value));
+ if (_ct->mark_clean_as_from_remset(value)) {
+ _stats.inc_merged_cards();
}
- _stats.inc_remset_cards();
+ _stats.inc_total_cards();
}
public:
@@ -1054,7 +966,7 @@ class G1MergeHeapRootsTask : public WorkerTask {
// Returns whether the given region actually needs iteration.
bool start_iterate(uint const tag, uint const region_idx) {
- assert(tag < G1GCPhaseTimes::MergeRSCards, "invalid tag %u", tag);
+ assert(tag < G1GCPhaseTimes::MergeRSFromRemSetCards, "invalid tag %u", tag);
if (remember_if_interesting(region_idx)) {
_region_base_idx = (size_t)region_idx << G1HeapRegion::LogCardsPerRegion;
_stats.inc_card_set_merged(tag);
@@ -1064,9 +976,9 @@ class G1MergeHeapRootsTask : public WorkerTask {
}
void do_card_range(uint const start_card_idx, uint const length) {
- _ct->mark_range_dirty(_region_base_idx + start_card_idx, length);
- _stats.inc_remset_cards(length);
- _scan_state->set_chunk_range_dirty(_region_base_idx + start_card_idx, length);
+ size_t cards_changed = _ct->mark_clean_range_as_from_remset(_region_base_idx + start_card_idx, length);
+ _stats.inc_merged_cards(cards_changed);
+ _stats.inc_total_cards(length);
}
G1MergeCardSetStats stats() {
@@ -1086,12 +998,19 @@ class G1MergeHeapRootsTask : public WorkerTask {
class G1ClearBitmapClosure : public G1HeapRegionClosure {
G1CollectedHeap* _g1h;
G1RemSetScanState* _scan_state;
+ bool _initial_evacuation;
void assert_bitmap_clear(G1HeapRegion* hr, const G1CMBitMap* bitmap) {
assert(bitmap->get_next_marked_addr(hr->bottom(), hr->end()) == hr->end(),
"Bitmap should have no mark for region %u (%s)", hr->hrm_index(), hr->get_short_type_str());
}
+ void assert_refinement_table_clear(G1HeapRegion* hr) {
+#ifdef ASSERT
+ _g1h->refinement_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
+#endif
+ }
+
bool should_clear_region(G1HeapRegion* hr) const {
// The bitmap for young regions must obviously be clear as we never mark through them;
// old regions that are currently being marked through are only in the collection set
@@ -1110,14 +1029,31 @@ class G1MergeHeapRootsTask : public WorkerTask {
}
public:
- G1ClearBitmapClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state) :
+ G1ClearBitmapClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state, bool initial_evacuation) :
_g1h(g1h),
- _scan_state(scan_state)
+ _scan_state(scan_state),
+ _initial_evacuation(initial_evacuation)
{ }
bool do_heap_region(G1HeapRegion* hr) {
assert(_g1h->is_in_cset(hr), "Should only be used iterating the collection set");
+ // Collection set regions after the initial evacuation need their refinement
+ // table cleared because
+ // * we use the refinement table for recording references to other regions
+ // during evacuation failure handling
+ // * during previous passes we used the refinement table to contain marks for
+ // cross-region references. Now that we evacuate the region, they need to be
+ // cleared.
+ //
+ // We do not need to do this extra work for initial evacuation because we
+ // make sure the refinement table is clean for all regions either in
+ // concurrent refinement or in the merge refinement table phase earlier.
+ if (!_initial_evacuation) {
+ hr->clear_refinement_table();
+ } else {
+ assert_refinement_table_clear(hr);
+ }
// Evacuation failure uses the bitmap to record evacuation failed objects,
// so the bitmap for the regions in the collection set must be cleared if not already.
if (should_clear_region(hr)) {
@@ -1177,145 +1113,23 @@ class G1MergeHeapRootsTask : public WorkerTask {
}
};
- // Visitor for the log buffer entries to merge them into the card table.
- class G1MergeLogBufferCardsClosure : public G1CardTableEntryClosure {
-
- G1RemSetScanState* _scan_state;
- G1CardTable* _ct;
-
- size_t _cards_dirty;
- size_t _cards_skipped;
-
- void process_card(CardValue* card_ptr) {
- if (*card_ptr == G1CardTable::dirty_card_val()) {
- uint const region_idx = _ct->region_idx_for(card_ptr);
- _scan_state->add_dirty_region(region_idx);
- _scan_state->set_chunk_dirty(_ct->index_for_cardvalue(card_ptr));
- _cards_dirty++;
- }
- }
-
- public:
- G1MergeLogBufferCardsClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state) :
- _scan_state(scan_state),
- _ct(g1h->card_table()),
- _cards_dirty(0),
- _cards_skipped(0)
- {}
-
- void do_card_ptr(CardValue* card_ptr) override {
- // The only time we care about recording cards that
- // contain references that point into the collection set
- // is during RSet updating within an evacuation pause.
- assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
-
- uint const region_idx = _ct->region_idx_for(card_ptr);
-
- // The second clause must come after - the log buffers might contain cards to uncommitted
- // regions.
- // This code may count duplicate entries in the log buffers (even if rare) multiple
- // times.
- if (_scan_state->contains_cards_to_process(region_idx)) {
- process_card(card_ptr);
- } else {
- // We may have had dirty cards in the (initial) collection set (or the
- // young regions which are always in the initial collection set). We do
- // not fix their cards here: we already added these regions to the set of
- // regions to clear the card table at the end during the prepare() phase.
- _cards_skipped++;
- }
- }
-
- size_t cards_dirty() const { return _cards_dirty; }
- size_t cards_skipped() const { return _cards_skipped; }
- };
-
uint _num_workers;
G1HeapRegionClaimer _hr_claimer;
G1RemSetScanState* _scan_state;
- // To mitigate contention due multiple threads accessing and popping BufferNodes from a shared
- // G1DirtyCardQueueSet, we implement a sequential distribution phase. Here, BufferNodes are
- // distributed to worker threads in a sequential manner utilizing the _dirty_card_buffers. By doing
- // so, we effectively alleviate the bottleneck encountered during pop operations on the
- // G1DirtyCardQueueSet. Importantly, this approach preserves the helping aspect among worker
- // threads, allowing them to assist one another in case of imbalances in work distribution.
- BufferNode::Stack* _dirty_card_buffers;
-
bool _initial_evacuation;
volatile bool _fast_reclaim_handled;
- void apply_closure_to_dirty_card_buffers(G1MergeLogBufferCardsClosure* cl, uint worker_id) {
- G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
- for (uint i = 0; i < _num_workers; i++) {
- uint index = (worker_id + i) % _num_workers;
- while (BufferNode* node = _dirty_card_buffers[index].pop()) {
- cl->apply_to_buffer(node, worker_id);
- dcqs.deallocate_buffer(node);
- }
- }
- }
-
public:
G1MergeHeapRootsTask(G1RemSetScanState* scan_state, uint num_workers, bool initial_evacuation) :
WorkerTask("G1 Merge Heap Roots"),
_num_workers(num_workers),
_hr_claimer(num_workers),
_scan_state(scan_state),
- _dirty_card_buffers(nullptr),
_initial_evacuation(initial_evacuation),
_fast_reclaim_handled(false)
- {
- if (initial_evacuation) {
- Ticks start = Ticks::now();
-
- _dirty_card_buffers = NEW_C_HEAP_ARRAY(BufferNode::Stack, num_workers, mtGC);
- for (uint i = 0; i < num_workers; i++) {
- new (&_dirty_card_buffers[i]) BufferNode::Stack();
- }
-
- G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
- BufferNodeList buffers = dcqs.take_all_completed_buffers();
-
- size_t entries_per_thread = ceil(buffers._entry_count / (double)num_workers);
-
- BufferNode* head = buffers._head;
- BufferNode* tail = head;
-
- uint worker = 0;
- while (tail != nullptr) {
- size_t count = tail->size();
- BufferNode* cur = tail->next();
-
- while (count < entries_per_thread && cur != nullptr) {
- tail = cur;
- count += tail->size();
- cur = tail->next();
- }
-
- tail->set_next(nullptr);
- _dirty_card_buffers[worker++ % num_workers].prepend(*head, *tail);
-
- assert(cur != nullptr || tail == buffers._tail, "Must be");
- head = cur;
- tail = cur;
- }
-
- Tickspan total = Ticks::now() - start;
- G1CollectedHeap::heap()->phase_times()->record_distribute_log_buffers_time_ms(total.seconds() * 1000.0);
- }
- }
-
- ~G1MergeHeapRootsTask() {
- if (_dirty_card_buffers != nullptr) {
- using Stack = BufferNode::Stack;
- for (uint i = 0; i < _num_workers; i++) {
- _dirty_card_buffers[i].~Stack();
- }
- FREE_C_HEAP_ARRAY(Stack, _dirty_card_buffers);
- }
- }
+ { }
virtual void work(uint worker_id) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -1368,50 +1182,28 @@ public:
// Preparation for evacuation failure handling.
{
- G1ClearBitmapClosure clear(g1h, _scan_state);
+ G1ClearBitmapClosure clear(g1h, _scan_state, _initial_evacuation);
g1h->collection_set_iterate_increment_from(&clear, &_hr_claimer, worker_id);
}
-
- // Now apply the closure to all remaining log entries.
- if (_initial_evacuation) {
- assert(merge_remset_phase == G1GCPhaseTimes::MergeRS, "Wrong merge phase");
- G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeLB, worker_id);
-
- G1MergeLogBufferCardsClosure cl(g1h, _scan_state);
- apply_closure_to_dirty_card_buffers(&cl, worker_id);
-
- p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards);
- p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards);
- }
}
};
-void G1RemSet::print_merge_heap_roots_stats() {
- LogTarget(Debug, gc, remset) lt;
- if (lt.is_enabled()) {
- LogStream ls(lt);
+static void merge_refinement_table() {
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
- size_t num_visited_cards = _scan_state->num_visited_cards();
+ G1ConcurrentRefineSweepState& state = g1h->concurrent_refine()->sweep_state_for_merge();
+ WorkerThreads* workers = g1h->workers();
- size_t total_dirty_region_cards = _scan_state->num_cards_in_dirty_regions();
-
- G1CollectedHeap* g1h = G1CollectedHeap::heap();
- size_t total_old_region_cards =
- (g1h->num_committed_regions() - (g1h->num_free_regions() - g1h->collection_set()->cur_length())) * G1HeapRegion::CardsPerRegion;
-
- ls.print_cr("Visited cards %zu Total dirty %zu (%.2lf%%) Total old %zu (%.2lf%%)",
- num_visited_cards,
- total_dirty_region_cards,
- percent_of(num_visited_cards, total_dirty_region_cards),
- total_old_region_cards,
- percent_of(num_visited_cards, total_old_region_cards));
- }
+ MergeRefinementTableTask cl(state.sweep_table(), workers->active_workers());
+ log_debug(gc, ergo)("Running %s using %u workers", cl.name(), workers->active_workers());
+ workers->run_task(&cl);
}
void G1RemSet::merge_heap_roots(bool initial_evacuation) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1GCPhaseTimes* pt = g1h->phase_times();
+ // 1. Prepare the merging process
{
Ticks start = Ticks::now();
@@ -1425,28 +1217,42 @@ void G1RemSet::merge_heap_roots(bool initial_evacuation) {
}
}
- WorkerThreads* workers = g1h->workers();
- size_t const increment_length = g1h->collection_set()->regions_cur_length();
+ // 2. Merge the refinement table into the card table, if needed.
+ G1ConcurrentRefineSweepState& state = g1h->concurrent_refine()->sweep_state();
+ if (initial_evacuation && state.is_in_progress()) {
+ Ticks start = Ticks::now();
- uint const num_workers = initial_evacuation ? workers->active_workers() :
- MIN2(workers->active_workers(), (uint)increment_length);
+ merge_refinement_table();
+ g1h->phase_times()->record_merge_refinement_table_time((Ticks::now() - start).seconds() * MILLIUNITS);
+ }
+
+ // 3. Merge other heap roots.
Ticks start = Ticks::now();
{
+ WorkerThreads* workers = g1h->workers();
+
+ size_t const increment_length = g1h->collection_set()->groups_increment_length();
+
+ uint const num_workers = initial_evacuation ? workers->active_workers() :
+ MIN2(workers->active_workers(), (uint)increment_length);
+
G1MergeHeapRootsTask cl(_scan_state, num_workers, initial_evacuation);
log_debug(gc, ergo)("Running %s using %u workers for %zu regions",
cl.name(), num_workers, increment_length);
workers->run_task(&cl, num_workers);
}
- print_merge_heap_roots_stats();
-
if (initial_evacuation) {
pt->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
} else {
pt->record_or_add_optional_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
}
+
+ if (VerifyDuringGC && initial_evacuation) {
+ g1h->verifier()->verify_card_tables_clean(false /* both_card_tables */);
+ }
}
void G1RemSet::complete_evac_phase(bool has_more_than_one_evacuation_phase) {
@@ -1482,86 +1288,20 @@ inline void check_card_ptr(CardTable::CardValue* card_ptr, G1CardTable* ct) {
#endif
}
-bool G1RemSet::clean_card_before_refine(CardValue** const card_ptr_addr) {
- assert(!SafepointSynchronize::is_at_safepoint(), "Only call concurrently");
-
- CardValue* card_ptr = *card_ptr_addr;
- // Find the start address represented by the card.
- HeapWord* start = _ct->addr_for(card_ptr);
- // And find the region containing it.
- G1HeapRegion* r = _g1h->heap_region_containing_or_null(start);
-
- // If this is a (stale) card into an uncommitted region, exit.
- if (r == nullptr) {
- return false;
- }
-
- check_card_ptr(card_ptr, _ct);
-
- // If the card is no longer dirty, nothing to do.
- // We cannot load the card value before the "r == nullptr" check above, because G1
- // could uncommit parts of the card table covering uncommitted regions.
- if (*card_ptr != G1CardTable::dirty_card_val()) {
- return false;
- }
-
- // This check is needed for some uncommon cases where we should
- // ignore the card.
- //
- // The region could be young. Cards for young regions are
- // distinctly marked (set to g1_young_gen), so the post-barrier will
- // filter them out. However, that marking is performed
- // concurrently. A write to a young object could occur before the
- // card has been marked young, slipping past the filter.
- //
- // The card could be stale, because the region has been freed since
- // the card was recorded. In this case the region type could be
- // anything. If (still) free or (reallocated) young, just ignore
- // it. If (reallocated) old or humongous, the later card trimming
- // and additional checks in iteration may detect staleness. At
- // worst, we end up processing a stale card unnecessarily.
- //
- // In the normal (non-stale) case, the synchronization between the
- // enqueueing of the card and processing it here will have ensured
- // we see the up-to-date region type here.
- if (!r->is_old_or_humongous()) {
- return false;
- }
-
- // Trim the region designated by the card to what's been allocated
- // in the region. The card could be stale, or the card could cover
- // (part of) an object at the end of the allocated space and extend
- // beyond the end of allocation.
-
- // Non-humongous objects are either allocated in the old regions during GC.
- // So if region is old then top is stable.
- // Humongous object allocation sets top last; if top has not yet been set,
- // this is a stale card and we'll end up with an empty intersection.
- // If this is not a stale card, the synchronization between the
- // enqueuing of the card and processing it here will have ensured
- // we see the up-to-date top here.
- HeapWord* scan_limit = r->top();
-
- if (scan_limit <= start) {
- // If the trimmed region is empty, the card must be stale.
- return false;
- }
-
- // Okay to clean and process the card now. There are still some
- // stale card cases that may be detected by iteration and dealt with
- // as iteration failure.
- *const_cast(card_ptr) = G1CardTable::clean_card_val();
-
- return true;
-}
-
-void G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
- const uint worker_id) {
+G1RemSet::RefineResult G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
+ const uint worker_id) {
assert(!_g1h->is_stw_gc_active(), "Only call concurrently");
- check_card_ptr(card_ptr, _ct);
+ G1CardTable* ct = _g1h->refinement_table();
+ check_card_ptr(card_ptr, ct);
+
+ // That card is already known to contain a reference to the collection set. Skip
+ // further processing.
+ if (*card_ptr == G1CardTable::g1_to_cset_card) {
+ return AlreadyToCSet;
+ }
// Construct the MemRegion representing the card.
- HeapWord* start = _ct->addr_for(card_ptr);
+ HeapWord* start = ct->addr_for(card_ptr);
// And find the region containing it.
G1HeapRegion* r = _g1h->heap_region_containing(start);
// This reload of the top is safe even though it happens after the full
@@ -1571,7 +1311,7 @@ void G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
// cannot span across safepoint, so we don't need to worry about top being
// changed during safepoint.
HeapWord* scan_limit = r->top();
- assert(scan_limit > start, "sanity");
+ assert(scan_limit > start, "sanity region %u (%s) scan_limit " PTR_FORMAT " start " PTR_FORMAT, r->hrm_index(), r->get_short_type_str(), p2i(scan_limit), p2i(start));
// Don't use addr_for(card_ptr + 1) which can ask for
// a card beyond the heap.
@@ -1581,43 +1321,21 @@ void G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
G1ConcurrentRefineOopClosure conc_refine_cl(_g1h, worker_id);
if (r->oops_on_memregion_seq_iterate_careful(dirty_region, &conc_refine_cl) != nullptr) {
- return;
+ if (conc_refine_cl.has_ref_to_cset()) {
+ return HasRefToCSet;
+ } else if (conc_refine_cl.has_ref_to_old()) {
+ return HasRefToOld;
+ } else {
+ return NoCrossRegion;
+ }
}
-
// If unable to process the card then we encountered an unparsable
// part of the heap (e.g. a partially allocated object, so only
// temporarily a problem) while processing a stale card. Despite
// the card being stale, we can't simply ignore it, because we've
- // already marked the card cleaned, so taken responsibility for
+ // already marked the card as cleaned, and so have taken responsibility for
// ensuring the card gets scanned.
- //
- // However, the card might have gotten re-dirtied and re-enqueued
- // while we worked. (In fact, it's pretty likely.)
- if (*card_ptr == G1CardTable::dirty_card_val()) {
- return;
- }
-
- enqueue_for_reprocessing(card_ptr);
-}
-
-// Re-dirty and re-enqueue the card to retry refinement later.
-// This is used to deal with a rare race condition in concurrent refinement.
-void G1RemSet::enqueue_for_reprocessing(CardValue* card_ptr) {
- // We can't use the thread-local queue, because that might be the queue
- // that is being processed by us; we could be a Java thread conscripted to
- // perform refinement on our queue's current buffer. This situation only
- // arises from rare race condition, so it's not worth any significant
- // development effort or clever lock-free queue implementation. Instead
- // we use brute force, allocating and enqueuing an entire buffer for just
- // this card. Since buffers are processed in FIFO order and we try to
- // keep some in the queue, it is likely that the racing state will have
- // resolved by the time this card comes up for reprocessing.
- *card_ptr = G1CardTable::dirty_card_val();
- G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
- void** buffer = dcqs.allocate_buffer();
- size_t index = dcqs.buffer_capacity() - 1;
- buffer[index] = card_ptr;
- dcqs.enqueue_completed_buffer(BufferNode::make_node_from_buffer(buffer, index));
+ return CouldNotParse;
}
void G1RemSet::print_periodic_summary_info(const char* header, uint period_count, bool show_thread_times) {
diff --git a/src/hotspot/share/gc/g1/g1RemSet.hpp b/src/hotspot/share/gc/g1/g1RemSet.hpp
index 50cc029a9a1..8b2353cdbb3 100644
--- a/src/hotspot/share/gc/g1/g1RemSet.hpp
+++ b/src/hotspot/share/gc/g1/g1RemSet.hpp
@@ -26,6 +26,7 @@
#define SHARE_GC_G1_G1REMSET_HPP
#include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1CardTableClaimTable.hpp"
#include "gc/g1/g1GCPhaseTimes.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1OopClosures.hpp"
@@ -65,20 +66,15 @@ private:
G1CollectedHeap* _g1h;
- G1CardTable* _ct;
- G1Policy* _g1p;
-
- void print_merge_heap_roots_stats();
+ G1Policy* _g1p;
void assert_scan_top_is_null(uint hrm_index) NOT_DEBUG_RETURN;
- void enqueue_for_reprocessing(CardValue* card_ptr);
-
public:
// Initialize data that depends on the heap size being known.
void initialize(uint max_num_regions);
- G1RemSet(G1CollectedHeap* g1h, G1CardTable* ct);
+ G1RemSet(G1CollectedHeap* g1h);
~G1RemSet();
// Scan all cards in the non-collection set regions that potentially contain
@@ -101,7 +97,7 @@ public:
// Print coarsening stats.
void print_coarsen_stats();
- // Creates a task for cleaining up temporary data structures and the
+ // Creates a task for cleaning up temporary data structures and the
// card table, removing temporary duplicate detection information.
G1AbstractSubTask* create_cleanup_after_scan_heap_roots_task();
// Excludes the given region from heap root scanning.
@@ -122,16 +118,19 @@ public:
G1GCPhaseTimes::GCParPhases scan_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase);
- // Two methods for concurrent refinement support, executed concurrently to
- // the mutator:
- // Cleans the card at "*card_ptr_addr" before refinement, returns true iff the
- // card needs later refinement.
- bool clean_card_before_refine(CardValue** const card_ptr_addr);
+ enum RefineResult {
+ HasRefToCSet, // The (dirty) card has a reference to the collection set.
+ AlreadyToCSet, // The card is already marked as having a reference to the collection set.
+ HasRefToOld, // The dirty card contains references to other old regions (not the collection set).
+ NoCrossRegion, // The card no longer contains any interesting reference; the mutator
+ // changed them all after dirtying the card.
+ CouldNotParse // The card is unparsable; refinement needs to retry it later.
+ };
- // Refine the region corresponding to "card_ptr". Must be called after
- // being filtered by clean_card_before_refine(), and after proper
- // fence/synchronization.
+ // Refine the card corresponding to "card_ptr", returning the kind of references
+ // found, if any. Executed concurrently to the mutator.
- void refine_card_concurrently(CardValue* const card_ptr,
- const uint worker_id);
+ RefineResult refine_card_concurrently(CardValue* const card_ptr,
+ const uint worker_id);
// Print accumulated summary info from the start of the VM.
void print_summary_info();
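For orientation, a hypothetical caller of refine_card_concurrently() could dispatch on the result as in the sketch below. The helper function and the reactions in the comments are illustrative assumptions; only the G1RemSet API and the enum values come from the declarations above.

    #include "gc/g1/g1RemSet.hpp"

    // Hypothetical helper, not part of the patch: shows the intended meaning of
    // each RefineResult value from a refinement sweep's point of view.
    static void process_card(G1RemSet* rem_set, CardTable::CardValue* card_ptr, uint worker_id) {
      switch (rem_set->refine_card_concurrently(card_ptr, worker_id)) {
        case G1RemSet::HasRefToCSet:
          // Presumably: remember the card as pointing into the collection set
          // (e.g. by storing g1_to_cset_card) so the next GC merges it cheaply.
          break;
        case G1RemSet::AlreadyToCSet:
        case G1RemSet::NoCrossRegion:
          // Nothing further to do for this card.
          break;
        case G1RemSet::HasRefToOld:
          // Presumably: record the old-to-old reference in the remembered sets.
          break;
        case G1RemSet::CouldNotParse:
          // The heap was temporarily unparsable; leave the card for a later retry.
          break;
      }
    }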
diff --git a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp
index 49cc993dac2..3e9cf938097 100644
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp
@@ -27,7 +27,6 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
@@ -37,39 +36,61 @@
#include "runtime/javaThread.hpp"
void G1RemSetSummary::update() {
- class CollectData : public ThreadClosure {
+ G1ConcurrentRefine* refine = G1CollectedHeap::heap()->concurrent_refine();
+
+ class CollectWorkerData : public ThreadClosure {
G1RemSetSummary* _summary;
uint _counter;
public:
- CollectData(G1RemSetSummary * summary) : _summary(summary), _counter(0) {}
+ CollectWorkerData(G1RemSetSummary* summary) : _summary(summary), _counter(0) {}
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
- _summary->set_refine_thread_cpu_time(_counter, crt->cpu_time());
+ _summary->set_worker_thread_cpu_time(_counter, crt->cpu_time());
_counter++;
}
} collector(this);
- G1CollectedHeap* g1h = G1CollectedHeap::heap();
- g1h->concurrent_refine()->threads_do(&collector);
+ refine->worker_threads_do(&collector);
+
+ class CollectControlData : public ThreadClosure {
+ G1RemSetSummary* _summary;
+ public:
+ CollectControlData(G1RemSetSummary* summary) : _summary(summary) {}
+ virtual void do_thread(Thread* t) {
+ G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
+ _summary->set_control_thread_cpu_time(crt->cpu_time());
+ }
+ } control(this);
+
+ refine->control_thread_do(&control);
}
-void G1RemSetSummary::set_refine_thread_cpu_time(uint thread, jlong value) {
- assert(_refine_threads_cpu_times != nullptr, "just checking");
- assert(thread < _num_refine_threads, "just checking");
- _refine_threads_cpu_times[thread] = value;
+void G1RemSetSummary::set_worker_thread_cpu_time(uint thread, jlong value) {
+ assert(_worker_threads_cpu_times != nullptr, "just checking");
+ assert(thread < _num_worker_threads, "just checking");
+ _worker_threads_cpu_times[thread] = value;
}
-jlong G1RemSetSummary::refine_thread_cpu_time(uint thread) const {
- assert(_refine_threads_cpu_times != nullptr, "just checking");
- assert(thread < _num_refine_threads, "just checking");
- return _refine_threads_cpu_times[thread];
+void G1RemSetSummary::set_control_thread_cpu_time(jlong value) {
+ _control_thread_cpu_time = value;
+}
+
+jlong G1RemSetSummary::worker_thread_cpu_time(uint thread) const {
+ assert(_worker_threads_cpu_times != nullptr, "just checking");
+ assert(thread < _num_worker_threads, "just checking");
+ return _worker_threads_cpu_times[thread];
+}
+
+jlong G1RemSetSummary::control_thread_cpu_time() const {
+ return _control_thread_cpu_time;
}
G1RemSetSummary::G1RemSetSummary(bool should_update) :
- _num_refine_threads(G1ConcRefinementThreads),
- _refine_threads_cpu_times(NEW_C_HEAP_ARRAY(jlong, _num_refine_threads, mtGC)) {
+ _num_worker_threads(G1ConcRefinementThreads),
+ _worker_threads_cpu_times(NEW_C_HEAP_ARRAY(jlong, _num_worker_threads, mtGC)),
+ _control_thread_cpu_time(0) {
- memset(_refine_threads_cpu_times, 0, sizeof(jlong) * _num_refine_threads);
+ memset(_worker_threads_cpu_times, 0, sizeof(jlong) * _num_worker_threads);
if (should_update) {
update();
@@ -77,23 +98,25 @@ G1RemSetSummary::G1RemSetSummary(bool should_update) :
}
G1RemSetSummary::~G1RemSetSummary() {
- FREE_C_HEAP_ARRAY(jlong, _refine_threads_cpu_times);
+ FREE_C_HEAP_ARRAY(jlong, _worker_threads_cpu_times);
}
void G1RemSetSummary::set(G1RemSetSummary* other) {
assert(other != nullptr, "just checking");
- assert(_num_refine_threads == other->_num_refine_threads, "just checking");
+ assert(_num_worker_threads == other->_num_worker_threads, "just checking");
- memcpy(_refine_threads_cpu_times, other->_refine_threads_cpu_times, sizeof(jlong) * _num_refine_threads);
+ memcpy(_worker_threads_cpu_times, other->_worker_threads_cpu_times, sizeof(jlong) * _num_worker_threads);
+ _control_thread_cpu_time = other->_control_thread_cpu_time;
}
void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {
assert(other != nullptr, "just checking");
- assert(_num_refine_threads == other->_num_refine_threads, "just checking");
+ assert(_num_worker_threads == other->_num_worker_threads, "just checking");
- for (uint i = 0; i < _num_refine_threads; i++) {
- set_refine_thread_cpu_time(i, other->refine_thread_cpu_time(i) - refine_thread_cpu_time(i));
+ for (uint i = 0; i < _num_worker_threads; i++) {
+ set_worker_thread_cpu_time(i, other->worker_thread_cpu_time(i) - worker_thread_cpu_time(i));
}
+ _control_thread_cpu_time = other->_control_thread_cpu_time - _control_thread_cpu_time;
}
class G1PerRegionTypeRemSetCounters {
@@ -376,9 +399,10 @@ public:
void G1RemSetSummary::print_on(outputStream* out, bool show_thread_times) {
if (show_thread_times) {
out->print_cr(" Concurrent refinement threads times (s)");
+ out->print_cr(" Control %5.2f Workers", (double)control_thread_cpu_time() / NANOSECS_PER_SEC);
out->print(" ");
- for (uint i = 0; i < _num_refine_threads; i++) {
- out->print(" %5.2f", (double)refine_thread_cpu_time(i) / NANOSECS_PER_SEC);
+ for (uint i = 0; i < _num_worker_threads; i++) {
+ out->print(" %5.2f", (double)worker_thread_cpu_time(i) / NANOSECS_PER_SEC);
}
out->cr();
}
diff --git a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp
index 373f38952c8..dd7d55d5a2e 100644
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp
@@ -33,10 +33,12 @@ class G1RemSet;
// A G1RemSetSummary manages statistical information about the remembered set.
class G1RemSetSummary {
- size_t _num_refine_threads;
- jlong* _refine_threads_cpu_times;
+ size_t _num_worker_threads;
+ jlong* _worker_threads_cpu_times;
+ jlong _control_thread_cpu_time;
- void set_refine_thread_cpu_time(uint thread, jlong value);
+ void set_worker_thread_cpu_time(uint thread, jlong value);
+ void set_control_thread_cpu_time(jlong value);
// Update this summary with current data from various places.
void update();
@@ -53,7 +55,8 @@ public:
void print_on(outputStream* out, bool show_thread_times);
- jlong refine_thread_cpu_time(uint thread) const;
+ jlong worker_thread_cpu_time(uint thread) const;
+ jlong control_thread_cpu_time() const;
};
#endif // SHARE_GC_G1_G1REMSETSUMMARY_HPP
diff --git a/src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.cpp b/src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.cpp
new file mode 100644
index 00000000000..2f7acd9b710
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "gc/g1/g1CollectedHeap.hpp"
+#include "gc/g1/g1Policy.hpp"
+#include "gc/g1/g1ReviseYoungLengthTask.hpp"
+#include "gc/g1/g1ServiceThread.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
+
+
+jlong G1ReviseYoungLengthTask::reschedule_delay_ms() const {
+ G1Policy* policy = G1CollectedHeap::heap()->policy();
+ size_t available_bytes;
+ if (policy->try_get_available_bytes_estimate(available_bytes)) {
+ double predicted_time_to_next_gc_ms = policy->predict_time_to_next_gc_ms(available_bytes);
+
+ // Use a prime number close to 50ms as the minimum wait time, different from
+ // other components that derive their wait time from the try_get_available_bytes_estimate()
+ // call, to minimize interference.
+ uint64_t const min_wait_time_ms = 47;
+
+ return policy->adjust_wait_time_ms(predicted_time_to_next_gc_ms, min_wait_time_ms);
+ } else {
+ // Failed to get an estimate of available bytes. Try again as soon as possible.
+ return 1;
+ }
+}
+
+class G1ReviseYoungLengthTask::RemSetSamplingClosure : public G1HeapRegionClosure {
+ size_t _sampled_code_root_rs_length;
+
+public:
+ RemSetSamplingClosure() : _sampled_code_root_rs_length(0) { }
+
+ bool do_heap_region(G1HeapRegion* r) override {
+ G1HeapRegionRemSet* rem_set = r->rem_set();
+ _sampled_code_root_rs_length += rem_set->code_roots_list_length();
+ return false;
+ }
+
+ size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
+};
+
+void G1ReviseYoungLengthTask::adjust_young_list_target_length() {
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ G1Policy* policy = g1h->policy();
+
+ assert(policy->use_adaptive_young_list_length(), "should not call otherwise");
+
+ size_t pending_cards;
+ size_t current_to_collection_set_cards;
+ {
+ MutexLocker x(G1ReviseYoungLength_lock, Mutex::_no_safepoint_check_flag);
+ pending_cards = policy->current_pending_cards();
+ current_to_collection_set_cards = policy->current_to_collection_set_cards();
+ }
+
+ RemSetSamplingClosure cl;
+ g1h->collection_set()->iterate(&cl);
+
+ policy->revise_young_list_target_length(pending_cards,
+ current_to_collection_set_cards,
+ cl.sampled_code_root_rs_length());
+}
+
+G1ReviseYoungLengthTask::G1ReviseYoungLengthTask(const char* name) :
+ G1ServiceTask(name) { }
+
+void G1ReviseYoungLengthTask::execute() {
+ SuspendibleThreadSetJoiner sts;
+
+ adjust_young_list_target_length();
+
+ schedule(reschedule_delay_ms());
+}
diff --git a/src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.hpp b/src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.hpp
new file mode 100644
index 00000000000..baa8af75fb7
--- /dev/null
+++ b/src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
+#define SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
+
+#include "gc/g1/g1CardSetMemory.hpp"
+#include "gc/g1/g1HeapRegionRemSet.hpp"
+#include "gc/g1/g1MonotonicArenaFreePool.hpp"
+#include "gc/g1/g1ServiceThread.hpp"
+#include "utilities/growableArray.hpp"
+#include "utilities/ticks.hpp"
+
+// ServiceTask to revise the young generation target length.
+class G1ReviseYoungLengthTask : public G1ServiceTask {
+
+ // The delay used to reschedule this task.
+ jlong reschedule_delay_ms() const;
+
+ class RemSetSamplingClosure; // Helper class for sampling code root remembered set lengths.
+
+ // Adjust the target length (in regions) of the young gen, based on the
+ // current length of the remembered sets.
+ //
+ // At the end of a GC, G1 determines the length of the young gen based on
+ // how much time the next GC can take, and when the next GC may occur
+ // according to the MMU.
+ //
+ // The assumption is that a significant part of the GC is spent on scanning
+ // the remembered sets (among other components), so this task constantly
+ // reevaluates the prediction for the remembered set scanning costs, and
+ // potentially resizes the young gen. This may trigger a premature GC or even
+ // increase the young gen size to keep the pause time goal.
+ void adjust_young_list_target_length();
+
+public:
+ explicit G1ReviseYoungLengthTask(const char* name);
+
+ void execute() override;
+};
+
+#endif // SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
diff --git a/src/hotspot/share/gc/g1/g1ThreadLocalData.hpp b/src/hotspot/share/gc/g1/g1ThreadLocalData.hpp
index d0dcb59d7f0..858081b0581 100644
--- a/src/hotspot/share/gc/g1/g1ThreadLocalData.hpp
+++ b/src/hotspot/share/gc/g1/g1ThreadLocalData.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
#define SHARE_GC_G1_G1THREADLOCALDATA_HPP
#include "gc/g1/g1BarrierSet.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1RegionPinCache.hpp"
#include "gc/shared/gc_globals.hpp"
#include "gc/shared/satbMarkQueue.hpp"
@@ -36,7 +36,7 @@
class G1ThreadLocalData {
private:
SATBMarkQueue _satb_mark_queue;
- G1DirtyCardQueue _dirty_card_queue;
+ G1CardTable::CardValue* _byte_map_base;
// Per-thread cache of pinned object count to reduce atomic operation traffic
// due to region pinning. Holds the last region where the mutator pinned an
@@ -45,8 +45,8 @@ private:
G1ThreadLocalData() :
_satb_mark_queue(&G1BarrierSet::satb_mark_queue_set()),
- _dirty_card_queue(&G1BarrierSet::dirty_card_queue_set()),
- _pin_cache() {}
+ _byte_map_base(nullptr),
+ _pin_cache() { }
static G1ThreadLocalData* data(Thread* thread) {
assert(UseG1GC, "Sanity");
@@ -57,10 +57,6 @@ private:
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _satb_mark_queue);
}
- static ByteSize dirty_card_queue_offset() {
- return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _dirty_card_queue);
- }
-
public:
static void create(Thread* thread) {
new (data(thread)) G1ThreadLocalData();
@@ -74,10 +70,6 @@ public:
return data(thread)->_satb_mark_queue;
}
- static G1DirtyCardQueue& dirty_card_queue(Thread* thread) {
- return data(thread)->_dirty_card_queue;
- }
-
static ByteSize satb_mark_queue_active_offset() {
return satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active();
}
@@ -90,14 +82,20 @@ public:
return satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf();
}
- static ByteSize dirty_card_queue_index_offset() {
- return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_index();
+ static ByteSize card_table_base_offset() {
+ return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _byte_map_base);
}
- static ByteSize dirty_card_queue_buffer_offset() {
- return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_buf();
+ static void set_byte_map_base(Thread* thread, G1CardTable::CardValue* new_byte_map_base) {
+ data(thread)->_byte_map_base = new_byte_map_base;
}
+#ifndef PRODUCT
+ static G1CardTable::CardValue* get_byte_map_base(Thread* thread) {
+ return data(thread)->_byte_map_base;
+ }
+#endif
+
static G1RegionPinCache& pin_count_cache(Thread* thread) {
return data(thread)->_pin_cache;
}
diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.cpp b/src/hotspot/share/gc/g1/g1YoungCollector.cpp
index ee25e5fc028..e97e59575e3 100644
--- a/src/hotspot/share/gc/g1/g1YoungCollector.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungCollector.cpp
@@ -39,7 +39,6 @@
#include "gc/g1/g1MonitoringSupport.hpp"
#include "gc/g1/g1ParScanThreadState.inline.hpp"
#include "gc/g1/g1Policy.hpp"
-#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1RootProcessor.hpp"
@@ -914,13 +913,8 @@ class G1STWRefProcProxyTask : public RefProcProxyTask {
TaskTerminator _terminator;
G1ScannerTasksQueueSet& _task_queues;
- // Special closure for enqueuing discovered fields: during enqueue the card table
- // may not be in shape to properly handle normal barrier calls (e.g. card marks
- // in regions that failed evacuation, scribbling of various values by card table
- // scan code). Additionally the regular barrier enqueues into the "global"
- // DCQS, but during GC we need these to-be-refined entries in the GC local queue
- // so that after clearing the card table, the redirty cards phase will properly
- // mark all dirty cards to be picked up by refinement.
+  // G1-specific closure for marking discovered fields. The card needs to be marked in
+  // the refinement table because the card table is in use by the garbage collection.
class G1EnqueueDiscoveredFieldClosure : public EnqueueDiscoveredFieldClosure {
G1CollectedHeap* _g1h;
G1ParScanThreadState* _pss;
diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.hpp b/src/hotspot/share/gc/g1/g1YoungCollector.hpp
index 2c4929958fe..76d443b1a9f 100644
--- a/src/hotspot/share/gc/g1/g1YoungCollector.hpp
+++ b/src/hotspot/share/gc/g1/g1YoungCollector.hpp
@@ -45,7 +45,6 @@ class G1MonotonicArenaMemoryStats;
class G1NewTracer;
class G1ParScanThreadStateSet;
class G1Policy;
-class G1RedirtyCardsQueueSet;
class G1RemSet;
class G1SurvivorRegions;
class G1YoungGCAllocationFailureInjector;
diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
index 5b13e8fc206..2737def7e84 100644
--- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
@@ -287,7 +287,7 @@ public:
_chunk_bitmap(mtGC) {
_num_evac_fail_regions = _evac_failure_regions->num_regions_evac_failed();
- _num_chunks_per_region = G1CollectedHeap::get_chunks_per_region();
+ _num_chunks_per_region = G1CollectedHeap::get_chunks_per_region_for_scan();
    _chunk_size = static_cast<uint>(G1HeapRegion::GrainWords / _num_chunks_per_region);
@@ -300,7 +300,7 @@ public:
double worker_cost() const override {
assert(_evac_failure_regions->has_regions_evac_failed(), "Should not call this if there were no evacuation failures");
- double workers_per_region = (double)G1CollectedHeap::get_chunks_per_region() / G1RestoreRetainedRegionChunksPerWorker;
+ double workers_per_region = (double)G1CollectedHeap::get_chunks_per_region_for_scan() / G1RestoreRetainedRegionChunksPerWorker;
return workers_per_region * _evac_failure_regions->num_regions_evac_failed();
}
@@ -480,43 +480,6 @@ public:
}
};
-class RedirtyLoggedCardTableEntryClosure : public G1CardTableEntryClosure {
- size_t _num_dirtied;
- G1CollectedHeap* _g1h;
- G1CardTable* _g1_ct;
- G1EvacFailureRegions* _evac_failure_regions;
-
- G1HeapRegion* region_for_card(CardValue* card_ptr) const {
- return _g1h->heap_region_containing(_g1_ct->addr_for(card_ptr));
- }
-
- bool will_become_free(G1HeapRegion* hr) const {
- // A region will be freed by during the FreeCollectionSet phase if the region is in the
- // collection set and has not had an evacuation failure.
- return _g1h->is_in_cset(hr) && !_evac_failure_regions->contains(hr->hrm_index());
- }
-
-public:
- RedirtyLoggedCardTableEntryClosure(G1CollectedHeap* g1h, G1EvacFailureRegions* evac_failure_regions) :
- G1CardTableEntryClosure(),
- _num_dirtied(0),
- _g1h(g1h),
- _g1_ct(g1h->card_table()),
- _evac_failure_regions(evac_failure_regions) { }
-
- void do_card_ptr(CardValue* card_ptr) override {
- G1HeapRegion* hr = region_for_card(card_ptr);
-
- // Should only dirty cards in regions that won't be freed.
- if (!will_become_free(hr)) {
- *card_ptr = G1CardTable::dirty_card_val();
- _num_dirtied++;
- }
- }
-
- size_t num_dirtied() const { return _num_dirtied; }
-};
-
class G1PostEvacuateCollectionSetCleanupTask2::ProcessEvacuationFailedRegionsTask : public G1AbstractSubTask {
G1EvacFailureRegions* _evac_failure_regions;
G1HeapRegionClaimer _claimer;
@@ -572,48 +535,6 @@ public:
}
};
-class G1PostEvacuateCollectionSetCleanupTask2::RedirtyLoggedCardsTask : public G1AbstractSubTask {
- BufferNodeList* _rdc_buffers;
- uint _num_buffer_lists;
- G1EvacFailureRegions* _evac_failure_regions;
-
-public:
- RedirtyLoggedCardsTask(G1EvacFailureRegions* evac_failure_regions, BufferNodeList* rdc_buffers, uint num_buffer_lists) :
- G1AbstractSubTask(G1GCPhaseTimes::RedirtyCards),
- _rdc_buffers(rdc_buffers),
- _num_buffer_lists(num_buffer_lists),
- _evac_failure_regions(evac_failure_regions) { }
-
- double worker_cost() const override {
- // Needs more investigation.
- return G1CollectedHeap::heap()->workers()->active_workers();
- }
-
- void do_work(uint worker_id) override {
- RedirtyLoggedCardTableEntryClosure cl(G1CollectedHeap::heap(), _evac_failure_regions);
-
- uint start = worker_id;
- for (uint i = 0; i < _num_buffer_lists; i++) {
- uint index = (start + i) % _num_buffer_lists;
-
- BufferNode* next = AtomicAccess::load(&_rdc_buffers[index]._head);
- BufferNode* tail = AtomicAccess::load(&_rdc_buffers[index]._tail);
-
- while (next != nullptr) {
- BufferNode* node = next;
- next = AtomicAccess::cmpxchg(&_rdc_buffers[index]._head, node, (node != tail ) ? node->next() : nullptr);
- if (next == node) {
- cl.apply_to_buffer(node, worker_id);
- next = (node != tail ) ? node->next() : nullptr;
- } else {
- break; // If there is contention, move to the next BufferNodeList
- }
- }
- }
- record_work_item(worker_id, 0, cl.num_dirtied());
- }
-};
-
// Helper class to keep statistics for the collection set freeing
class FreeCSetStats {
  size_t _before_used_bytes;   // Usage in regions successfully evacuated
@@ -797,7 +718,6 @@ public:
JFREventForRegion event(r, _worker_id);
TimerForRegion timer(timer_for_region(r));
-
if (r->is_young()) {
assert_tracks_surviving_words(r);
r->record_surv_words_in_group(_surviving_young_words[r->young_index_in_cset()]);
@@ -908,24 +828,34 @@ public:
}
};
-class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsTask : public G1AbstractSubTask {
+class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsAndSwapCardTableTask : public G1AbstractSubTask {
G1JavaThreadsListClaimer _claimer;
// There is not much work per thread so the number of threads per worker is high.
static const uint ThreadsPerWorker = 250;
public:
- ResizeTLABsTask() : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker) { }
+ ResizeTLABsAndSwapCardTableTask()
+ : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker)
+ {
+ G1BarrierSet::g1_barrier_set()->swap_global_card_table();
+ }
void do_work(uint worker_id) override {
- class ResizeClosure : public ThreadClosure {
+
+ class ResizeAndSwapCardTableClosure : public ThreadClosure {
public:
void do_thread(Thread* thread) {
-      static_cast<JavaThread*>(thread)->tlab().resize();
+ if (UseTLAB && ResizeTLAB) {
+        static_cast<JavaThread*>(thread)->tlab().resize();
+ }
+
+ G1BarrierSet::g1_barrier_set()->update_card_table_base(thread);
}
- } cl;
- _claimer.apply(&cl);
+ } resize_and_swap_cl;
+
+ _claimer.apply(&resize_and_swap_cl);
}
double worker_cost() const override {
@@ -968,13 +898,8 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
if (evac_failure_regions->has_regions_evac_failed()) {
add_parallel_task(new ProcessEvacuationFailedRegionsTask(evac_failure_regions));
}
- add_parallel_task(new RedirtyLoggedCardsTask(evac_failure_regions,
- per_thread_states->rdc_buffers(),
- per_thread_states->num_workers()));
- if (UseTLAB && ResizeTLAB) {
- add_parallel_task(new ResizeTLABsTask());
- }
+ add_parallel_task(new ResizeTLABsAndSwapCardTableTask());
add_parallel_task(new FreeCollectionSetTask(evacuation_info,
per_thread_states->surviving_young_words(),
evac_failure_regions));
diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
index ad850af2eac..bc3a08e2080 100644
--- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
+++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -55,9 +55,8 @@ public:
// - Eagerly Reclaim Humongous Objects (s)
// - Update Derived Pointers (s)
// - Clear Retained Region Data (on evacuation failure)
-// - Redirty Logged Cards
// - Free Collection Set
-// - Resize TLABs
+// - Resize TLABs and Swap Card Table
// - Reset the reusable PartialArrayStateManager.
class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
class EagerlyReclaimHumongousObjectsTask;
@@ -66,9 +65,8 @@ class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
#endif
class ProcessEvacuationFailedRegionsTask;
- class RedirtyLoggedCardsTask;
class FreeCollectionSetTask;
- class ResizeTLABsTask;
+ class ResizeTLABsAndSwapCardTableTask;
class ResetPartialArrayStateManagerTask;
public:
diff --git a/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.cpp b/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.cpp
index 7214d624def..b11213ddeb3 100644
--- a/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.cpp
@@ -24,7 +24,6 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/g1YoungGCPreEvacuateTasks.hpp"
@@ -35,23 +34,21 @@
#include "runtime/thread.inline.hpp"
#include "runtime/threads.hpp"
-class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : public G1AbstractSubTask {
+class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABs : public G1AbstractSubTask {
G1JavaThreadsListClaimer _claimer;
// Per worker thread statistics.
ThreadLocalAllocStats* _local_tlab_stats;
- G1ConcurrentRefineStats* _local_refinement_stats;
uint _num_workers;
// There is relatively little work to do per thread.
static const uint ThreadsPerWorker = 250;
- struct RetireTLABAndFlushLogsClosure : public ThreadClosure {
+ struct RetireTLABClosure : public ThreadClosure {
ThreadLocalAllocStats _tlab_stats;
- G1ConcurrentRefineStats _refinement_stats;
- RetireTLABAndFlushLogsClosure() : _tlab_stats(), _refinement_stats() { }
+ RetireTLABClosure() : _tlab_stats() { }
void do_thread(Thread* thread) override {
assert(thread->is_Java_thread(), "must be");
@@ -61,37 +58,29 @@ class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : pu
if (UseTLAB) {
thread->retire_tlab(&_tlab_stats);
}
- // Concatenate logs.
- G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
- _refinement_stats += qset.concatenate_log_and_stats(thread);
// Flush region pin count cache.
G1ThreadLocalData::pin_count_cache(thread).flush();
}
};
public:
- JavaThreadRetireTLABAndFlushLogs() :
- G1AbstractSubTask(G1GCPhaseTimes::RetireTLABsAndFlushLogs),
+ JavaThreadRetireTLABs() :
+ G1AbstractSubTask(G1GCPhaseTimes::RetireTLABs),
_claimer(ThreadsPerWorker),
_local_tlab_stats(nullptr),
- _local_refinement_stats(nullptr),
_num_workers(0) {
}
- ~JavaThreadRetireTLABAndFlushLogs() {
-    static_assert(std::is_trivially_destructible<G1ConcurrentRefineStats>::value, "must be");
- FREE_C_HEAP_ARRAY(G1ConcurrentRefineStats, _local_refinement_stats);
-
+ ~JavaThreadRetireTLABs() {
    static_assert(std::is_trivially_destructible<ThreadLocalAllocStats>::value, "must be");
FREE_C_HEAP_ARRAY(ThreadLocalAllocStats, _local_tlab_stats);
}
void do_work(uint worker_id) override {
- RetireTLABAndFlushLogsClosure tc;
+ RetireTLABClosure tc;
_claimer.apply(&tc);
_local_tlab_stats[worker_id] = tc._tlab_stats;
- _local_refinement_stats[worker_id] = tc._refinement_stats;
}
double worker_cost() const override {
@@ -101,11 +90,9 @@ public:
void set_max_workers(uint max_workers) override {
_num_workers = max_workers;
_local_tlab_stats = NEW_C_HEAP_ARRAY(ThreadLocalAllocStats, _num_workers, mtGC);
- _local_refinement_stats = NEW_C_HEAP_ARRAY(G1ConcurrentRefineStats, _num_workers, mtGC);
for (uint i = 0; i < _num_workers; i++) {
::new (&_local_tlab_stats[i]) ThreadLocalAllocStats();
- ::new (&_local_refinement_stats[i]) G1ConcurrentRefineStats();
}
}
@@ -116,85 +103,15 @@ public:
}
return result;
}
-
- G1ConcurrentRefineStats refinement_stats() const {
- G1ConcurrentRefineStats result;
- for (uint i = 0; i < _num_workers; i++) {
- result += _local_refinement_stats[i];
- }
- return result;
- }
-};
-
-class G1PreEvacuateCollectionSetBatchTask::NonJavaThreadFlushLogs : public G1AbstractSubTask {
- struct FlushLogsClosure : public ThreadClosure {
- G1ConcurrentRefineStats _refinement_stats;
-
- FlushLogsClosure() : _refinement_stats() { }
-
- void do_thread(Thread* thread) override {
- G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
- _refinement_stats += qset.concatenate_log_and_stats(thread);
-
- assert(G1ThreadLocalData::pin_count_cache(thread).count() == 0, "NonJava thread has pinned Java objects");
- }
- } _tc;
-
-public:
- NonJavaThreadFlushLogs() : G1AbstractSubTask(G1GCPhaseTimes::NonJavaThreadFlushLogs), _tc() { }
-
- void do_work(uint worker_id) override {
- Threads::non_java_threads_do(&_tc);
- }
-
- double worker_cost() const override {
- return 1.0;
- }
-
- G1ConcurrentRefineStats refinement_stats() const { return _tc._refinement_stats; }
};
G1PreEvacuateCollectionSetBatchTask::G1PreEvacuateCollectionSetBatchTask() :
G1BatchedTask("Pre Evacuate Prepare", G1CollectedHeap::heap()->phase_times()),
- _old_pending_cards(G1BarrierSet::dirty_card_queue_set().num_cards()),
- _java_retire_task(new JavaThreadRetireTLABAndFlushLogs()),
- _non_java_retire_task(new NonJavaThreadFlushLogs()) {
+ _java_retire_task(new JavaThreadRetireTLABs()) {
- // Disable mutator refinement until concurrent refinement decides otherwise.
- G1BarrierSet::dirty_card_queue_set().set_mutator_refinement_threshold(SIZE_MAX);
-
- add_serial_task(_non_java_retire_task);
add_parallel_task(_java_retire_task);
}
-static void verify_empty_dirty_card_logs() {
-#ifdef ASSERT
- ResourceMark rm;
-
- struct Verifier : public ThreadClosure {
- Verifier() {}
- void do_thread(Thread* t) override {
- G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
- assert(queue.is_empty(), "non-empty dirty card queue for thread %s", t->name());
- }
- } verifier;
- Threads::threads_do(&verifier);
-#endif
-}
-
G1PreEvacuateCollectionSetBatchTask::~G1PreEvacuateCollectionSetBatchTask() {
_java_retire_task->tlab_stats().publish();
-
- G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
-
- G1ConcurrentRefineStats total_refinement_stats;
- total_refinement_stats += _java_retire_task->refinement_stats();
- total_refinement_stats += _non_java_retire_task->refinement_stats();
- qset.update_refinement_stats(total_refinement_stats);
-
- verify_empty_dirty_card_logs();
-
- size_t pending_cards = qset.num_cards();
- size_t thread_buffer_cards = pending_cards - _old_pending_cards;
- G1CollectedHeap::heap()->policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
}
diff --git a/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.hpp b/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.hpp
index 791031d979f..7574862872c 100644
--- a/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.hpp
+++ b/src/hotspot/share/gc/g1/g1YoungGCPreEvacuateTasks.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,18 +28,13 @@
#include "gc/g1/g1BatchedTask.hpp"
// Set of pre evacuate collection set tasks containing ("s" means serial):
-// - Retire TLAB and Flush Logs (Java threads)
+// - Retire TLABs (Java threads)
// - Flush pin count cache (Java threads)
-// - Flush Logs (s) (Non-Java threads)
class G1PreEvacuateCollectionSetBatchTask : public G1BatchedTask {
- class JavaThreadRetireTLABAndFlushLogs;
- class NonJavaThreadFlushLogs;
-
- size_t _old_pending_cards;
+ class JavaThreadRetireTLABs;
// References to the tasks to retain access to statistics.
- JavaThreadRetireTLABAndFlushLogs* _java_retire_task;
- NonJavaThreadFlushLogs* _non_java_retire_task;
+ JavaThreadRetireTLABs* _java_retire_task;
public:
G1PreEvacuateCollectionSetBatchTask();
diff --git a/src/hotspot/share/gc/g1/g1_globals.hpp b/src/hotspot/share/gc/g1/g1_globals.hpp
index 1c712492f74..b338c11d5be 100644
--- a/src/hotspot/share/gc/g1/g1_globals.hpp
+++ b/src/hotspot/share/gc/g1/g1_globals.hpp
@@ -162,6 +162,11 @@
"a single expand attempt.") \
range(0, 100) \
\
+ product(size_t, G1PerThreadPendingCardThreshold, 256, DIAGNOSTIC, \
+ "Number of pending cards allowed on the card table per GC " \
+ "worker thread before considering starting refinement.") \
+ range(0, UINT_MAX) \
+ \
product(uint, G1ShrinkByPercentOfAvailable, 50, DIAGNOSTIC, \
"When shrinking, maximum % of free space to free for a single " \
"shrink attempt.") \
@@ -188,10 +193,6 @@
"bound of acceptable deviation range.") \
constraint(G1CPUUsageShrinkConstraintFunc, AfterErgo) \
\
- product(size_t, G1UpdateBufferSize, 256, \
- "Size of an update buffer") \
- constraint(G1UpdateBufferSizeConstraintFunc, AfterErgo) \
- \
product(uint, G1RSetUpdatingPauseTimePercent, 10, \
"A target percentage of time that is allowed to be spend on " \
"processing remembered set update buffers during the collection " \
diff --git a/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.cpp b/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.cpp
index 488a9c7aac9..2b084b387bc 100644
--- a/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.cpp
+++ b/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.cpp
@@ -206,12 +206,6 @@ JVMFlag::Error G1SATBBufferSizeConstraintFunc(size_t value, bool verbose) {
verbose);
}
-JVMFlag::Error G1UpdateBufferSizeConstraintFunc(size_t value, bool verbose) {
- return buffer_size_constraint_helper(FLAG_MEMBER_ENUM(G1UpdateBufferSize),
- value,
- verbose);
-}
-
JVMFlag::Error gc_cpu_usage_threshold_helper(JVMFlagsEnum flagid,
uint value,
bool verbose) {
diff --git a/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.hpp b/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.hpp
index 89f05d73dcc..b2c7bb6dc96 100644
--- a/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.hpp
+++ b/src/hotspot/share/gc/g1/jvmFlagConstraintsG1.hpp
@@ -47,7 +47,6 @@
\
/* G1 PtrQueue buffer size constraints */ \
f(size_t, G1SATBBufferSizeConstraintFunc) \
- f(size_t, G1UpdateBufferSizeConstraintFunc) \
\
/* G1 GC deviation counter threshold constraints */ \
f(uint, G1CPUUsageExpandConstraintFunc) \
diff --git a/src/hotspot/share/gc/g1/vmStructs_g1.hpp b/src/hotspot/share/gc/g1/vmStructs_g1.hpp
index 651808b4ba0..67c930e1b63 100644
--- a/src/hotspot/share/gc/g1/vmStructs_g1.hpp
+++ b/src/hotspot/share/gc/g1/vmStructs_g1.hpp
@@ -82,8 +82,7 @@
declare_constant(G1HeapRegionType::StartsHumongousTag) \
declare_constant(G1HeapRegionType::ContinuesHumongousTag) \
declare_constant(G1HeapRegionType::OldMask) \
- declare_constant(BarrierSet::G1BarrierSet) \
- declare_constant(G1CardTable::g1_young_gen)
+ declare_constant(BarrierSet::G1BarrierSet)
#define VM_TYPES_G1GC(declare_type, \
declare_toplevel_type, \
@@ -100,7 +99,6 @@
declare_toplevel_type(PtrQueue) \
declare_toplevel_type(G1HeapRegionType) \
declare_toplevel_type(SATBMarkQueue) \
- declare_toplevel_type(G1DirtyCardQueue) \
\
declare_toplevel_type(G1CollectedHeap*) \
declare_toplevel_type(G1HeapRegion*) \
diff --git a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
index 21841330fa7..213e8f95d63 100644
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
@@ -275,38 +275,46 @@ HeapWord* ParallelScavengeHeap::mem_allocate(size_t size) {
return mem_allocate_work(size, is_tlab);
}
+HeapWord* ParallelScavengeHeap::mem_allocate_cas_noexpand(size_t size, bool is_tlab) {
+ // Try young-gen first.
+ HeapWord* result = young_gen()->allocate(size);
+ if (result != nullptr) {
+ return result;
+ }
+
+ // Try allocating from the old gen for non-TLAB in certain scenarios.
+ if (!is_tlab) {
+ if (!should_alloc_in_eden(size) || _is_heap_almost_full) {
+ result = old_gen()->cas_allocate_noexpand(size);
+ if (result != nullptr) {
+ return result;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
for (uint loop_count = 0; /* empty */; ++loop_count) {
- // Try young-gen first.
- HeapWord* result = young_gen()->allocate(size);
+ HeapWord* result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
return result;
}
- // Try allocating from the old gen for non-TLAB in certain scenarios.
- if (!is_tlab) {
- if (!should_alloc_in_eden(size) || _is_heap_almost_full) {
- result = old_gen()->cas_allocate_noexpand(size);
- if (result != nullptr) {
- return result;
- }
- }
- }
-
- // We don't want to have multiple collections for a single filled generation.
- // To prevent this, each thread tracks the total_collections() value, and if
- // the count has changed, does not do a new collection.
- //
- // The collection count must be read only while holding the heap lock. VM
- // operations also hold the heap lock during collections. There is a lock
- // contention case where thread A blocks waiting on the Heap_lock, while
- // thread B is holding it doing a collection. When thread A gets the lock,
- // the collection count has already changed. To prevent duplicate collections,
- // The policy MUST attempt allocations during the same period it reads the
- // total_collections() value!
+    // Read total_collections() under the lock so that multiple
+    // allocation failures result in a single GC.
uint gc_count;
{
MutexLocker ml(Heap_lock);
+
+ // Re-try after acquiring the lock, because a GC might have occurred
+ // while waiting for this lock.
+ result = mem_allocate_cas_noexpand(size, is_tlab);
+ if (result != nullptr) {
+ return result;
+ }
+
gc_count = total_collections();
}
@@ -469,14 +477,9 @@ void ParallelScavengeHeap::collect(GCCause::Cause cause) {
VMThread::execute(&op);
}
-bool ParallelScavengeHeap::must_clear_all_soft_refs() {
- return _gc_cause == GCCause::_metadata_GC_clear_soft_refs ||
- _gc_cause == GCCause::_wb_full_gc;
-}
-
void ParallelScavengeHeap::collect_at_safepoint(bool full) {
assert(!GCLocker::is_active(), "precondition");
- bool clear_soft_refs = must_clear_all_soft_refs();
+ bool clear_soft_refs = GCCause::should_clear_all_soft_refs(_gc_cause);
if (!full) {
bool success = PSScavenge::invoke(clear_soft_refs);
diff --git a/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp b/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
index bd701ae8be3..fea827430ca 100644
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
@@ -96,12 +96,11 @@ class ParallelScavengeHeap : public CollectedHeap {
void update_parallel_worker_threads_cpu_time();
- bool must_clear_all_soft_refs();
-
HeapWord* allocate_new_tlab(size_t min_size, size_t requested_size, size_t* actual_size) override;
inline bool should_alloc_in_eden(size_t size) const;
+ HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index e63e189861f..f4383e573af 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -297,29 +297,6 @@ void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
}
-void
-ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end)
-{
- assert(is_region_aligned(beg), "not RegionSize aligned");
- assert(is_region_aligned(end), "not RegionSize aligned");
-
- size_t cur_region = addr_to_region_idx(beg);
- const size_t end_region = addr_to_region_idx(end);
- HeapWord* addr = beg;
- while (cur_region < end_region) {
- _region_data[cur_region].set_destination(addr);
- _region_data[cur_region].set_destination_count(0);
- _region_data[cur_region].set_source_region(cur_region);
-
- // Update live_obj_size so the region appears completely full.
- size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size();
- _region_data[cur_region].set_live_obj_size(live_size);
-
- ++cur_region;
- addr += RegionSize;
- }
-}
-
// The total live words on src_region would overflow the target space, so find
// the overflowing object and record the split point. The invariant is that an
// obj should not cross space boundary.
@@ -894,7 +871,6 @@ void PSParallelCompact::summary_phase(bool should_do_max_compaction)
if (dense_prefix_end != old_space->bottom()) {
fill_dense_prefix_end(id);
- _summary_data.summarize_dense_prefix(old_space->bottom(), dense_prefix_end);
}
// Compacting objs in [dense_prefix_end, old_space->top())
@@ -1089,9 +1065,7 @@ public:
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(_worker_id);
- MarkingNMethodClosure mark_and_push_in_blobs(&cm->_mark_and_push_closure,
- !NMethodToOopClosure::FixRelocations,
- true /* keepalive nmethods */);
+ MarkingNMethodClosure mark_and_push_in_blobs(&cm->_mark_and_push_closure);
thread->oops_do(&cm->_mark_and_push_closure, &mark_and_push_in_blobs);
@@ -1383,9 +1357,7 @@ public:
_nworkers(nworkers) {
ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_stw_fullgc_adjust);
- if (nworkers > 1) {
- Threads::change_thread_claim_token();
- }
+ Threads::change_thread_claim_token();
}
~PSAdjustTask() {
@@ -1539,17 +1511,16 @@ void PSParallelCompact::forward_to_new_addr() {
#ifdef ASSERT
void PSParallelCompact::verify_forward() {
HeapWord* const old_dense_prefix_addr = dense_prefix(SpaceId(old_space_id));
- RegionData* old_region = _summary_data.region(_summary_data.addr_to_region_idx(old_dense_prefix_addr));
- HeapWord* bump_ptr = old_region->partial_obj_size() != 0
- ? old_dense_prefix_addr + old_region->partial_obj_size()
- : old_dense_prefix_addr;
+ // The destination addr for the first live obj after dense-prefix.
+ HeapWord* bump_ptr = old_dense_prefix_addr
+ + _summary_data.addr_to_region_ptr(old_dense_prefix_addr)->partial_obj_size();
SpaceId bump_ptr_space = old_space_id;
for (uint id = old_space_id; id < last_space_id; ++id) {
MutableSpace* sp = PSParallelCompact::space(SpaceId(id));
- HeapWord* dense_prefix_addr = dense_prefix(SpaceId(id));
+ // Only verify objs after dense-prefix, because those before dense-prefix are not moved (forwarded).
+ HeapWord* cur_addr = dense_prefix(SpaceId(id));
HeapWord* top = sp->top();
- HeapWord* cur_addr = dense_prefix_addr;
while (cur_addr < top) {
cur_addr = mark_bitmap()->find_obj_beg(cur_addr, top);
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
index d5ed641f485..4d212499b4c 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
@@ -360,11 +360,6 @@ public:
inline RegionData* region(size_t region_idx) const;
inline size_t region(const RegionData* const region_ptr) const;
- // Fill in the regions covering [beg, end) so that no data moves; i.e., the
- // destination of region n is simply the start of region n. Both arguments
- // beg and end must be region-aligned.
- void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
-
HeapWord* summarize_split_space(size_t src_region, SplitInfo& split_info,
HeapWord* destination, HeapWord* target_end,
HeapWord** target_next);
diff --git a/src/hotspot/share/gc/parallel/psScavenge.cpp b/src/hotspot/share/gc/parallel/psScavenge.cpp
index d32a8d239d1..0af2ab1fd68 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp
@@ -99,7 +99,7 @@ static void scavenge_roots_work(ParallelRootType::Value root_type, uint worker_i
case ParallelRootType::code_cache:
{
- MarkingNMethodClosure code_closure(&roots_to_old_closure, NMethodToOopClosure::FixRelocations, false /* keepalive nmethods */);
+ NMethodToOopClosure code_closure(&roots_to_old_closure, NMethodToOopClosure::FixRelocations);
ScavengableNMethods::nmethods_do(&code_closure);
}
break;
@@ -216,25 +216,24 @@ public:
};
class PSThreadRootsTaskClosure : public ThreadClosure {
- uint _worker_id;
+ PSPromotionManager* _pm;
public:
- PSThreadRootsTaskClosure(uint worker_id) : _worker_id(worker_id) { }
+ PSThreadRootsTaskClosure(PSPromotionManager* pm) : _pm(pm) {}
virtual void do_thread(Thread* thread) {
assert(ParallelScavengeHeap::heap()->is_stw_gc_active(), "called outside gc");
- PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(_worker_id);
- PSScavengeRootsClosure roots_closure(pm);
+ PSScavengeRootsClosure roots_closure(_pm);
// No need to visit nmethods, because they are handled by ScavengableNMethods.
thread->oops_do(&roots_closure, nullptr);
// Do the real work
- pm->drain_stacks(false);
+ _pm->drain_stacks(false);
}
};
class ScavengeRootsTask : public WorkerTask {
- StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do
+ ThreadsClaimTokenScope _threads_claim_token_scope; // needed for Threads::possibly_parallel_threads_do
  OopStorageSetStrongParState<false /* concurrent */, false /* is_const */> _oop_storage_strong_par_state;
SequentialSubTasksDone _subtasks;
PSOldGen* _old_gen;
@@ -247,7 +246,7 @@ public:
ScavengeRootsTask(PSOldGen* old_gen,
uint active_workers) :
WorkerTask("ScavengeRootsTask"),
- _strong_roots_scope(active_workers),
+ _threads_claim_token_scope(),
_subtasks(ParallelRootType::sentinel),
_old_gen(old_gen),
_gen_top(old_gen->object_space()->top()),
@@ -263,12 +262,12 @@ public:
virtual void work(uint worker_id) {
assert(worker_id < _active_workers, "Sanity");
ResourceMark rm;
+ PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(worker_id);
if (!_is_old_gen_empty) {
// There are only old-to-young pointers if there are objects
// in the old gen.
{
- PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(worker_id);
PSCardTable* card_table = ParallelScavengeHeap::heap()->card_table();
// The top of the old gen changes during scavenge when objects are promoted.
@@ -288,14 +287,14 @@ public:
      scavenge_roots_work(static_cast<ParallelRootType::Value>(root_type), worker_id);
}
- PSThreadRootsTaskClosure closure(worker_id);
- Threads::possibly_parallel_threads_do(_active_workers > 1 /* is_par */, &closure);
+ PSThreadRootsTaskClosure thread_closure(pm);
+ Threads::possibly_parallel_threads_do(_active_workers > 1 /* is_par */, &thread_closure);
// Scavenge OopStorages
{
- PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(worker_id);
- PSScavengeRootsClosure closure(pm);
- _oop_storage_strong_par_state.oops_do(&closure);
+ PSScavengeRootsClosure root_closure(pm);
+ _oop_storage_strong_par_state.oops_do(&root_closure);
+
// Do the real work
pm->drain_stacks(false);
}
diff --git a/src/hotspot/share/gc/serial/defNewGeneration.cpp b/src/hotspot/share/gc/serial/defNewGeneration.cpp
index b5d65793b90..bcd131a5fa2 100644
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp
@@ -600,13 +600,11 @@ bool DefNewGeneration::collect(bool clear_all_soft_refs) {
&old_gen_cl);
{
- StrongRootsScope srs(0);
RootScanClosure oop_closure{this};
CLDScanClosure cld_closure{this};
- MarkingNMethodClosure nmethod_closure(&oop_closure,
- NMethodToOopClosure::FixRelocations,
- false /* keepalive_nmethods */);
+ NMethodToOopClosure nmethod_closure(&oop_closure,
+ NMethodToOopClosure::FixRelocations);
// Starting tracing from roots, there are 4 kinds of roots in young-gc.
//
diff --git a/src/hotspot/share/gc/serial/serialFullGC.cpp b/src/hotspot/share/gc/serial/serialFullGC.cpp
index fc63b81b7ce..d45454a768f 100644
--- a/src/hotspot/share/gc/serial/serialFullGC.cpp
+++ b/src/hotspot/share/gc/serial/serialFullGC.cpp
@@ -485,9 +485,7 @@ void SerialFullGC::phase1_mark(bool clear_all_softrefs) {
{
StrongRootsScope srs(0);
- MarkingNMethodClosure mark_code_closure(&follow_root_closure,
- !NMethodToOopClosure::FixRelocations,
- true);
+ MarkingNMethodClosure mark_code_closure(&follow_root_closure);
// Start tracing from roots, there are 3 kinds of roots in full-gc.
//
diff --git a/src/hotspot/share/gc/serial/serialHeap.cpp b/src/hotspot/share/gc/serial/serialHeap.cpp
index 662a6be695b..f26e4427062 100644
--- a/src/hotspot/share/gc/serial/serialHeap.cpp
+++ b/src/hotspot/share/gc/serial/serialHeap.cpp
@@ -292,27 +292,44 @@ HeapWord* SerialHeap::expand_heap_and_allocate(size_t size, bool is_tlab) {
return result;
}
+HeapWord* SerialHeap::mem_allocate_cas_noexpand(size_t size, bool is_tlab) {
+ HeapWord* result = _young_gen->par_allocate(size);
+ if (result != nullptr) {
+ return result;
+ }
+ // Try old-gen allocation for non-TLAB.
+ if (!is_tlab) {
+ // If it's too large for young-gen or heap is too full.
+ if (size > heap_word_size(_young_gen->capacity_before_gc()) || _is_heap_almost_full) {
+ result = _old_gen->par_allocate(size);
+ if (result != nullptr) {
+ return result;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
HeapWord* result = nullptr;
for (uint try_count = 1; /* break */; try_count++) {
- result = _young_gen->par_allocate(size);
+ result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
break;
}
- // Try old-gen allocation for non-TLAB.
- if (!is_tlab) {
- // If it's too large for young-gen or heap is too full.
- if (size > heap_word_size(_young_gen->capacity_before_gc()) || _is_heap_almost_full) {
- result = _old_gen->par_allocate(size);
- if (result != nullptr) {
- break;
- }
- }
- }
uint gc_count_before; // Read inside the Heap_lock locked region.
{
MutexLocker ml(Heap_lock);
+
+ // Re-try after acquiring the lock, because a GC might have occurred
+ // while waiting for this lock.
+ result = mem_allocate_cas_noexpand(size, is_tlab);
+ if (result != nullptr) {
+ break;
+ }
+
gc_count_before = total_collections();
}
@@ -340,11 +357,6 @@ HeapWord* SerialHeap::mem_allocate(size_t size) {
false /* is_tlab */);
}
-bool SerialHeap::must_clear_all_soft_refs() {
- return _gc_cause == GCCause::_metadata_GC_clear_soft_refs ||
- _gc_cause == GCCause::_wb_full_gc;
-}
-
bool SerialHeap::is_young_gc_safe() const {
if (!_young_gen->to()->is_empty()) {
return false;
@@ -497,7 +509,7 @@ void SerialHeap::scan_evacuated_objs(YoungGenScanClosure* young_cl,
void SerialHeap::collect_at_safepoint(bool full) {
assert(!GCLocker::is_active(), "precondition");
- bool clear_soft_refs = must_clear_all_soft_refs();
+ bool clear_soft_refs = GCCause::should_clear_all_soft_refs(_gc_cause);
if (!full) {
bool success = do_young_collection(clear_soft_refs);
diff --git a/src/hotspot/share/gc/serial/serialHeap.hpp b/src/hotspot/share/gc/serial/serialHeap.hpp
index 72778981eee..388da13b1b0 100644
--- a/src/hotspot/share/gc/serial/serialHeap.hpp
+++ b/src/hotspot/share/gc/serial/serialHeap.hpp
@@ -31,7 +31,6 @@
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/oopStorageParState.hpp"
#include "gc/shared/preGCValues.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "utilities/growableArray.hpp"
class CardTableRS;
@@ -104,10 +103,6 @@ private:
void do_full_collection(bool clear_all_soft_refs) override;
- // Does the "cause" of GC indicate that
- // we absolutely __must__ clear soft refs?
- bool must_clear_all_soft_refs();
-
bool is_young_gc_safe() const;
void gc_prologue();
@@ -227,6 +222,7 @@ private:
// Try to allocate space by expanding the heap.
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
+ HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
MemoryPool* _eden_pool;
diff --git a/src/hotspot/share/gc/shared/cardTable.cpp b/src/hotspot/share/gc/shared/cardTable.cpp
index 76b8eb4d718..34f1847befe 100644
--- a/src/hotspot/share/gc/shared/cardTable.cpp
+++ b/src/hotspot/share/gc/shared/cardTable.cpp
@@ -225,6 +225,9 @@ uintx CardTable::ct_max_alignment_constraint() {
#ifndef PRODUCT
void CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
+ if (mr.is_empty()) {
+ return;
+ }
CardValue* start = byte_for(mr.start());
CardValue* end = byte_for(mr.last());
bool failures = false;
@@ -255,7 +258,8 @@ void CardTable::verify_dirty_region(MemRegion mr) {
}
#endif
-void CardTable::print_on(outputStream* st) const {
- st->print_cr("Card table byte_map: [" PTR_FORMAT "," PTR_FORMAT "] _byte_map_base: " PTR_FORMAT,
+void CardTable::print_on(outputStream* st, const char* description) const {
+ st->print_cr("%s table byte_map: [" PTR_FORMAT "," PTR_FORMAT "] _byte_map_base: " PTR_FORMAT,
+ description,
p2i(_byte_map), p2i(_byte_map + _byte_map_size), p2i(_byte_map_base));
}
diff --git a/src/hotspot/share/gc/shared/cardTable.hpp b/src/hotspot/share/gc/shared/cardTable.hpp
index ee41be06be0..63dcfe7aecb 100644
--- a/src/hotspot/share/gc/shared/cardTable.hpp
+++ b/src/hotspot/share/gc/shared/cardTable.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -203,12 +203,12 @@ public:
virtual bool is_in_young(const void* p) const = 0;
- // Print a description of the memory for the card table
- virtual void print_on(outputStream* st) const;
+ // Print card table information.
+ void print_on(outputStream* st, const char* description = "Card") const;
// val_equals -> it will check that all cards covered by mr equal val
// !val_equals -> it will check that all cards covered by mr do not equal val
- void verify_region(MemRegion mr, CardValue val, bool val_equals) PRODUCT_RETURN;
+ virtual void verify_region(MemRegion mr, CardValue val, bool val_equals) PRODUCT_RETURN;
void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
};
diff --git a/src/hotspot/share/gc/shared/collectedHeap.cpp b/src/hotspot/share/gc/shared/collectedHeap.cpp
index 71017817d14..9b6956ca75a 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.cpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.cpp
@@ -276,7 +276,6 @@ bool CollectedHeap::is_oop(oop object) const {
CollectedHeap::CollectedHeap() :
_capacity_at_last_gc(0),
_used_at_last_gc(0),
- _soft_ref_policy(),
_is_stw_gc_active(false),
_last_whole_heap_examined_time_ns(os::javaTimeNanos()),
_total_collections(0),
diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp
index 57bd9316731..f4f5ce79074 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.hpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.hpp
@@ -27,7 +27,6 @@
#include "gc/shared/gcCause.hpp"
#include "gc/shared/gcWhen.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "gc/shared/verifyOption.hpp"
#include "memory/allocation.hpp"
#include "memory/metaspace.hpp"
@@ -104,8 +103,6 @@ class CollectedHeap : public CHeapObj<mtGC> {
size_t _capacity_at_last_gc;
size_t _used_at_last_gc;
- SoftRefPolicy _soft_ref_policy;
-
// First, set it to java_lang_Object.
// Then, set it to FillerObject after the FillerObject_klass loading is complete.
static Klass* _filler_object_klass;
@@ -395,9 +392,6 @@ protected:
}
}
- // Return the SoftRefPolicy for the heap;
- SoftRefPolicy* soft_ref_policy() { return &_soft_ref_policy; }
-
virtual MemoryUsage memory_usage();
  virtual GrowableArray<GCMemoryManager*> memory_managers() = 0;
  virtual GrowableArray<MemoryPool*> memory_pools() = 0;
diff --git a/src/hotspot/share/gc/shared/gcCause.hpp b/src/hotspot/share/gc/shared/gcCause.hpp
index f775d41340d..aac4b801b74 100644
--- a/src/hotspot/share/gc/shared/gcCause.hpp
+++ b/src/hotspot/share/gc/shared/gcCause.hpp
@@ -103,6 +103,13 @@ class GCCause : public AllStatic {
cause == _codecache_GC_aggressive);
}
+ // Does the "cause" of GC indicate that
+ // we absolutely __must__ clear soft refs?
+ inline static bool should_clear_all_soft_refs(GCCause::Cause cause) {
+ return cause == GCCause::_metadata_GC_clear_soft_refs ||
+ cause == GCCause::_wb_full_gc;
+ }
+
// Return a string describing the GCCause.
static const char* to_string(GCCause::Cause cause);
};
diff --git a/src/hotspot/share/gc/shared/gcVMOperations.cpp b/src/hotspot/share/gc/shared/gcVMOperations.cpp
index 1299f64995f..97bae4f6d44 100644
--- a/src/hotspot/share/gc/shared/gcVMOperations.cpp
+++ b/src/hotspot/share/gc/shared/gcVMOperations.cpp
@@ -29,7 +29,6 @@
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcLocker.hpp"
#include "gc/shared/gcVMOperations.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "interpreter/oopMapCache.hpp"
#include "logging/log.hpp"
#include "memory/classLoaderMetaspace.hpp"
diff --git a/src/hotspot/share/gc/shared/gc_globals.hpp b/src/hotspot/share/gc/shared/gc_globals.hpp
index 240068f10c0..0b245026d68 100644
--- a/src/hotspot/share/gc/shared/gc_globals.hpp
+++ b/src/hotspot/share/gc/shared/gc_globals.hpp
@@ -289,7 +289,7 @@
"size on systems with small physical memory size") \
range(0.0, 100.0) \
\
- product(double, InitialRAMPercentage, 1.5625, \
+ product(double, InitialRAMPercentage, 0.2, \
"Percentage of real memory used for initial heap size") \
range(0.0, 100.0) \
\
diff --git a/src/hotspot/share/gc/shared/workerDataArray.hpp b/src/hotspot/share/gc/shared/workerDataArray.hpp
index b2a81bc9482..587f9bbd167 100644
--- a/src/hotspot/share/gc/shared/workerDataArray.hpp
+++ b/src/hotspot/share/gc/shared/workerDataArray.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@ template <class T>
class WorkerDataArray : public CHeapObj<mtGC> {
friend class WDAPrinter;
public:
- static const uint MaxThreadWorkItems = 9;
+ static const uint MaxThreadWorkItems = 10;
private:
T* _data;
uint _length;
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
index dfae9040242..c7067b2e5ab 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
@@ -26,7 +26,6 @@
#include "gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
-#include "gc/shenandoah/shenandoahEvacInfo.hpp"
#include "gc/shenandoah/shenandoahGeneration.hpp"
#include "gc/shenandoah/shenandoahGenerationalHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
@@ -185,59 +184,16 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio
heap->shenandoah_policy()->record_mixed_cycle();
}
- size_t cset_percent = (total_garbage == 0) ? 0 : (collection_set->garbage() * 100 / total_garbage);
- size_t collectable_garbage = collection_set->garbage() + immediate_garbage;
- size_t collectable_garbage_percent = (total_garbage == 0) ? 0 : (collectable_garbage * 100 / total_garbage);
+ collection_set->summarize(total_garbage, immediate_garbage, immediate_regions);
- log_info(gc, ergo)("Collectable Garbage: %zu%s (%zu%%), "
- "Immediate: %zu%s (%zu%%), %zu regions, "
- "CSet: %zu%s (%zu%%), %zu regions",
-
- byte_size_in_proper_unit(collectable_garbage),
- proper_unit_for_byte_size(collectable_garbage),
- collectable_garbage_percent,
-
- byte_size_in_proper_unit(immediate_garbage),
- proper_unit_for_byte_size(immediate_garbage),
- immediate_percent,
- immediate_regions,
-
- byte_size_in_proper_unit(collection_set->garbage()),
- proper_unit_for_byte_size(collection_set->garbage()),
- cset_percent,
- collection_set->count());
-
- if (collection_set->garbage() > 0) {
- size_t young_evac_bytes = collection_set->get_young_bytes_reserved_for_evacuation();
- size_t promote_evac_bytes = collection_set->get_young_bytes_to_be_promoted();
- size_t old_evac_bytes = collection_set->get_old_bytes_reserved_for_evacuation();
- size_t total_evac_bytes = young_evac_bytes + promote_evac_bytes + old_evac_bytes;
- log_info(gc, ergo)("Evacuation Targets: YOUNG: %zu%s, "
- "PROMOTE: %zu%s, "
- "OLD: %zu%s, "
- "TOTAL: %zu%s",
- byte_size_in_proper_unit(young_evac_bytes), proper_unit_for_byte_size(young_evac_bytes),
- byte_size_in_proper_unit(promote_evac_bytes), proper_unit_for_byte_size(promote_evac_bytes),
- byte_size_in_proper_unit(old_evac_bytes), proper_unit_for_byte_size(old_evac_bytes),
- byte_size_in_proper_unit(total_evac_bytes), proper_unit_for_byte_size(total_evac_bytes));
-
- ShenandoahEvacuationInformation evacInfo;
- evacInfo.set_collection_set_regions(collection_set->count());
- evacInfo.set_collection_set_used_before(collection_set->used());
- evacInfo.set_collection_set_used_after(collection_set->live());
- evacInfo.set_collected_old(old_evac_bytes);
- evacInfo.set_collected_promoted(promote_evac_bytes);
- evacInfo.set_collected_young(young_evac_bytes);
- evacInfo.set_regions_promoted_humongous(humongous_regions_promoted);
- evacInfo.set_regions_promoted_regular(regular_regions_promoted_in_place);
- evacInfo.set_regular_promoted_garbage(regular_regions_promoted_garbage);
- evacInfo.set_regular_promoted_free(regular_regions_promoted_free);
- evacInfo.set_regions_immediate(immediate_regions);
- evacInfo.set_immediate_size(immediate_garbage);
- evacInfo.set_free_regions(free_regions);
-
- ShenandoahTracer().report_evacuation_info(&evacInfo);
- }
+ ShenandoahTracer::report_evacuation_info(collection_set,
+ free_regions,
+ humongous_regions_promoted,
+ regular_regions_promoted_in_place,
+ regular_regions_promoted_garbage,
+ regular_regions_promoted_free,
+ immediate_regions,
+ immediate_garbage);
}
@@ -268,15 +224,3 @@ size_t ShenandoahGenerationalHeuristics::add_preselected_regions_to_collection_s
return cur_young_garbage;
}
-void ShenandoahGenerationalHeuristics::log_cset_composition(ShenandoahCollectionSet* cset) const {
- size_t collected_old = cset->get_old_bytes_reserved_for_evacuation();
- size_t collected_promoted = cset->get_young_bytes_to_be_promoted();
- size_t collected_young = cset->get_young_bytes_reserved_for_evacuation();
-
- log_info(gc, ergo)(
- "Chosen CSet evacuates young: %zu%s (of which at least: %zu%s are to be promoted), "
- "old: %zu%s",
- byte_size_in_proper_unit(collected_young), proper_unit_for_byte_size(collected_young),
- byte_size_in_proper_unit(collected_promoted), proper_unit_for_byte_size(collected_promoted),
- byte_size_in_proper_unit(collected_old), proper_unit_for_byte_size(collected_old));
-}
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
index 6708c63f042..31c016bb4b7 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
@@ -51,9 +51,8 @@ protected:
size_t add_preselected_regions_to_collection_set(ShenandoahCollectionSet* cset,
const RegionData* data,
size_t size) const;
-
- void log_cset_composition(ShenandoahCollectionSet* cset) const;
};
#endif //SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHGENERATIONALHEURISTICS_HPP
+
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
index 331bd040575..93f9b18ad9f 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
@@ -42,8 +42,6 @@ void ShenandoahGlobalHeuristics::choose_collection_set_from_regiondata(Shenandoa
QuickSort::sort(data, (int) size, compare_by_garbage);
choose_global_collection_set(cset, data, size, actual_free, 0 /* cur_young_garbage */);
-
- log_cset_composition(cset);
}
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp
index b151a75e6e7..c8a0c3dc518 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp
@@ -153,27 +153,7 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
choose_collection_set_from_regiondata(collection_set, candidates, cand_idx, immediate_garbage + free);
}
- size_t cset_percent = (total_garbage == 0) ? 0 : (collection_set->garbage() * 100 / total_garbage);
- size_t collectable_garbage = collection_set->garbage() + immediate_garbage;
- size_t collectable_garbage_percent = (total_garbage == 0) ? 0 : (collectable_garbage * 100 / total_garbage);
-
- log_info(gc, ergo)("Collectable Garbage: %zu%s (%zu%%), "
- "Immediate: %zu%s (%zu%%), %zu regions, "
- "CSet: %zu%s (%zu%%), %zu regions",
-
- byte_size_in_proper_unit(collectable_garbage),
- proper_unit_for_byte_size(collectable_garbage),
- collectable_garbage_percent,
-
- byte_size_in_proper_unit(immediate_garbage),
- proper_unit_for_byte_size(immediate_garbage),
- immediate_percent,
- immediate_regions,
-
- byte_size_in_proper_unit(collection_set->garbage()),
- proper_unit_for_byte_size(collection_set->garbage()),
- cset_percent,
- collection_set->count());
+ collection_set->summarize(total_garbage, immediate_garbage, immediate_regions);
}
void ShenandoahHeuristics::record_cycle_start() {
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahOldHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahOldHeuristics.cpp
index 2d0bbfd5e4a..2361a50e76d 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahOldHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahOldHeuristics.cpp
@@ -412,7 +412,7 @@ void ShenandoahOldHeuristics::prepare_for_old_collections() {
size_t defrag_count = 0;
size_t total_uncollected_old_regions = _last_old_region - _last_old_collection_candidate;
- if (cand_idx > _last_old_collection_candidate) {
+ if ((ShenandoahGenerationalHumongousReserve > 0) && (cand_idx > _last_old_collection_candidate)) {
// Above, we have added into the set of mixed-evacuation candidates all old-gen regions for which the live memory
// that they contain is below a particular old-garbage threshold. Regions that were not selected for the collection
// set hold enough live memory that it is not considered efficient (by "garbage-first standards") to compact these
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp
index d236be8c9e6..15d1058d7cd 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp
@@ -55,8 +55,6 @@ void ShenandoahYoungHeuristics::choose_collection_set_from_regiondata(Shenandoah
size_t cur_young_garbage = add_preselected_regions_to_collection_set(cset, data, size);
choose_young_collection_set(cset, data, size, actual_free, cur_young_garbage);
-
- log_cset_composition(cset);
}
void ShenandoahYoungHeuristics::choose_young_collection_set(ShenandoahCollectionSet* cset,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp
index 93c218e9e8b..3d9fa10b0fc 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp
@@ -543,7 +543,8 @@ void ShenandoahAsserts::assert_control_or_vm_thread_at_safepoint(bool at_safepoi
}
void ShenandoahAsserts::assert_generations_reconciled(const char* file, int line) {
- if (!SafepointSynchronize::is_at_safepoint()) {
+ if (!ShenandoahSafepoint::is_at_shenandoah_safepoint()) {
+    // Only Shenandoah safepoint operations participate in the active/gc generation scheme.
return;
}
@@ -554,7 +555,7 @@ void ShenandoahAsserts::assert_generations_reconciled(const char* file, int line
return;
}
- ShenandoahMessageBuffer msg("Active(%d) & GC(%d) Generations aren't reconciled", agen->type(), ggen->type());
+ ShenandoahMessageBuffer msg("Active(%s) & GC(%s) Generations aren't reconciled", agen->name(), ggen->name());
report_vm_error(file, line, msg.buffer());
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
index a58b9311183..745d45ace1e 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.cpp
@@ -200,3 +200,38 @@ void ShenandoahCollectionSet::print_on(outputStream* out) const {
}
assert(regions == count(), "Must match");
}
+
+void ShenandoahCollectionSet::summarize(size_t total_garbage, size_t immediate_garbage, size_t immediate_regions) const {
+ const LogTarget(Info, gc, ergo) lt;
+ LogStream ls(lt);
+ if (lt.is_enabled()) {
+ const size_t cset_percent = (total_garbage == 0) ? 0 : (garbage() * 100 / total_garbage);
+ const size_t collectable_garbage = garbage() + immediate_garbage;
+ const size_t collectable_garbage_percent = (total_garbage == 0) ? 0 : (collectable_garbage * 100 / total_garbage);
+ const size_t immediate_percent = (total_garbage == 0) ? 0 : (immediate_garbage * 100 / total_garbage);
+
+ ls.print_cr("Collectable Garbage: " PROPERFMT " (%zu%%), "
+ "Immediate: " PROPERFMT " (%zu%%), %zu regions, "
+ "CSet: " PROPERFMT " (%zu%%), %zu regions",
+ PROPERFMTARGS(collectable_garbage),
+ collectable_garbage_percent,
+
+ PROPERFMTARGS(immediate_garbage),
+ immediate_percent,
+ immediate_regions,
+
+ PROPERFMTARGS(garbage()),
+ cset_percent,
+ count());
+
+ if (garbage() > 0) {
+ const size_t young_evac_bytes = get_young_bytes_reserved_for_evacuation();
+ const size_t promote_evac_bytes = get_young_bytes_to_be_promoted();
+ const size_t old_evac_bytes = get_old_bytes_reserved_for_evacuation();
+ const size_t total_evac_bytes = young_evac_bytes + promote_evac_bytes + old_evac_bytes;
+ ls.print_cr("Evacuation Targets: "
+ "YOUNG: " PROPERFMT ", " "PROMOTE: " PROPERFMT ", " "OLD: " PROPERFMT ", " "TOTAL: " PROPERFMT,
+ PROPERFMTARGS(young_evac_bytes), PROPERFMTARGS(promote_evac_bytes), PROPERFMTARGS(old_evac_bytes), PROPERFMTARGS(total_evac_bytes));
+ }
+ }
+}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.hpp
index 4f9f6fc2052..d4a590a3d89 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.hpp
@@ -103,17 +103,26 @@ public:
inline bool is_in(oop obj) const;
inline bool is_in_loc(void* loc) const;
+ // Prints a detailed accounting of all regions in the collection set when gc+cset=debug
void print_on(outputStream* out) const;
- // It is not known how many of these bytes will be promoted.
- inline size_t get_young_bytes_reserved_for_evacuation();
- inline size_t get_old_bytes_reserved_for_evacuation();
+ // Prints a summary of the collection set when gc+ergo=info
+ void summarize(size_t total_garbage, size_t immediate_garbage, size_t immediate_regions) const;
- inline size_t get_young_bytes_to_be_promoted();
+ // Returns the amount of live bytes in young regions in the collection set. It is not known how many of these bytes will be promoted.
+ inline size_t get_young_bytes_reserved_for_evacuation() const;
- size_t get_young_available_bytes_collected() { return _young_available_bytes_collected; }
+ // Returns the amount of live bytes in old regions in the collection set.
+ inline size_t get_old_bytes_reserved_for_evacuation() const;
- inline size_t get_old_garbage();
+ // Returns the amount of live bytes in young regions with an age above the tenuring threshold.
+ inline size_t get_young_bytes_to_be_promoted() const;
+
+ // Returns the amount of free bytes in young regions in the collection set.
+ size_t get_young_available_bytes_collected() const { return _young_available_bytes_collected; }
+
+ // Returns the amount of garbage in old regions in the collection set.
+ inline size_t get_old_garbage() const;
bool is_preselected(size_t region_idx) {
assert(_preselected_regions != nullptr, "Missing establish after abandon");
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.inline.hpp
index 791e9c73b28..4adcec4fbb5 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectionSet.inline.hpp
@@ -54,19 +54,19 @@ bool ShenandoahCollectionSet::is_in_loc(void* p) const {
return _biased_cset_map[index] == 1;
}
-size_t ShenandoahCollectionSet::get_old_bytes_reserved_for_evacuation() {
+size_t ShenandoahCollectionSet::get_old_bytes_reserved_for_evacuation() const {
return _old_bytes_to_evacuate;
}
-size_t ShenandoahCollectionSet::get_young_bytes_reserved_for_evacuation() {
+size_t ShenandoahCollectionSet::get_young_bytes_reserved_for_evacuation() const {
return _young_bytes_to_evacuate - _young_bytes_to_promote;
}
-size_t ShenandoahCollectionSet::get_young_bytes_to_be_promoted() {
+size_t ShenandoahCollectionSet::get_young_bytes_to_be_promoted() const {
return _young_bytes_to_promote;
}
-size_t ShenandoahCollectionSet::get_old_garbage() {
+size_t ShenandoahCollectionSet::get_old_garbage() const {
return _old_garbage;
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
index 111e0073470..6d3b93ac406 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
@@ -114,6 +114,8 @@ void ShenandoahConcurrentGC::entry_concurrent_update_refs_prepare(ShenandoahHeap
bool ShenandoahConcurrentGC::collect(GCCause::Cause cause) {
ShenandoahHeap* const heap = ShenandoahHeap::heap();
+ _generation->ref_processor()->set_soft_reference_policy(
+ GCCause::should_clear_all_soft_refs(cause));
ShenandoahBreakpointGCScope breakpoint_gc_scope(cause);
@@ -732,7 +734,6 @@ void ShenandoahConcurrentGC::op_init_mark() {
// Weak reference processing
ShenandoahReferenceProcessor* rp = _generation->ref_processor();
rp->reset_thread_locals();
- rp->set_soft_reference_policy(heap->soft_ref_policy()->should_clear_all_soft_refs());
// Make above changes visible to worker threads
OrderAccess::fence();
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
index 6290101bc49..f4005e45f39 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
@@ -34,6 +34,7 @@
#include "gc/shenandoah/shenandoahGeneration.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahMonitoringSupport.hpp"
+#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahUtils.hpp"
#include "logging/log.hpp"
#include "memory/metaspaceStats.hpp"
@@ -118,7 +119,7 @@ void ShenandoahControlThread::run_service() {
// Blow all soft references on this cycle, if handling allocation failure,
// either implicit or explicit GC request, or we are requested to do so unconditionally.
if (alloc_failure_pending || is_gc_requested || ShenandoahAlwaysClearSoftRefs) {
- heap->soft_ref_policy()->set_should_clear_all_soft_refs(true);
+ heap->global_generation()->ref_processor()->set_soft_reference_policy(true);
}
const bool gc_requested = (mode != none);
@@ -193,7 +194,7 @@ void ShenandoahControlThread::run_service() {
heap->set_forced_counters_update(false);
// Retract forceful part of soft refs policy
- heap->soft_ref_policy()->set_should_clear_all_soft_refs(false);
+ heap->global_generation()->ref_processor()->set_soft_reference_policy(false);
// Clear metaspace oom flag, if current cycle unloaded classes
if (heap->unload_classes()) {
@@ -210,12 +211,11 @@ void ShenandoahControlThread::run_service() {
ResourceMark rm;
LogStream ls(lt);
heap->phase_timings()->print_cycle_on(&ls);
-#ifdef NOT_PRODUCT
- ShenandoahEvacuationTracker* evac_tracker = heap->evac_tracker();
- ShenandoahCycleStats evac_stats = evac_tracker->flush_cycle_to_global();
- evac_tracker->print_evacuations_on(&ls, &evac_stats.workers,
- &evac_stats.mutators);
-#endif
+ if (ShenandoahEvacTracking) {
+ ShenandoahEvacuationTracker* evac_tracker = heap->evac_tracker();
+ ShenandoahCycleStats evac_stats = evac_tracker->flush_cycle_to_global();
+ evac_tracker->print_evacuations_on(&ls, &evac_stats.workers, &evac_stats.mutators);
+ }
}
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahEvacInfo.hpp b/src/hotspot/share/gc/shenandoah/shenandoahEvacInfo.hpp
deleted file mode 100644
index 8069fd13afa..00000000000
--- a/src/hotspot/share/gc/shenandoah/shenandoahEvacInfo.hpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHEVACINFO_HPP
-#define SHARE_GC_SHENANDOAH_SHENANDOAHEVACINFO_HPP
-
-#include "memory/allocation.hpp"
-
-class ShenandoahEvacuationInformation : public StackObj {
- // Values for ShenandoahEvacuationInformation jfr event, sizes stored as bytes
- size_t _collection_set_regions;
- size_t _collection_set_used_before;
- size_t _collection_set_used_after;
- size_t _collected_old;
- size_t _collected_promoted;
- size_t _collected_young;
- size_t _free_regions;
- size_t _regions_promoted_humongous;
- size_t _regions_promoted_regular;
- size_t _regular_promoted_garbage;
- size_t _regular_promoted_free;
- size_t _regions_immediate;
- size_t _immediate_size;
-
-public:
- ShenandoahEvacuationInformation() :
- _collection_set_regions(0), _collection_set_used_before(0), _collection_set_used_after(0),
- _collected_old(0), _collected_promoted(0), _collected_young(0), _free_regions(0),
- _regions_promoted_humongous(0), _regions_promoted_regular(0), _regular_promoted_garbage(0),
- _regular_promoted_free(0), _regions_immediate(0), _immediate_size(0) { }
-
- void set_collection_set_regions(size_t collection_set_regions) {
- _collection_set_regions = collection_set_regions;
- }
-
- void set_collection_set_used_before(size_t used) {
- _collection_set_used_before = used;
- }
-
- void set_collection_set_used_after(size_t used) {
- _collection_set_used_after = used;
- }
-
- void set_collected_old(size_t collected) {
- _collected_old = collected;
- }
-
- void set_collected_promoted(size_t collected) {
- _collected_promoted = collected;
- }
-
- void set_collected_young(size_t collected) {
- _collected_young = collected;
- }
-
- void set_free_regions(size_t freed) {
- _free_regions = freed;
- }
-
- void set_regions_promoted_humongous(size_t humongous) {
- _regions_promoted_humongous = humongous;
- }
-
- void set_regions_promoted_regular(size_t regular) {
- _regions_promoted_regular = regular;
- }
-
- void set_regular_promoted_garbage(size_t garbage) {
- _regular_promoted_garbage = garbage;
- }
-
- void set_regular_promoted_free(size_t free) {
- _regular_promoted_free = free;
- }
-
- void set_regions_immediate(size_t immediate) {
- _regions_immediate = immediate;
- }
-
- void set_immediate_size(size_t size) {
- _immediate_size = size;
- }
-
- size_t collection_set_regions() { return _collection_set_regions; }
- size_t collection_set_used_before() { return _collection_set_used_before; }
- size_t collection_set_used_after() { return _collection_set_used_after; }
- size_t collected_old() { return _collected_old; }
- size_t collected_promoted() { return _collected_promoted; }
- size_t collected_young() { return _collected_young; }
- size_t regions_promoted_humongous() { return _regions_promoted_humongous; }
- size_t regions_promoted_regular() { return _regions_promoted_regular; }
- size_t regular_promoted_garbage() { return _regular_promoted_garbage; }
- size_t regular_promoted_free() { return _regular_promoted_free; }
- size_t free_regions() { return _free_regions; }
- size_t regions_immediate() { return _regions_immediate; }
- size_t immediate_size() { return _immediate_size; }
-};
-
-#endif // SHARE_GC_SHENANDOAH_SHENANDOAHEVACINFO_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahEvacTracker.cpp b/src/hotspot/share/gc/shenandoah/shenandoahEvacTracker.cpp
index 7883e2c5b29..bc8fad713af 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahEvacTracker.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahEvacTracker.cpp
@@ -108,8 +108,10 @@ void ShenandoahEvacuationStats::ShenandoahEvacuations::print_on(outputStream* st
void ShenandoahEvacuationStats::print_on(outputStream* st) const {
st->print("Young: "); _young.print_on(st);
- st->print("Promotion: "); _promotion.print_on(st);
- st->print("Old: "); _old.print_on(st);
+ if (ShenandoahHeap::heap()->mode()->is_generational()) {
+ st->print("Promotion: "); _promotion.print_on(st);
+ st->print("Old: "); _old.print_on(st);
+ }
if (_use_age_table) {
_age_table->print_on(st);
@@ -123,25 +125,28 @@ void ShenandoahEvacuationTracker::print_global_on(outputStream* st) {
void ShenandoahEvacuationTracker::print_evacuations_on(outputStream* st,
ShenandoahEvacuationStats* workers,
ShenandoahEvacuationStats* mutators) {
- st->print_cr("Workers: ");
- workers->print_on(st);
- st->cr();
- st->print_cr("Mutators: ");
- mutators->print_on(st);
- st->cr();
+ if (ShenandoahEvacTracking) {
+ st->print_cr("Workers: ");
+ workers->print_on(st);
+ st->cr();
+ st->print_cr("Mutators: ");
+ mutators->print_on(st);
+ st->cr();
+ }
ShenandoahHeap* heap = ShenandoahHeap::heap();
-
- AgeTable young_region_ages(false);
- for (uint i = 0; i < heap->num_regions(); ++i) {
- ShenandoahHeapRegion* r = heap->get_region(i);
- if (r->is_young()) {
- young_region_ages.add(r->age(), r->get_live_data_words());
+ if (heap->mode()->is_generational()) {
+ AgeTable young_region_ages(false);
+ for (uint i = 0; i < heap->num_regions(); ++i) {
+ ShenandoahHeapRegion* r = heap->get_region(i);
+ if (r->is_young()) {
+ young_region_ages.add(r->age(), r->get_live_data_words());
+ }
}
+ st->print("Young regions: ");
+ young_region_ages.print_on(st);
+ st->cr();
}
- st->print("Young regions: ");
- young_region_ages.print_on(st);
- st->cr();
}
class ShenandoahStatAggregator : public ThreadClosure {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp
index 25afcfcb10e..fafa3fde437 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp
@@ -535,6 +535,8 @@ size_t ShenandoahGeneration::select_aged_regions(size_t old_available) {
const size_t old_garbage_threshold = (ShenandoahHeapRegion::region_size_bytes() * ShenandoahOldGarbageThreshold) / 100;
+ const size_t pip_used_threshold = (ShenandoahHeapRegion::region_size_bytes() * ShenandoahGenerationalMinPIPUsage) / 100;
+
size_t old_consumed = 0;
size_t promo_potential = 0;
size_t candidates = 0;
@@ -557,10 +559,8 @@ size_t ShenandoahGeneration::select_aged_regions(size_t old_available) {
continue;
}
if (heap->is_tenurable(r)) {
- if ((r->garbage() < old_garbage_threshold)) {
- // This tenure-worthy region has too little garbage, so we do not want to expend the copying effort to
- // reclaim the garbage; instead this region may be eligible for promotion-in-place to the
- // old generation.
+ if ((r->garbage() < old_garbage_threshold) && (r->used() > pip_used_threshold)) {
+ // We prefer to promote this region in place because it has little garbage and high usage.
HeapWord* tams = ctx->top_at_mark_start(r);
HeapWord* original_top = r->top();
if (!heap->is_concurrent_old_mark_in_progress() && tams == original_top) {
@@ -586,7 +586,7 @@ size_t ShenandoahGeneration::select_aged_regions(size_t old_available) {
// Else, we do not promote this region (either in place or by copy) because it has received new allocations.
// During evacuation, we exclude from promotion regions for which age > tenure threshold, garbage < garbage-threshold,
- // and get_top_before_promote() != tams
+ // used > pip_used_threshold, and get_top_before_promote() != tams
} else {
// Record this promotion-eligible candidate region. After sorting and selecting the best candidates below,
// we may still decide to exclude this promotion-eligible region from the current collection set. If this
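
The selection logic above now promotes a tenurable region in place only when it holds little garbage and is densely used; both thresholds are derived from percentage flags applied to the region size. A standalone restatement of that predicate under those assumptions (hypothetical names, not HotSpot code):

    #include <cstddef>

    // The two percentages mirror ShenandoahOldGarbageThreshold and
    // ShenandoahGenerationalMinPIPUsage; all sizes are byte counts.
    static bool should_promote_in_place(size_t region_size_bytes,
                                        size_t garbage_bytes,
                                        size_t used_bytes,
                                        unsigned old_garbage_threshold_pct,
                                        unsigned min_pip_usage_pct) {
      const size_t garbage_threshold = region_size_bytes * old_garbage_threshold_pct / 100;
      const size_t used_threshold    = region_size_bytes * min_pip_usage_pct / 100;
      // Little garbage to reclaim by copying and a mostly-used region:
      // cheaper to flip the region to old in place than to evacuate it.
      return garbage_bytes < garbage_threshold && used_bytes > used_threshold;
    }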
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
index ce8d96308ba..761ba02d569 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
@@ -37,6 +37,7 @@
#include "gc/shenandoah/shenandoahMonitoringSupport.hpp"
#include "gc/shenandoah/shenandoahOldGC.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
+#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahUtils.hpp"
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "logging/log.hpp"
@@ -216,8 +217,9 @@ void ShenandoahGenerationalControlThread::run_gc_cycle(const ShenandoahGCRequest
// Blow away all soft references on this cycle, if handling allocation failure,
// either implicit or explicit GC request, or we are requested to do so unconditionally.
- if (request.generation->is_global() && (ShenandoahCollectorPolicy::is_allocation_failure(request.cause) || ShenandoahCollectorPolicy::is_explicit_gc(request.cause) || ShenandoahAlwaysClearSoftRefs)) {
- _heap->soft_ref_policy()->set_should_clear_all_soft_refs(true);
+ if (GCCause::should_clear_all_soft_refs(request.cause) || (request.generation->is_global() &&
+ (ShenandoahCollectorPolicy::is_allocation_failure(request.cause) || ShenandoahCollectorPolicy::is_explicit_gc(request.cause) || ShenandoahAlwaysClearSoftRefs))) {
+ request.generation->ref_processor()->set_soft_reference_policy(true);
}
// GC is starting, bump the internal ID
@@ -289,7 +291,7 @@ void ShenandoahGenerationalControlThread::run_gc_cycle(const ShenandoahGCRequest
_heap->set_forced_counters_update(false);
// Retract forceful part of soft refs policy
- _heap->soft_ref_policy()->set_should_clear_all_soft_refs(false);
+ request.generation->ref_processor()->set_soft_reference_policy(false);
// Clear metaspace oom flag, if current cycle unloaded classes
if (_heap->unload_classes()) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalEvacuationTask.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalEvacuationTask.cpp
index 3a0d7926865..971129beea8 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalEvacuationTask.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalEvacuationTask.cpp
@@ -146,7 +146,13 @@ void ShenandoahGenerationalEvacuationTask::maybe_promote_region(ShenandoahHeapRe
// more garbage than ShenandoahOldGarbageThreshold, we'll promote by evacuation. If there is room for evacuation
// in this cycle, the region will be in the collection set. If there is not room, the region will be promoted
// by evacuation in some future GC cycle.
- promote_humongous(r);
+
+ // We do not promote primitive arrays because there's no performance penalty keeping them in young. When/if they
+ // become garbage, reclaiming the memory from young is much quicker and more efficient than reclaiming it from old.
+ oop obj = cast_to_oop(r->bottom());
+ if (!obj->is_typeArray()) {
+ promote_humongous(r);
+ }
} else if (r->is_regular() && (r->get_top_before_promote() != nullptr)) {
// Likewise, we cannot put promote-in-place regions into the collection set because that would also trigger
// the LRB to copy on reference fetch.
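
Per the comment above, a humongous region whose single object is a primitive array is left in young rather than promoted. A minimal sketch of that filter, with a hypothetical ObjectKind enum standing in for oop::is_typeArray():

    // Primitive arrays hold no references, so keeping them in young costs nothing
    // and reclaiming them from young is cheaper than reclaiming them from old.
    enum class ObjectKind { TypeArray, ObjArray, Instance };

    static bool should_promote_humongous(ObjectKind first_object_kind) {
      return first_object_kind != ObjectKind::TypeArray;
    }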
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp
index e24109838a1..8a21ae376e1 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp
@@ -324,8 +324,11 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
return ShenandoahBarrierSet::resolve_forwarded(p);
}
+ if (ShenandoahEvacTracking) {
+ evac_tracker()->begin_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
+ }
+
// Copy the object:
- NOT_PRODUCT(evac_tracker()->begin_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen));
Copy::aligned_disjoint_words(cast_from_oop<HeapWord*>(p), copy, size);
oop copy_val = cast_to_oop(copy);
@@ -346,8 +349,10 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
// safe to do this on the public copy (this is also done during concurrent mark).
ContinuationGCSupport::relativize_stack_chunk(copy_val);
- // Record that the evacuation succeeded
- NOT_PRODUCT(evac_tracker()->end_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen));
+ if (ShenandoahEvacTracking) {
+ // Record that the evacuation succeeded
+ evac_tracker()->end_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
+ }
if (target_gen == OLD_GENERATION) {
old_generation()->handle_evacuation(copy, size, from_region->is_young());
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
index c2dca09a344..714c9cb9f5b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -1371,8 +1371,11 @@ oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, ShenandoahHeapReg
return ShenandoahBarrierSet::resolve_forwarded(p);
}
+ if (ShenandoahEvacTracking) {
+ evac_tracker()->begin_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
+ }
+
// Copy the object:
- NOT_PRODUCT(evac_tracker()->begin_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen));
Copy::aligned_disjoint_words(cast_from_oop<HeapWord*>(p), copy, size);
// Try to install the new forwarding pointer.
@@ -1382,7 +1385,9 @@ oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, ShenandoahHeapReg
// Successfully evacuated. Our copy is now the public one!
ContinuationGCSupport::relativize_stack_chunk(copy_val);
shenandoah_assert_correct(nullptr, copy_val);
- NOT_PRODUCT(evac_tracker()->end_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen));
+ if (ShenandoahEvacTracking) {
+ evac_tracker()->end_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
+ }
return copy_val;
} else {
// Failed to evacuate. We need to deal with the object that is left behind. Since this
@@ -1612,12 +1617,11 @@ void ShenandoahHeap::print_tracing_info() const {
ResourceMark rm;
LogStream ls(lt);
-#ifdef NOT_PRODUCT
- evac_tracker()->print_global_on(&ls);
-
- ls.cr();
- ls.cr();
-#endif
+ if (ShenandoahEvacTracking) {
+ evac_tracker()->print_global_on(&ls);
+ ls.cr();
+ ls.cr();
+ }
phase_timings()->print_global_on(&ls);
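
The hunks above replace compile-time NOT_PRODUCT/#ifdef gating with a runtime check of the new ShenandoahEvacTracking diagnostic flag, so the instrumentation can also be enabled in product builds. A minimal sketch of that pattern (the flag variable and EvacTracker type are stand-ins, not the real VM code):

    #include <cstddef>
    #include <cstdio>

    static bool ShenandoahEvacTracking = false;  // set from the command line in the real VM

    struct EvacTracker {
      void begin(size_t bytes) { printf("begin evacuation of %zu bytes\n", bytes); }
      void end(size_t bytes)   { printf("finished evacuation of %zu bytes\n", bytes); }
    };

    static void evacuate(EvacTracker* tracker, size_t bytes) {
      if (ShenandoahEvacTracking) {  // runtime check instead of an #ifdef
        tracker->begin(bytes);
      }
      // ... copy the object ...
      if (ShenandoahEvacTracking) {
        tracker->end(bytes);
      }
    }

    int main() {
      EvacTracker tracker;
      ShenandoahEvacTracking = true;
      evacuate(&tracker, 64);
      return 0;
    }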
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
index eafd1b28b3a..322ac26e254 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -29,7 +29,6 @@
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/markBitMap.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "gc/shenandoah/mode/shenandoahMode.hpp"
#include "gc/shenandoah/shenandoahAllocRequest.hpp"
#include "gc/shenandoah/shenandoahAsserts.hpp"
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.cpp b/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.cpp
index f8726386b5d..4ca6f2fdf49 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.cpp
@@ -221,8 +221,10 @@ void ShenandoahRefProcThreadLocal::set_discovered_list_head(oop head) {
*discovered_list_addr() = head;
}
+AlwaysClearPolicy ShenandoahReferenceProcessor::_always_clear_policy;
+
ShenandoahReferenceProcessor::ShenandoahReferenceProcessor(uint max_workers) :
- _soft_reference_policy(nullptr),
+ _soft_reference_policy(&_always_clear_policy),
_ref_proc_thread_locals(NEW_C_HEAP_ARRAY(ShenandoahRefProcThreadLocal, max_workers, mtGC)),
_pending_list(nullptr),
_pending_list_tail(&_pending_list),
@@ -245,12 +247,11 @@ void ShenandoahReferenceProcessor::set_mark_closure(uint worker_id, ShenandoahMa
}
void ShenandoahReferenceProcessor::set_soft_reference_policy(bool clear) {
- static AlwaysClearPolicy always_clear_policy;
static LRUMaxHeapPolicy lru_max_heap_policy;
if (clear) {
log_info(gc, ref)("Clearing All SoftReferences");
- _soft_reference_policy = &always_clear_policy;
+ _soft_reference_policy = &_always_clear_policy;
} else {
_soft_reference_policy = &lru_max_heap_policy;
}
@@ -284,7 +285,7 @@ bool ShenandoahReferenceProcessor::is_softly_live(oop reference, ReferenceType t
// Ask SoftReference policy
const jlong clock = java_lang_ref_SoftReference::clock();
assert(clock != 0, "Clock not initialized");
- assert(_soft_reference_policy != nullptr, "Policy not initialized");
+ assert(_soft_reference_policy != nullptr, "Should never be null");
return !_soft_reference_policy->should_clear_reference(reference, clock);
}
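
The constructor change above initializes _soft_reference_policy to a static AlwaysClearPolicy instead of nullptr, so the pointer is always valid even before the first set_soft_reference_policy() call. A compact sketch of that default-strategy pattern with simplified stand-in types (not the HotSpot classes):

    struct Policy {
      virtual bool should_clear() const = 0;
      virtual ~Policy() = default;
    };
    struct AlwaysClear : Policy { bool should_clear() const override { return true;  } };
    struct LruMaxHeap  : Policy { bool should_clear() const override { return false; } };

    class RefProcessor {
      static AlwaysClear _always_clear;  // shared default, lives for the whole run
      Policy* _policy = &_always_clear;  // never nullptr, so no null checks on hot paths
    public:
      void set_policy(bool clear_all) {
        static LruMaxHeap lru_policy;
        _policy = clear_all ? static_cast<Policy*>(&_always_clear) : &lru_policy;
      }
      bool is_softly_live() const { return !_policy->should_clear(); }
    };
    AlwaysClear RefProcessor::_always_clear;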
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.hpp b/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.hpp
index 682c4268754..11099f1303d 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahReferenceProcessor.hpp
@@ -127,6 +127,8 @@ public:
class ShenandoahReferenceProcessor : public ReferenceDiscoverer {
private:
+ static AlwaysClearPolicy _always_clear_policy;
+
ReferencePolicy* _soft_reference_policy;
ShenandoahRefProcThreadLocal* _ref_proc_thread_locals;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp
index fa3fa90b2f5..6aebec28163 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp
@@ -152,7 +152,7 @@ public:
// we risk executing that code cache blob, and crashing.
template <typename T>
void ShenandoahSTWRootScanner::roots_do(T* oops, uint worker_id) {
- MarkingNMethodClosure nmethods_cl(oops, !NMethodToOopClosure::FixRelocations, true /*FIXME*/);
+ MarkingNMethodClosure nmethods_cl(oops);
CLDToOopClosure clds(oops, ClassLoaderData::_claim_strong);
ResourceMark rm;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp
index c2bfea664fd..260c1e0276f 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahSTWMark.cpp
@@ -77,7 +77,6 @@ void ShenandoahSTWMark::mark() {
ShenandoahReferenceProcessor* rp = _generation->ref_processor();
shenandoah_assert_generations_reconciled();
rp->reset_thread_locals();
- rp->set_soft_reference_policy(heap->soft_ref_policy()->should_clear_all_soft_refs());
// Init mark, do not expect forwarded pointers in roots
if (ShenandoahVerify) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahTrace.cpp b/src/hotspot/share/gc/shenandoah/shenandoahTrace.cpp
index dd153718c9f..a786f8ae216 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahTrace.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahTrace.cpp
@@ -22,31 +22,31 @@
*
*/
-#include "gc/shenandoah/shenandoahEvacInfo.hpp"
+#include "gc/shenandoah/shenandoahCollectionSet.inline.hpp"
#include "gc/shenandoah/shenandoahTrace.hpp"
#include "jfr/jfrEvents.hpp"
-void ShenandoahTracer::report_evacuation_info(ShenandoahEvacuationInformation* info) {
- send_evacuation_info_event(info);
-}
+void ShenandoahTracer::report_evacuation_info(const ShenandoahCollectionSet* cset,
+ size_t free_regions, size_t regions_promoted_humongous, size_t regions_promoted_regular,
+ size_t regular_promoted_garbage, size_t regular_promoted_free, size_t regions_immediate,
+ size_t immediate_size) {
-void ShenandoahTracer::send_evacuation_info_event(ShenandoahEvacuationInformation* info) {
EventShenandoahEvacuationInformation e;
if (e.should_commit()) {
e.set_gcId(GCId::current());
- e.set_cSetRegions(info->collection_set_regions());
- e.set_cSetUsedBefore(info->collection_set_used_before());
- e.set_cSetUsedAfter(info->collection_set_used_after());
- e.set_collectedOld(info->collected_old());
- e.set_collectedPromoted(info->collected_promoted());
- e.set_collectedYoung(info->collected_young());
- e.set_regionsPromotedHumongous(info->regions_promoted_humongous());
- e.set_regionsPromotedRegular(info->regions_promoted_regular());
- e.set_regularPromotedGarbage(info->regular_promoted_garbage());
- e.set_regularPromotedFree(info->regular_promoted_free());
- e.set_freeRegions(info->free_regions());
- e.set_regionsImmediate(info->regions_immediate());
- e.set_immediateBytes(info->immediate_size());
+ e.set_cSetRegions(cset->count());
+ e.set_cSetUsedBefore(cset->used());
+ e.set_cSetUsedAfter(cset->live());
+ e.set_collectedOld(cset->get_old_bytes_reserved_for_evacuation());
+ e.set_collectedPromoted(cset->get_young_bytes_to_be_promoted());
+ e.set_collectedYoung(cset->get_young_bytes_reserved_for_evacuation());
+ e.set_regionsPromotedHumongous(regions_promoted_humongous);
+ e.set_regionsPromotedRegular(regions_promoted_regular);
+ e.set_regularPromotedGarbage(regular_promoted_garbage);
+ e.set_regularPromotedFree(regular_promoted_free);
+ e.set_freeRegions(free_regions);
+ e.set_regionsImmediate(regions_immediate);
+ e.set_immediateBytes(immediate_size);
e.commit();
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahTrace.hpp b/src/hotspot/share/gc/shenandoah/shenandoahTrace.hpp
index a5351f4ef28..116968103de 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahTrace.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahTrace.hpp
@@ -28,15 +28,17 @@
#include "gc/shared/gcTrace.hpp"
#include "memory/allocation.hpp"
-class ShenandoahEvacuationInformation;
+class ShenandoahCollectionSet;
class ShenandoahTracer : public GCTracer, public CHeapObj<mtGC> {
public:
ShenandoahTracer() : GCTracer(Shenandoah) {}
- void report_evacuation_info(ShenandoahEvacuationInformation* info);
-private:
- void send_evacuation_info_event(ShenandoahEvacuationInformation* info);
+ // Sends a JFR event (if enabled) summarizing the composition of the collection set
+ static void report_evacuation_info(const ShenandoahCollectionSet* cset,
+ size_t free_regions, size_t regions_promoted_humongous, size_t regions_promoted_regular,
+ size_t regular_promoted_garbage, size_t regular_promoted_free, size_t regions_immediate,
+ size_t immediate_size);
};
#endif
diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
index 1321baa6366..d1531c51236 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
@@ -34,6 +34,20 @@
range, \
constraint) \
\
+ product(uintx, ShenandoahGenerationalMinPIPUsage, 30, EXPERIMENTAL, \
+ "(Generational mode only) What percent of a heap region " \
+ "should be used before we consider promoting a region in " \
+ "place? Regions with less than this amount of used will " \
+ "promoted by evacuation. A benefit of promoting in place " \
+ "is that less work is required by the GC at the time the " \
+ "region is promoted. A disadvantage of promoting in place " \
+ "is that this introduces fragmentation of old-gen memory, " \
+ "with old-gen regions scattered throughout the heap. Regions " \
+ "that have been promoted in place may need to be evacuated at " \
+ "a later time in order to compact old-gen memory to enable " \
+ "future humongous allocations.") \
+ range(0,100) \
+ \
product(uintx, ShenandoahGenerationalHumongousReserve, 0, EXPERIMENTAL, \
"(Generational mode only) What percent of the heap should be " \
"reserved for humongous objects if possible. Old-generation " \
@@ -165,7 +179,7 @@
"collector accepts. In percents of heap region size.") \
range(0,100) \
\
- product(uintx, ShenandoahOldGarbageThreshold, 15, EXPERIMENTAL, \
+ product(uintx, ShenandoahOldGarbageThreshold, 25, EXPERIMENTAL, \
"How much garbage an old region has to contain before it would " \
"be taken for collection.") \
range(0,100) \
@@ -394,6 +408,13 @@
"events.") \
range(0,100) \
\
+ product(bool, ShenandoahEvacTracking, false, DIAGNOSTIC, \
+ "Collect additional metrics about evacuations. Enabling this " \
+ "tracks how many objects and how many bytes were evacuated, and " \
+ "how many were abandoned. The information will be categorized " \
+ "by thread type (worker or mutator) and evacuation type (young, " \
+ "old, or promotion.") \
+ \
product(uintx, ShenandoahMinYoungPercentage, 20, EXPERIMENTAL, \
"The minimum percentage of the heap to use for the young " \
"generation. Heuristics will not adjust the young generation " \
diff --git a/src/hotspot/share/gc/z/zAddress.inline.hpp b/src/hotspot/share/gc/z/zAddress.inline.hpp
index 64beb5ba35d..f088e4a87d1 100644
--- a/src/hotspot/share/gc/z/zAddress.inline.hpp
+++ b/src/hotspot/share/gc/z/zAddress.inline.hpp
@@ -149,18 +149,18 @@ inline bool operator>=(offset_type first, offset_type##_end second) {
inline uintptr_t untype(zoffset offset) {
const uintptr_t value = static_cast<uintptr_t>(offset);
- assert(value < ZAddressOffsetMax, "must have no other bits");
+ assert(value < ZAddressOffsetMax, "Offset out of bounds (" PTR_FORMAT " < " PTR_FORMAT ")", value, ZAddressOffsetMax);
return value;
}
inline uintptr_t untype(zoffset_end offset) {
const uintptr_t value = static_cast<uintptr_t>(offset);
- assert(value <= ZAddressOffsetMax, "must have no other bits");
+ assert(value <= ZAddressOffsetMax, "Offset out of bounds (" PTR_FORMAT " <= " PTR_FORMAT ")", value, ZAddressOffsetMax);
return value;
}
inline zoffset to_zoffset(uintptr_t value) {
- assert(value < ZAddressOffsetMax, "must have no other bits");
+ assert(value < ZAddressOffsetMax, "Value out of bounds (" PTR_FORMAT " < " PTR_FORMAT ")", value, ZAddressOffsetMax);
return zoffset(value);
}
@@ -186,7 +186,7 @@ inline zoffset_end to_zoffset_end(zoffset start, size_t size) {
}
inline zoffset_end to_zoffset_end(uintptr_t value) {
- assert(value <= ZAddressOffsetMax, "Overflow");
+ assert(value <= ZAddressOffsetMax, "Value out of bounds (" PTR_FORMAT " <= " PTR_FORMAT ")", value, ZAddressOffsetMax);
return zoffset_end(value);
}
@@ -200,18 +200,18 @@ CREATE_ZOFFSET_OPERATORS(zoffset)
inline uintptr_t untype(zbacking_offset offset) {
const uintptr_t value = static_cast<uintptr_t>(offset);
- assert(value < ZBackingOffsetMax, "must have no other bits");
+ assert(value < ZBackingOffsetMax, "Offset out of bounds (" PTR_FORMAT " < " PTR_FORMAT ")", value, ZBackingOffsetMax);
return value;
}
inline uintptr_t untype(zbacking_offset_end offset) {
const uintptr_t value = static_cast<uintptr_t>(offset);
- assert(value <= ZBackingOffsetMax, "must have no other bits");
+ assert(value <= ZBackingOffsetMax, "Offset out of bounds (" PTR_FORMAT " <= " PTR_FORMAT ")", value, ZBackingOffsetMax);
return value;
}
inline zbacking_offset to_zbacking_offset(uintptr_t value) {
- assert(value < ZBackingOffsetMax, "must have no other bits");
+ assert(value < ZBackingOffsetMax, "Value out of bounds (" PTR_FORMAT " < " PTR_FORMAT ")", value, ZBackingOffsetMax);
return zbacking_offset(value);
}
@@ -228,7 +228,7 @@ inline zbacking_offset_end to_zbacking_offset_end(zbacking_offset start, size_t
}
inline zbacking_offset_end to_zbacking_offset_end(uintptr_t value) {
- assert(value <= ZBackingOffsetMax, "must have no other bits");
+ assert(value <= ZBackingOffsetMax, "Value out of bounds (" PTR_FORMAT " <= " PTR_FORMAT ")", value, ZBackingOffsetMax);
return zbacking_offset_end(value);
}
@@ -242,18 +242,18 @@ CREATE_ZOFFSET_OPERATORS(zbacking_offset)
inline uint32_t untype(zbacking_index index) {
const uint32_t value = static_cast<uint32_t>(index);
- assert(value < ZBackingIndexMax, "must have no other bits");
+ assert(value < ZBackingIndexMax, "Offset out of bounds (" UINT32_FORMAT_X_0 " < " UINT32_FORMAT_X_0 ")", value, ZBackingIndexMax);
return value;
}
inline uint32_t untype(zbacking_index_end index) {
const uint32_t value = static_cast<uint32_t>(index);
- assert(value <= ZBackingIndexMax, "must have no other bits");
+ assert(value <= ZBackingIndexMax, "Offset out of bounds (" UINT32_FORMAT_X_0 " <= " UINT32_FORMAT_X_0 ")", value, ZBackingIndexMax);
return value;
}
inline zbacking_index to_zbacking_index(uint32_t value) {
- assert(value < ZBackingIndexMax, "must have no other bits");
+ assert(value < ZBackingIndexMax, "Value out of bounds (" UINT32_FORMAT_X_0 " < " UINT32_FORMAT_X_0 ")", value, ZBackingIndexMax);
return zbacking_index(value);
}
@@ -266,12 +266,12 @@ inline zbacking_index_end to_zbacking_index_end(zbacking_index start, size_t siz
const uint32_t start_value = untype(start);
const uint32_t value = start_value + checked_cast(size);
assert(value <= ZBackingIndexMax && start_value <= value,
- "Overflow start: %x size: %zu value: %x", start_value, size, value);
+ "Overflow start: " UINT32_FORMAT_X_0 " size: %zu value: " UINT32_FORMAT_X_0 "", start_value, size, value);
return zbacking_index_end(value);
}
inline zbacking_index_end to_zbacking_index_end(uint32_t value) {
- assert(value <= ZBackingIndexMax, "must have no other bits");
+ assert(value <= ZBackingIndexMax, "Value out of bounds (" UINT32_FORMAT_X_0 " <= " UINT32_FORMAT_X_0 ")", value, ZBackingIndexMax);
return zbacking_index_end(value);
}
@@ -287,7 +287,7 @@ CREATE_ZOFFSET_OPERATORS(zbacking_index)
inline zbacking_index to_zbacking_index(zbacking_offset offset) {
const uintptr_t value = untype(offset);
- assert(is_aligned(value, ZGranuleSize), "must be granule aligned");
+ assert(is_aligned(value, ZGranuleSize), "Must be granule aligned: " PTR_FORMAT, value);
return to_zbacking_index((uint32_t)(value >> ZGranuleSizeShift));
}
@@ -420,7 +420,7 @@ inline bool is_null_any(zpointer ptr) {
// Is it null - colored or not?
inline bool is_null_assert_load_good(zpointer ptr) {
const bool result = is_null_any(ptr);
- assert(!result || ZPointer::is_load_good(ptr), "Got bad colored null");
+ assert(!result || ZPointer::is_load_good(ptr), "Got bad colored null: " PTR_FORMAT, untype(ptr));
return result;
}
@@ -620,7 +620,7 @@ inline zaddress ZPointer::uncolor_store_good(zpointer ptr) {
}
inline zaddress_unsafe ZPointer::uncolor_unsafe(zpointer ptr) {
- assert(ZPointer::is_store_bad(ptr), "Unexpected ptr");
+ assert(ZPointer::is_store_bad(ptr), "Should be store bad: " PTR_FORMAT, untype(ptr));
const uintptr_t raw_addr = untype(ptr);
return to_zaddress_unsafe(raw_addr >> ZPointer::load_shift_lookup(raw_addr));
}
@@ -642,7 +642,7 @@ inline bool ZPointer::is_load_good_or_null(zpointer ptr) {
// the barrier as if it was null. This should be harmless as such
// addresses should never be passed through the barrier.
const bool result = !is_load_bad(ptr);
- assert((is_load_good(ptr) || is_null(ptr)) == result, "Bad address");
+ assert((is_load_good(ptr) || is_null(ptr)) == result, "Bad address: " PTR_FORMAT, untype(ptr));
return result;
}
@@ -673,7 +673,7 @@ inline bool ZPointer::is_mark_good_or_null(zpointer ptr) {
// the barrier as if it was null. This should be harmless as such
// addresses should never be passed through the barrier.
const bool result = !is_mark_bad(ptr);
- assert((is_mark_good(ptr) || is_null(ptr)) == result, "Bad address");
+ assert((is_mark_good(ptr) || is_null(ptr)) == result, "Bad address: " PTR_FORMAT, untype(ptr));
return result;
}
@@ -694,7 +694,7 @@ inline bool ZPointer::is_store_good_or_null(zpointer ptr) {
// the barrier as if it was null. This should be harmless as such
// addresses should never be passed through the barrier.
const bool result = !is_store_bad(ptr);
- assert((is_store_good(ptr) || is_null(ptr)) == result, "Bad address");
+ assert((is_store_good(ptr) || is_null(ptr)) == result, "Bad address: " PTR_FORMAT, untype(ptr));
return result;
}
diff --git a/src/hotspot/share/gc/z/zCollectedHeap.hpp b/src/hotspot/share/gc/z/zCollectedHeap.hpp
index c124976c80f..bbcddec917f 100644
--- a/src/hotspot/share/gc/z/zCollectedHeap.hpp
+++ b/src/hotspot/share/gc/z/zCollectedHeap.hpp
@@ -25,7 +25,6 @@
#define SHARE_GC_Z_ZCOLLECTEDHEAP_HPP
#include "gc/shared/collectedHeap.hpp"
-#include "gc/shared/softRefPolicy.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zHeap.hpp"
#include "gc/z/zInitialize.hpp"
diff --git a/src/hotspot/share/gc/z/zNMethodTable.cpp b/src/hotspot/share/gc/z/zNMethodTable.cpp
index bbc8f56b654..f73014085f7 100644
--- a/src/hotspot/share/gc/z/zNMethodTable.cpp
+++ b/src/hotspot/share/gc/z/zNMethodTable.cpp
@@ -194,14 +194,6 @@ void ZNMethodTable::register_nmethod(nmethod* nm) {
}
}
-void ZNMethodTable::wait_until_iteration_done() {
- assert(CodeCache_lock->owned_by_self(), "Lock must be held");
-
- while (_iteration.in_progress() || _iteration_secondary.in_progress()) {
- CodeCache_lock->wait_without_safepoint_check();
- }
-}
-
void ZNMethodTable::unregister_nmethod(nmethod* nm) {
MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
diff --git a/src/hotspot/share/gc/z/zNMethodTable.hpp b/src/hotspot/share/gc/z/zNMethodTable.hpp
index e160ac1b39a..a8b9029caeb 100644
--- a/src/hotspot/share/gc/z/zNMethodTable.hpp
+++ b/src/hotspot/share/gc/z/zNMethodTable.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -64,8 +64,6 @@ public:
static void register_nmethod(nmethod* nm);
static void unregister_nmethod(nmethod* nm);
- static void wait_until_iteration_done();
-
static void nmethods_do_begin(bool secondary);
static void nmethods_do_end(bool secondary);
static void nmethods_do(bool secondary, NMethodClosure* cl);
diff --git a/src/hotspot/share/gc/z/zNMethodTableIteration.hpp b/src/hotspot/share/gc/z/zNMethodTableIteration.hpp
index fc8acd2589c..34bd7d9b4f8 100644
--- a/src/hotspot/share/gc/z/zNMethodTableIteration.hpp
+++ b/src/hotspot/share/gc/z/zNMethodTableIteration.hpp
@@ -35,11 +35,11 @@ private:
size_t _size;
ZCACHE_ALIGNED volatile size_t _claimed;
+ bool in_progress() const;
+
public:
ZNMethodTableIteration();
- bool in_progress() const;
-
void nmethods_do_begin(ZNMethodTableEntry* table, size_t size);
void nmethods_do_end();
void nmethods_do(NMethodClosure* cl);
diff --git a/src/hotspot/share/gc/z/zStat.cpp b/src/hotspot/share/gc/z/zStat.cpp
index f703b3a1791..03aa9061184 100644
--- a/src/hotspot/share/gc/z/zStat.cpp
+++ b/src/hotspot/share/gc/z/zStat.cpp
@@ -1410,11 +1410,12 @@ ZStatWorkersStats ZStatWorkers::stats() {
//
void ZStatLoad::print() {
double loadavg[3] = {};
- os::loadavg(loadavg, ARRAY_SIZE(loadavg));
- log_info(gc, load)("Load: %.2f (%.0f%%) / %.2f (%.0f%%) / %.2f (%.0f%%)",
- loadavg[0], percent_of(loadavg[0], (double) ZCPU::count()),
- loadavg[1], percent_of(loadavg[1], (double) ZCPU::count()),
- loadavg[2], percent_of(loadavg[2], (double) ZCPU::count()));
+ if (os::loadavg(loadavg, ARRAY_SIZE(loadavg)) != -1) {
+ log_info(gc, load)("Load: %.2f (%.0f%%) / %.2f (%.0f%%) / %.2f (%.0f%%)",
+ loadavg[0], percent_of(loadavg[0], (double) ZCPU::count()),
+ loadavg[1], percent_of(loadavg[1], (double) ZCPU::count()),
+ loadavg[2], percent_of(loadavg[2], (double) ZCPU::count()));
+ }
}
//
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdKlassQueue.cpp b/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdKlassQueue.cpp
index e821b528707..9c57374d6c6 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdKlassQueue.cpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdKlassQueue.cpp
@@ -29,6 +29,7 @@
#include "jfr/support/jfrThreadLocal.hpp"
#include "jfr/utilities/jfrEpochQueue.inline.hpp"
#include "jfr/utilities/jfrTypes.hpp"
+#include "memory/metaspace.hpp"
#include "oops/compressedKlass.inline.hpp"
#include "utilities/macros.hpp"
@@ -73,14 +74,13 @@ static size_t element_size(bool compressed) {
return compressed ? NARROW_ELEMENT_SIZE : ELEMENT_SIZE;
}
-static bool can_compress_element(const Klass* klass) {
- return CompressedKlassPointers::is_encodable(klass) &&
- JfrTraceId::load_raw(klass) < uncompressed_threshold;
+static bool can_compress_element(traceid id) {
+ return Metaspace::using_class_space() && id < uncompressed_threshold;
}
static size_t element_size(const Klass* klass) {
assert(klass != nullptr, "invariant");
- return element_size(can_compress_element(klass));
+ return element_size(can_compress_element(JfrTraceId::load_raw(klass)));
}
static bool is_unloaded(traceid id, bool previous_epoch) {
@@ -136,8 +136,7 @@ static inline void store_traceid(JfrEpochQueueNarrowKlassElement* element, trace
}
static void store_compressed_element(traceid id, const Klass* klass, u1* pos) {
- assert(can_compress_element(klass), "invariant");
- assert(id == JfrTraceId::load_raw(klass), "invariant");
+ assert(can_compress_element(id), "invariant");
JfrEpochQueueNarrowKlassElement* const element = new (pos) JfrEpochQueueNarrowKlassElement();
store_traceid(element, id);
element->compressed_klass = encode(klass);
@@ -153,7 +152,7 @@ static void store_element(const Klass* klass, u1* pos) {
assert(pos != nullptr, "invariant");
assert(klass != nullptr, "invariant");
const traceid id = JfrTraceId::load_raw(klass);
- if (can_compress_element(klass)) {
+ if (can_compress_element(id)) {
store_compressed_element(id, klass, pos);
return;
}
diff --git a/src/hotspot/share/jvmci/jvmciRuntime.cpp b/src/hotspot/share/jvmci/jvmciRuntime.cpp
index 137782f93ef..e75527235f0 100644
--- a/src/hotspot/share/jvmci/jvmciRuntime.cpp
+++ b/src/hotspot/share/jvmci/jvmciRuntime.cpp
@@ -589,10 +589,6 @@ void JVMCIRuntime::write_barrier_pre(JavaThread* thread, oopDesc* obj) {
G1BarrierSetRuntime::write_ref_field_pre_entry(obj, thread);
}
-void JVMCIRuntime::write_barrier_post(JavaThread* thread, volatile CardValue* card_addr) {
- G1BarrierSetRuntime::write_ref_field_post_entry(card_addr, thread);
-}
-
#endif // INCLUDE_G1GC
JRT_LEAF(jboolean, JVMCIRuntime::validate_object(JavaThread* thread, oopDesc* parent, oopDesc* child))
diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
index 3ddf7de0510..7ddb9be540a 100644
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
@@ -560,6 +560,7 @@
declare_constant(BranchData::not_taken_off_set) \
\
declare_constant_with_value("CardTable::dirty_card", CardTable::dirty_card_val()) \
+ declare_constant_with_value("CardTable::clean_card", CardTable::clean_card_val()) \
declare_constant_with_value("LockStack::_end_offset", LockStack::end_offset()) \
declare_constant_with_value("OMCache::oop_to_oop_difference", OMCache::oop_to_oop_difference()) \
declare_constant_with_value("OMCache::oop_to_monitor_difference", OMCache::oop_to_monitor_difference()) \
@@ -928,7 +929,6 @@
declare_function(JVMCIRuntime::vm_error) \
declare_function(JVMCIRuntime::load_and_clear_exception) \
G1GC_ONLY(declare_function(JVMCIRuntime::write_barrier_pre)) \
- G1GC_ONLY(declare_function(JVMCIRuntime::write_barrier_post)) \
SHENANDOAHGC_ONLY(declare_function(ShenandoahRuntime::load_reference_barrier_strong)) \
SHENANDOAHGC_ONLY(declare_function(ShenandoahRuntime::load_reference_barrier_strong_narrow)) \
SHENANDOAHGC_ONLY(declare_function(ShenandoahRuntime::load_reference_barrier_weak)) \
@@ -947,12 +947,10 @@
static_field(G1HeapRegion, LogOfHRGrainBytes, uint)
#define VM_INT_CONSTANTS_JVMCI_G1GC(declare_constant, declare_constant_with_value, declare_preprocessor_constant) \
- declare_constant_with_value("G1CardTable::g1_young_gen", G1CardTable::g1_young_card_val()) \
declare_constant_with_value("G1ThreadLocalData::satb_mark_queue_active_offset", in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())) \
declare_constant_with_value("G1ThreadLocalData::satb_mark_queue_index_offset", in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())) \
declare_constant_with_value("G1ThreadLocalData::satb_mark_queue_buffer_offset", in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())) \
- declare_constant_with_value("G1ThreadLocalData::dirty_card_queue_index_offset", in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())) \
- declare_constant_with_value("G1ThreadLocalData::dirty_card_queue_buffer_offset", in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()))
+ declare_constant_with_value("G1ThreadLocalData::card_table_base_offset", in_bytes(G1ThreadLocalData::card_table_base_offset())) \
#endif // INCLUDE_G1GC
diff --git a/src/hotspot/share/memory/allocation.cpp b/src/hotspot/share/memory/allocation.cpp
index f158fefdba0..9df91225d6f 100644
--- a/src/hotspot/share/memory/allocation.cpp
+++ b/src/hotspot/share/memory/allocation.cpp
@@ -73,7 +73,7 @@ void* MetaspaceObj::operator new(size_t size, ClassLoaderData* loader_data,
MetaspaceObj::Type type, TRAPS) throw() {
// Klass has its own operator new
assert(type != ClassType, "class has its own operator new");
- return Metaspace::allocate(loader_data, word_size, type, /*use_class_space*/ false, THREAD);
+ return Metaspace::allocate(loader_data, word_size, type, THREAD);
}
void* MetaspaceObj::operator new(size_t size, ClassLoaderData* loader_data,
@@ -81,7 +81,7 @@ void* MetaspaceObj::operator new(size_t size, ClassLoaderData* loader_data,
MetaspaceObj::Type type) throw() {
assert(!Thread::current()->is_Java_thread(), "only allowed by non-Java thread");
assert(type != ClassType, "class has its own operator new");
- return Metaspace::allocate(loader_data, word_size, type, /*use_class_space*/ false);
+ return Metaspace::allocate(loader_data, word_size, type);
}
// This is used for allocating training data. We are allocating training data in many cases where a GC cannot be triggered.
diff --git a/src/hotspot/share/memory/iterator.cpp b/src/hotspot/share/memory/iterator.cpp
index 961130c2b3f..09e924164e8 100644
--- a/src/hotspot/share/memory/iterator.cpp
+++ b/src/hotspot/share/memory/iterator.cpp
@@ -53,16 +53,10 @@ void MarkingNMethodClosure::do_nmethod(nmethod* nm) {
// Process the oops in the nmethod
nm->oops_do(_cl);
- if (_keepalive_nmethods) {
- // CodeCache unloading support
- nm->mark_as_maybe_on_stack();
+ // CodeCache unloading support
+ nm->mark_as_maybe_on_stack();
- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
- bs_nm->disarm(nm);
- }
-
- if (_fix_relocations) {
- nm->fix_oop_relocations();
- }
+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+ bs_nm->disarm(nm);
}
}
diff --git a/src/hotspot/share/memory/iterator.hpp b/src/hotspot/share/memory/iterator.hpp
index 044951142b0..25820d6de02 100644
--- a/src/hotspot/share/memory/iterator.hpp
+++ b/src/hotspot/share/memory/iterator.hpp
@@ -252,17 +252,14 @@ class NMethodToOopClosure : public NMethodClosure {
NMethodToOopClosure(OopClosure* cl, bool fix_relocations) : _cl(cl), _fix_relocations(fix_relocations) {}
void do_nmethod(nmethod* nm) override;
- bool fix_relocations() const { return _fix_relocations; }
const static bool FixRelocations = true;
};
-class MarkingNMethodClosure : public NMethodToOopClosure {
- bool _keepalive_nmethods;
+class MarkingNMethodClosure : public NMethodClosure {
+ OopClosure* _cl;
public:
- MarkingNMethodClosure(OopClosure* cl, bool fix_relocations, bool keepalive_nmethods) :
- NMethodToOopClosure(cl, fix_relocations),
- _keepalive_nmethods(keepalive_nmethods) {}
+ MarkingNMethodClosure(OopClosure* cl) : _cl(cl) {}
// Called for each nmethod.
virtual void do_nmethod(nmethod* nm);
diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp
index 1e3b8d0594f..e686b324004 100644
--- a/src/hotspot/share/memory/metaspace.cpp
+++ b/src/hotspot/share/memory/metaspace.cpp
@@ -872,7 +872,7 @@ size_t Metaspace::max_allocation_word_size() {
// is suitable for calling from non-Java threads.
// Callers are responsible for checking null.
MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
- MetaspaceObj::Type type, bool use_class_space) {
+ MetaspaceObj::Type type) {
assert(word_size <= Metaspace::max_allocation_word_size(),
"allocation size too large (%zu)", word_size);
@@ -882,7 +882,7 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
// Deal with concurrent unloading failed allocation starvation
MetaspaceCriticalAllocation::block_if_concurrent_purge();
- MetadataType mdtype = use_class_space ? ClassType : NonClassType;
+ MetadataType mdtype = (type == MetaspaceObj::ClassType) ? ClassType : NonClassType;
// Try to allocate metadata.
MetaWord* result = loader_data->metaspace_non_null()->allocate(word_size, mdtype);
@@ -906,7 +906,7 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
}
MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
- MetaspaceObj::Type type, bool use_class_space, TRAPS) {
+ MetaspaceObj::Type type, TRAPS) {
if (HAS_PENDING_EXCEPTION) {
assert(false, "Should not allocate with exception pending");
@@ -914,10 +914,10 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
}
assert(!THREAD->owns_locks(), "allocating metaspace while holding mutex");
- MetaWord* result = allocate(loader_data, word_size, type, use_class_space);
+ MetaWord* result = allocate(loader_data, word_size, type);
if (result == nullptr) {
- MetadataType mdtype = use_class_space ? ClassType : NonClassType;
+ MetadataType mdtype = (type == MetaspaceObj::ClassType) ? ClassType : NonClassType;
tracer()->report_metaspace_allocation_failure(loader_data, word_size, type, mdtype);
// Allocation failed.
diff --git a/src/hotspot/share/memory/metaspace.hpp b/src/hotspot/share/memory/metaspace.hpp
index 408dbf6d23f..01ef4b4dd49 100644
--- a/src/hotspot/share/memory/metaspace.hpp
+++ b/src/hotspot/share/memory/metaspace.hpp
@@ -120,12 +120,12 @@ public:
static constexpr size_t min_allocation_word_size = min_allocation_alignment_words;
static MetaWord* allocate(ClassLoaderData* loader_data, size_t word_size,
- MetaspaceObj::Type type, bool use_class_space, TRAPS);
+ MetaspaceObj::Type type, TRAPS);
// Non-TRAPS version of allocate which can be called by a non-Java thread, that returns
// null on failure.
static MetaWord* allocate(ClassLoaderData* loader_data, size_t word_size,
- MetaspaceObj::Type type, bool use_class_space);
+ MetaspaceObj::Type type);
// Returns true if the pointer points into class space, non-class metaspace, or the
// metadata portion of the CDS archive.
diff --git a/src/hotspot/share/nmt/memBaseline.cpp b/src/hotspot/share/nmt/memBaseline.cpp
index d94aa10eab1..118e3ec64c0 100644
--- a/src/hotspot/share/nmt/memBaseline.cpp
+++ b/src/hotspot/share/nmt/memBaseline.cpp
@@ -27,8 +27,7 @@
#include "memory/metaspaceUtils.hpp"
#include "nmt/memBaseline.hpp"
#include "nmt/memTracker.hpp"
-#include "runtime/javaThread.hpp"
-#include "runtime/safepoint.hpp"
+#include "nmt/regionsTree.inline.hpp"
/*
* Sizes are sorted in descending order for reporting
@@ -104,38 +103,6 @@ class MallocAllocationSiteWalker : public MallocSiteWalker {
}
};
-// Walk all virtual memory regions for baselining
-class VirtualMemoryAllocationWalker : public VirtualMemoryWalker {
- private:
- typedef LinkedListImpl<ReservedMemoryRegion> EntryList;
- EntryList _virtual_memory_regions;
- DEBUG_ONLY(address _last_base;)
- public:
- VirtualMemoryAllocationWalker() {
- DEBUG_ONLY(_last_base = nullptr);
- }
-
- bool do_allocation_site(const ReservedMemoryRegion* rgn) {
- assert(rgn->base() >= _last_base, "region unordered?");
- DEBUG_ONLY(_last_base = rgn->base());
- if (rgn->size() > 0) {
- if (_virtual_memory_regions.add(*rgn) != nullptr) {
- return true;
- } else {
- return false;
- }
- } else {
- // Ignore empty sites.
- return true;
- }
- }
-
- LinkedList<ReservedMemoryRegion>* virtual_memory_allocations() {
- return &_virtual_memory_regions;
- }
-};
-
void MemBaseline::baseline_summary() {
MallocMemorySummary::snapshot(&_malloc_memory_snapshot);
{
@@ -158,14 +125,15 @@ bool MemBaseline::baseline_allocation_sites() {
// The malloc sites are collected in size order
_malloc_sites_order = by_size;
- // Virtual memory allocation sites
- VirtualMemoryAllocationWalker virtual_memory_walker;
- if (!MemTracker::walk_virtual_memory(&virtual_memory_walker)) {
- return false;
- }
+ assert(_vma_allocations == nullptr, "must");
- // Virtual memory allocations are collected in call stack order
- _virtual_memory_allocations.move(virtual_memory_walker.virtual_memory_allocations());
+ {
+ MemTracker::NmtVirtualMemoryLocker locker;
+ _vma_allocations = new (mtNMT, std::nothrow) RegionsTree(*VirtualMemoryTracker::Instance::tree());
+ if (_vma_allocations == nullptr) {
+ return false;
+ }
+ }
if (!aggregate_virtual_memory_allocation_sites()) {
return false;
@@ -202,20 +170,28 @@ int compare_allocation_site(const VirtualMemoryAllocationSite& s1,
bool MemBaseline::aggregate_virtual_memory_allocation_sites() {
SortedLinkedList<VirtualMemoryAllocationSite, compare_allocation_site> allocation_sites;
- VirtualMemoryAllocationIterator itr = virtual_memory_allocations();
- const ReservedMemoryRegion* rgn;
VirtualMemoryAllocationSite* site;
- while ((rgn = itr.next()) != nullptr) {
- VirtualMemoryAllocationSite tmp(*rgn->call_stack(), rgn->mem_tag());
+ bool failed_oom = false;
+ _vma_allocations->visit_reserved_regions([&](ReservedMemoryRegion& rgn) {
+ VirtualMemoryAllocationSite tmp(*rgn.call_stack(), rgn.mem_tag());
site = allocation_sites.find(tmp);
if (site == nullptr) {
LinkedListNode<VirtualMemoryAllocationSite>* node =
allocation_sites.add(tmp);
- if (node == nullptr) return false;
+ if (node == nullptr) {
+ failed_oom = true;
+ return false;
+ }
site = node->data();
}
- site->reserve_memory(rgn->size());
- site->commit_memory(VirtualMemoryTracker::Instance::committed_size(rgn));
+ site->reserve_memory(rgn.size());
+
+ site->commit_memory(_vma_allocations->committed_size(rgn));
+ return true;
+ });
+
+ if (failed_oom) {
+ return false;
}
_virtual_memory_sites.move(&allocation_sites);
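
The rewrite above replaces the VirtualMemoryAllocationWalker class with a lambda passed to visit_reserved_regions(); because the visitor's bool return value only controls early termination, a captured failed_oom flag is used to report allocation failure to the caller. A self-contained sketch of that pattern (illustrative types, not the NMT classes):

    #include <cstddef>
    #include <vector>

    struct Region { size_t size; };

    // Calls f for each region; f returns false to stop the traversal early.
    template <typename F>
    static void visit_regions(const std::vector<Region>& regions, F f) {
      for (const Region& r : regions) {
        if (!f(r)) {
          return;
        }
      }
    }

    static bool aggregate(const std::vector<Region>& regions) {
      bool failed_oom = false;
      size_t total = 0;
      visit_regions(regions, [&](const Region& r) {
        if (r.size == 0) {        // stand-in for an allocation failure inside the visitor
          failed_oom = true;
          return false;           // request early exit
        }
        total += r.size;
        return true;
      });
      return !failed_oom;         // distinguish OOM from a normal, complete traversal
    }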
diff --git a/src/hotspot/share/nmt/memBaseline.hpp b/src/hotspot/share/nmt/memBaseline.hpp
index 2fff4cc666c..a9d604deb19 100644
--- a/src/hotspot/share/nmt/memBaseline.hpp
+++ b/src/hotspot/share/nmt/memBaseline.hpp
@@ -35,7 +35,6 @@
typedef LinkedListIterator<MallocSite> MallocSiteIterator;
typedef LinkedListIterator<VirtualMemoryAllocationSite> VirtualMemorySiteIterator;
-typedef LinkedListIterator<ReservedMemoryRegion> VirtualMemoryAllocationIterator;
/*
* Baseline a memory snapshot
@@ -71,7 +70,7 @@ class MemBaseline {
LinkedListImpl _malloc_sites;
// All virtual memory allocations
- LinkedListImpl _virtual_memory_allocations;
+ RegionsTree* _vma_allocations;
// Virtual memory allocations by allocation sites, always in by_address
// order
@@ -86,9 +85,14 @@ class MemBaseline {
// create a memory baseline
MemBaseline():
_instance_class_count(0), _array_class_count(0), _thread_count(0),
+ _vma_allocations(nullptr),
_baseline_type(Not_baselined) {
}
+ ~MemBaseline() {
+ delete _vma_allocations;
+ }
+
void baseline(bool summaryOnly = true);
BaselineType baseline_type() const { return _baseline_type; }
@@ -110,9 +114,9 @@ class MemBaseline {
// Virtual memory allocation iterator always returns in virtual memory
// base address order.
- VirtualMemoryAllocationIterator virtual_memory_allocations() {
- assert(!_virtual_memory_allocations.is_empty(), "Not detail baseline");
- return VirtualMemoryAllocationIterator(_virtual_memory_allocations.head());
+ RegionsTree* virtual_memory_allocations() {
+ assert(_vma_allocations != nullptr, "Not detail baseline");
+ return _vma_allocations;
}
// Total reserved memory = total malloc'd memory + total reserved virtual
@@ -185,7 +189,8 @@ class MemBaseline {
_malloc_sites.clear();
_virtual_memory_sites.clear();
- _virtual_memory_allocations.clear();
+ delete _vma_allocations;
+ _vma_allocations = nullptr;
}
private:
diff --git a/src/hotspot/share/nmt/memReporter.cpp b/src/hotspot/share/nmt/memReporter.cpp
index 65d4d76942b..772bda2885b 100644
--- a/src/hotspot/share/nmt/memReporter.cpp
+++ b/src/hotspot/share/nmt/memReporter.cpp
@@ -394,13 +394,11 @@ int MemDetailReporter::report_virtual_memory_allocation_sites() {
void MemDetailReporter::report_virtual_memory_map() {
// Virtual memory map always in base address order
- VirtualMemoryAllocationIterator itr = _baseline.virtual_memory_allocations();
- const ReservedMemoryRegion* rgn;
-
output()->print_cr("Virtual memory map:");
- while ((rgn = itr.next()) != nullptr) {
- report_virtual_memory_region(rgn);
- }
+ _baseline.virtual_memory_allocations()->visit_reserved_regions([&](ReservedMemoryRegion& rgn) {
+ report_virtual_memory_region(&rgn);
+ return true;
+ });
}
void MemDetailReporter::report_virtual_memory_region(const ReservedMemoryRegion* reserved_rgn) {
@@ -421,7 +419,7 @@ void MemDetailReporter::report_virtual_memory_region(const ReservedMemoryRegion*
outputStream* out = output();
const char* scale = current_scale();
const NativeCallStack* stack = reserved_rgn->call_stack();
- bool all_committed = reserved_rgn->size() == VirtualMemoryTracker::Instance::committed_size(reserved_rgn);
+ bool all_committed = reserved_rgn->size() == _baseline.virtual_memory_allocations()->committed_size(*reserved_rgn);
const char* region_type = (all_committed ? "reserved and committed" : "reserved");
out->cr();
print_virtual_memory_region(region_type, reserved_rgn->base(), reserved_rgn->size());
@@ -435,7 +433,7 @@ void MemDetailReporter::report_virtual_memory_region(const ReservedMemoryRegion*
if (all_committed) {
bool reserved_and_committed = false;
- VirtualMemoryTracker::Instance::tree()->visit_committed_regions(*reserved_rgn,
+ _baseline.virtual_memory_allocations()->visit_committed_regions(*reserved_rgn,
[&](CommittedMemoryRegion& committed_rgn) {
if (committed_rgn.equals(*reserved_rgn)) {
// One region spanning the entire reserved region, with the same stack trace.
@@ -468,7 +466,7 @@ void MemDetailReporter::report_virtual_memory_region(const ReservedMemoryRegion*
)
};
- VirtualMemoryTracker::Instance::tree()->visit_committed_regions(*reserved_rgn,
+ _baseline.virtual_memory_allocations()->visit_committed_regions(*reserved_rgn,
[&](CommittedMemoryRegion& crgn) {
print_committed_rgn(crgn);
return true;
diff --git a/src/hotspot/share/nmt/nmtNativeCallStackStorage.cpp b/src/hotspot/share/nmt/nmtNativeCallStackStorage.cpp
index 3e5c1d2f0ea..9a2ecd57ecc 100644
--- a/src/hotspot/share/nmt/nmtNativeCallStackStorage.cpp
+++ b/src/hotspot/share/nmt/nmtNativeCallStackStorage.cpp
@@ -57,3 +57,21 @@ NativeCallStackStorage::NativeCallStackStorage(bool is_detailed_mode, int table_
NativeCallStackStorage::~NativeCallStackStorage() {
FREE_C_HEAP_ARRAY(LinkPtr, _table);
}
+
+NativeCallStackStorage::NativeCallStackStorage(const NativeCallStackStorage& other)
+ : _table_size(other._table_size),
+ _table(nullptr),
+ _stacks(),
+ _is_detailed_mode(other._is_detailed_mode),
+ _fake_stack(other._fake_stack) {
+ if (_is_detailed_mode) {
+ _table = NEW_C_HEAP_ARRAY(TableEntryIndex, _table_size, mtNMT);
+ for (int i = 0; i < _table_size; i++) {
+ _table[i] = other._table[i];
+ }
+ }
+ _stacks.reserve(other._stacks.length());
+ for (int i = 0; i < other._stacks.length(); i++) {
+ _stacks.at_grow(i) = other._stacks.at(i);
+ }
+}
diff --git a/src/hotspot/share/nmt/nmtNativeCallStackStorage.hpp b/src/hotspot/share/nmt/nmtNativeCallStackStorage.hpp
index 6f194cfa5a1..6ead8f49248 100644
--- a/src/hotspot/share/nmt/nmtNativeCallStackStorage.hpp
+++ b/src/hotspot/share/nmt/nmtNativeCallStackStorage.hpp
@@ -95,7 +95,8 @@ public:
}
NativeCallStackStorage(bool is_detailed_mode, int table_size = default_table_size);
-
+ NativeCallStackStorage(const NativeCallStackStorage& other);
+ NativeCallStackStorage& operator=(const NativeCallStackStorage& other) = delete;
~NativeCallStackStorage();
};
diff --git a/src/hotspot/share/nmt/regionsTree.cpp b/src/hotspot/share/nmt/regionsTree.cpp
index 370c69a2485..a2f5a5df67a 100644
--- a/src/hotspot/share/nmt/regionsTree.cpp
+++ b/src/hotspot/share/nmt/regionsTree.cpp
@@ -22,6 +22,8 @@
*
*/
#include "nmt/regionsTree.hpp"
+#include "nmt/regionsTree.inline.hpp"
+#include "nmt/virtualMemoryTracker.hpp"
VMATree::SummaryDiff RegionsTree::commit_region(address addr, size_t size, const NativeCallStack& stack) {
return commit_mapping((VMATree::position)addr, size, make_region_data(stack, mtNone), /*use tag inplace*/ true);
@@ -54,4 +56,13 @@ void RegionsTree::print_on(outputStream* st) {
return true;
});
}
-#endif
\ No newline at end of file
+#endif
+
+size_t RegionsTree::committed_size(const ReservedMemoryRegion& rgn) {
+ size_t result = 0;
+ visit_committed_regions(rgn, [&](CommittedMemoryRegion& crgn) {
+ result += crgn.size();
+ return true;
+ });
+ return result;
+}
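
The new RegionsTree::committed_size() above simply sums the committed sub-ranges reported for one reserved region. A tiny standalone sketch of the same arithmetic, using an illustrative CommittedRange struct rather than the real CommittedMemoryRegion:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Illustrative only: committed sub-ranges of one reserved region,
    // expressed as (offset, size) pairs.
    struct CommittedRange { size_t offset; size_t size; };

    // Mirrors the shape of RegionsTree::committed_size(): sum the sizes of
    // all committed ranges that the visitor reports for one reserved region.
    size_t committed_size(const std::vector<CommittedRange>& committed) {
      size_t result = 0;
      for (const CommittedRange& c : committed) {
        result += c.size;
      }
      return result;
    }

    int main() {
      // A 64 KB reservation with two committed stretches of 16 KB and 8 KB.
      std::vector<CommittedRange> committed = {{0, 16 * 1024}, {32 * 1024, 8 * 1024}};
      assert(committed_size(committed) == 24 * 1024);
      return 0;
    }
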
diff --git a/src/hotspot/share/nmt/regionsTree.hpp b/src/hotspot/share/nmt/regionsTree.hpp
index bf2ab711b2d..35272c27423 100644
--- a/src/hotspot/share/nmt/regionsTree.hpp
+++ b/src/hotspot/share/nmt/regionsTree.hpp
@@ -40,6 +40,12 @@ class RegionsTree : public VMATree {
public:
RegionsTree(bool with_storage) : VMATree() , _ncs_storage(with_storage), _with_storage(with_storage) { }
+ RegionsTree(const RegionsTree& other)
+ : VMATree(other),
+ _ncs_storage(other._ncs_storage),
+ _with_storage(other._with_storage) {}
+ RegionsTree& operator=(const RegionsTree& other) = delete;
+
ReservedMemoryRegion find_reserved_region(address addr);
SummaryDiff commit_region(address addr, size_t size, const NativeCallStack& stack);
@@ -91,6 +97,8 @@ class RegionsTree : public VMATree {
NativeCallStackStorage::StackIndex si = node.out_stack_index();
return _ncs_storage.get(si);
}
+
+ size_t committed_size(const ReservedMemoryRegion& rgn);
};
-#endif // NMT_REGIONSTREE_HPP
\ No newline at end of file
+#endif // NMT_REGIONSTREE_HPP
diff --git a/src/hotspot/share/nmt/vmatree.cpp b/src/hotspot/share/nmt/vmatree.cpp
index 4f6f8e12185..69887068cb2 100644
--- a/src/hotspot/share/nmt/vmatree.cpp
+++ b/src/hotspot/share/nmt/vmatree.cpp
@@ -744,3 +744,10 @@ void VMATree::SummaryDiff::print_on(outputStream* out) {
}
}
#endif
+
+void VMATree::clear() {
+ _tree.remove_all();
+}
+bool VMATree::is_empty() {
+ return _tree.size() == 0;
+}
diff --git a/src/hotspot/share/nmt/vmatree.hpp b/src/hotspot/share/nmt/vmatree.hpp
index 1b5729054e4..dff2491c69c 100644
--- a/src/hotspot/share/nmt/vmatree.hpp
+++ b/src/hotspot/share/nmt/vmatree.hpp
@@ -30,6 +30,7 @@
#include "nmt/nmtNativeCallStackStorage.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ostream.hpp"
+#include "utilities/rbTree.hpp"
#include "utilities/rbTree.inline.hpp"
#include
@@ -39,7 +40,7 @@
// For example, the state may go from released memory to committed memory,
// or from committed memory of a certain MemTag to committed memory of a different MemTag.
// The set of points is stored in a balanced binary tree for efficient querying and updating.
-class VMATree {
+class VMATree : public CHeapObjBase {
friend class NMTVMATreeTest;
friend class VMTWithVMATreeTest;
// A position in memory.
@@ -65,7 +66,6 @@ private:
static const char* statetype_strings[static_cast(StateType::st_number_of_states)];
public:
- NONCOPYABLE(VMATree);
static const char* statetype_to_string(StateType type) {
assert(type < StateType::st_number_of_states, "must be");
@@ -226,6 +226,11 @@ private:
public:
VMATree() : _tree() {}
+ VMATree(const VMATree& other) : _tree() {
+ bool success = other._tree.copy_into(_tree);
+ assert(success, "VMATree dies on OOM");
+ }
+ VMATree& operator=(VMATree const&) = delete;
struct SingleDiff {
using delta = int64_t;
@@ -329,5 +334,8 @@ public:
_tree.visit_range_in_order(from, to, f);
}
VMARBTree& tree() { return _tree; }
+
+ void clear();
+ bool is_empty();
};
#endif
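
The VMATree copy constructor, clear() and is_empty() added above exist so a baseline can hold its own snapshot that later tracker updates cannot disturb. A minimal sketch of that snapshot-copy behaviour, using a std::map-backed stand-in (SnapshotTree is illustrative, not the real red-black tree, which copies via copy_into() and asserts on allocation failure):

    #include <cassert>
    #include <cstdint>
    #include <map>

    // Illustrative stand-in for the tree of state-change points: a copyable
    // map from position to an opaque state value.
    class SnapshotTree {
      std::map<uintptr_t, int> _points;
     public:
      SnapshotTree() = default;
      SnapshotTree(const SnapshotTree& other) : _points(other._points) {}  // deep copy
      SnapshotTree& operator=(const SnapshotTree&) = delete;

      void set(uintptr_t pos, int state) { _points[pos] = state; }
      void clear()          { _points.clear(); }
      bool is_empty() const { return _points.empty(); }
    };

    int main() {
      SnapshotTree live;
      live.set(0x1000, 1);
      live.set(0x2000, 2);

      SnapshotTree baseline(live);   // a detail baseline keeps its own copy
      live.set(0x3000, 3);           // later tracker updates do not touch it
      baseline.clear();              // clearing the copy leaves the live tree intact

      assert(baseline.is_empty());
      assert(!live.is_empty());
      return 0;
    }
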
diff --git a/src/hotspot/share/oops/array.inline.hpp b/src/hotspot/share/oops/array.inline.hpp
index 3fa7fd15fb3..30cf2e38f77 100644
--- a/src/hotspot/share/oops/array.inline.hpp
+++ b/src/hotspot/share/oops/array.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,14 +34,14 @@ template <typename T>
inline void* Array<T>::operator new(size_t size, ClassLoaderData* loader_data, int length, TRAPS) throw() {
size_t word_size = Array::size(length);
return (void*) Metaspace::allocate(loader_data, word_size,
- MetaspaceObj::array_type(sizeof(T)), false, THREAD);
+ MetaspaceObj::array_type(sizeof(T)), THREAD);
}
template <typename T>
inline void* Array<T>::operator new(size_t size, ClassLoaderData* loader_data, int length) throw() {
size_t word_size = Array::size(length);
return (void*) Metaspace::allocate(loader_data, word_size,
- MetaspaceObj::array_type(sizeof(T)), false);
+ MetaspaceObj::array_type(sizeof(T)));
}
template <typename T>
diff --git a/src/hotspot/share/oops/arrayKlass.cpp b/src/hotspot/share/oops/arrayKlass.cpp
index 32a86c7ab24..dc64abd6cd7 100644
--- a/src/hotspot/share/oops/arrayKlass.cpp
+++ b/src/hotspot/share/oops/arrayKlass.cpp
@@ -41,10 +41,6 @@
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
-void* ArrayKlass::operator new(size_t size, ClassLoaderData* loader_data, size_t word_size, TRAPS) throw() {
- return Metaspace::allocate(loader_data, word_size, MetaspaceObj::ClassType, true, THREAD);
-}
-
ArrayKlass::ArrayKlass() {
assert(CDSConfig::is_dumping_static_archive() || CDSConfig::is_using_archive(), "only for CDS");
}
diff --git a/src/hotspot/share/oops/arrayKlass.hpp b/src/hotspot/share/oops/arrayKlass.hpp
index 5bfe46573d3..02d72c3cde8 100644
--- a/src/hotspot/share/oops/arrayKlass.hpp
+++ b/src/hotspot/share/oops/arrayKlass.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -49,8 +49,6 @@ class ArrayKlass: public Klass {
ArrayKlass(Symbol* name, KlassKind kind);
ArrayKlass();
- void* operator new(size_t size, ClassLoaderData* loader_data, size_t word_size, TRAPS) throw();
-
public:
// Testing operation
DEBUG_ONLY(bool is_array_klass_slow() const { return true; })
diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp
index f5e5628c99a..932d1f246ad 100644
--- a/src/hotspot/share/oops/instanceKlass.cpp
+++ b/src/hotspot/share/oops/instanceKlass.cpp
@@ -455,11 +455,6 @@ const char* InstanceKlass::nest_host_error() {
}
}
-void* InstanceKlass::operator new(size_t size, ClassLoaderData* loader_data, size_t word_size,
- bool use_class_space, TRAPS) throw() {
- return Metaspace::allocate(loader_data, word_size, ClassType, use_class_space, THREAD);
-}
-
InstanceKlass* InstanceKlass::allocate_instance_klass(const ClassFileParser& parser, TRAPS) {
const int size = InstanceKlass::size(parser.vtable_size(),
parser.itable_size(),
@@ -472,27 +467,26 @@ InstanceKlass* InstanceKlass::allocate_instance_klass(const ClassFileParser& par
assert(loader_data != nullptr, "invariant");
InstanceKlass* ik;
- const bool use_class_space = UseClassMetaspaceForAllClasses || parser.klass_needs_narrow_id();
// Allocation
if (parser.is_instance_ref_klass()) {
// java.lang.ref.Reference
- ik = new (loader_data, size, use_class_space, THREAD) InstanceRefKlass(parser);
+ ik = new (loader_data, size, THREAD) InstanceRefKlass(parser);
} else if (class_name == vmSymbols::java_lang_Class()) {
// mirror - java.lang.Class
- ik = new (loader_data, size, use_class_space, THREAD) InstanceMirrorKlass(parser);
+ ik = new (loader_data, size, THREAD) InstanceMirrorKlass(parser);
} else if (is_stack_chunk_class(class_name, loader_data)) {
// stack chunk
- ik = new (loader_data, size, use_class_space, THREAD) InstanceStackChunkKlass(parser);
+ ik = new (loader_data, size, THREAD) InstanceStackChunkKlass(parser);
} else if (is_class_loader(class_name, parser)) {
// class loader - java.lang.ClassLoader
- ik = new (loader_data, size, use_class_space, THREAD) InstanceClassLoaderKlass(parser);
+ ik = new (loader_data, size, THREAD) InstanceClassLoaderKlass(parser);
} else {
// normal
- ik = new (loader_data, size, use_class_space, THREAD) InstanceKlass(parser);
+ ik = new (loader_data, size, THREAD) InstanceKlass(parser);
}
- if (ik != nullptr && UseCompressedClassPointers && use_class_space) {
+ if (ik != nullptr && UseCompressedClassPointers) {
assert(CompressedKlassPointers::is_encodable(ik),
"Klass " PTR_FORMAT "needs a narrow Klass ID, but is not encodable", p2i(ik));
}
diff --git a/src/hotspot/share/oops/instanceKlass.hpp b/src/hotspot/share/oops/instanceKlass.hpp
index c2d5e9cc098..a24c38cf259 100644
--- a/src/hotspot/share/oops/instanceKlass.hpp
+++ b/src/hotspot/share/oops/instanceKlass.hpp
@@ -143,8 +143,6 @@ class InstanceKlass: public Klass {
protected:
InstanceKlass(const ClassFileParser& parser, KlassKind kind = Kind, ReferenceType reference_type = REF_NONE);
- void* operator new(size_t size, ClassLoaderData* loader_data, size_t word_size, bool use_class_space, TRAPS) throw();
-
public:
InstanceKlass();
diff --git a/src/hotspot/share/oops/klass.cpp b/src/hotspot/share/oops/klass.cpp
index b6e60b4fa7d..a93875b86a5 100644
--- a/src/hotspot/share/oops/klass.cpp
+++ b/src/hotspot/share/oops/klass.cpp
@@ -279,18 +279,19 @@ static markWord make_prototype(const Klass* kls) {
#ifdef _LP64
if (UseCompactObjectHeaders) {
// With compact object headers, the narrow Klass ID is part of the mark word.
- // We therfore seed the mark word with the narrow Klass ID.
- // Note that only those Klass that can be instantiated have a narrow Klass ID.
- // For those who don't, we leave the klass bits empty and assert if someone
- // tries to use those.
- const narrowKlass nk = CompressedKlassPointers::is_encodable(kls) ?
- CompressedKlassPointers::encode(const_cast<Klass*>(kls)) : 0;
+ // We therefore seed the mark word with the narrow Klass ID.
+ precond(CompressedKlassPointers::is_encodable(kls));
+ const narrowKlass nk = CompressedKlassPointers::encode(const_cast<Klass*>(kls));
prototype = prototype.set_narrow_klass(nk);
}
#endif
return prototype;
}
+void* Klass::operator new(size_t size, ClassLoaderData* loader_data, size_t word_size, TRAPS) throw() {
+ return Metaspace::allocate(loader_data, word_size, MetaspaceObj::ClassType, THREAD);
+}
+
Klass::Klass() : _kind(UnknownKlassKind) {
assert(CDSConfig::is_dumping_static_archive() || CDSConfig::is_using_archive(), "only for cds");
}
@@ -1060,7 +1061,7 @@ void Klass::verify_on(outputStream* st) {
// This can be expensive, but it is worth checking that this klass is actually
// in the CLD graph but not in production.
#ifdef ASSERT
- if (UseCompressedClassPointers && needs_narrow_id()) {
+ if (UseCompressedClassPointers) {
// Stricter checks for both correct alignment and placement
CompressedKlassPointers::check_encodable(this);
} else {
diff --git a/src/hotspot/share/oops/klass.hpp b/src/hotspot/share/oops/klass.hpp
index ad03c1e2ed6..70d9ce3a881 100644
--- a/src/hotspot/share/oops/klass.hpp
+++ b/src/hotspot/share/oops/klass.hpp
@@ -207,6 +207,8 @@ protected:
Klass(KlassKind kind);
Klass();
+ void* operator new(size_t size, ClassLoaderData* loader_data, size_t word_size, TRAPS) throw();
+
public:
int kind() { return _kind; }
@@ -794,10 +796,6 @@ public:
static bool is_valid(Klass* k);
static void on_secondary_supers_verification_failure(Klass* super, Klass* sub, bool linear_result, bool table_result, const char* msg);
-
- // Returns true if this Klass needs to be addressable via narrow Klass ID.
- inline bool needs_narrow_id() const;
-
};
#endif // SHARE_OOPS_KLASS_HPP
diff --git a/src/hotspot/share/oops/klass.inline.hpp b/src/hotspot/share/oops/klass.inline.hpp
index 19d4954ccad..4ac50cbc180 100644
--- a/src/hotspot/share/oops/klass.inline.hpp
+++ b/src/hotspot/share/oops/klass.inline.hpp
@@ -175,13 +175,4 @@ inline bool Klass::search_secondary_supers(Klass *k) const {
return result;
}
-// Returns true if this Klass needs to be addressable via narrow Klass ID.
-inline bool Klass::needs_narrow_id() const {
- // Classes that are never instantiated need no narrow Klass Id, since the
- // only point of having a narrow id is to put it into an object header. Keeping
- // never instantiated classes out of class space lessens the class space pressure.
- // For more details, see JDK-8338526.
- // Note: don't call this function before access flags are initialized.
- return UseClassMetaspaceForAllClasses || (!is_abstract() && !is_interface());
-}
#endif // SHARE_OOPS_KLASS_INLINE_HPP
diff --git a/src/hotspot/share/oops/oop.cpp b/src/hotspot/share/oops/oop.cpp
index 51480c68c22..f874a39bf31 100644
--- a/src/hotspot/share/oops/oop.cpp
+++ b/src/hotspot/share/oops/oop.cpp
@@ -87,7 +87,16 @@ void oopDesc::print_value_on(outputStream* st) const {
java_lang_String::print(obj, st);
print_address_on(st);
} else {
- klass()->oop_print_value_on(obj, st);
+ Klass* k = klass_without_asserts();
+ if (k == nullptr) {
+ st->print("null klass");
+ } else if (!Metaspace::contains(k)) {
+ st->print("klass not in Metaspace");
+ } else if (!k->is_klass()) {
+ st->print("klass not a Klass");
+ } else {
+ k->oop_print_value_on(obj, st);
+ }
}
}
diff --git a/src/hotspot/share/opto/divnode.cpp b/src/hotspot/share/opto/divnode.cpp
index 0d1337909fb..213f2e1e9a8 100644
--- a/src/hotspot/share/opto/divnode.cpp
+++ b/src/hotspot/share/opto/divnode.cpp
@@ -1198,44 +1198,76 @@ Node *ModINode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
//------------------------------Value------------------------------------------
-const Type* ModINode::Value(PhaseGVN* phase) const {
+static const Type* mod_value(const PhaseGVN* phase, const Node* in1, const Node* in2, const BasicType bt) {
+ assert(bt == T_INT || bt == T_LONG, "unexpected basic type");
// Either input is TOP ==> the result is TOP
- const Type *t1 = phase->type( in(1) );
- const Type *t2 = phase->type( in(2) );
- if( t1 == Type::TOP ) return Type::TOP;
- if( t2 == Type::TOP ) return Type::TOP;
+ const Type* t1 = phase->type(in1);
+ const Type* t2 = phase->type(in2);
+ if (t1 == Type::TOP) { return Type::TOP; }
+ if (t2 == Type::TOP) { return Type::TOP; }
// We always generate the dynamic check for 0.
// 0 MOD X is 0
- if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ if (t1 == TypeInteger::zero(bt)) { return t1; }
+
// X MOD X is 0
- if (in(1) == in(2)) {
- return TypeInt::ZERO;
+ if (in1 == in2) {
+ return TypeInteger::zero(bt);
}
- // Either input is BOTTOM ==> the result is the local BOTTOM
- const Type *bot = bottom_type();
- if( (t1 == bot) || (t2 == bot) ||
- (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
- return bot;
-
- const TypeInt *i1 = t1->is_int();
- const TypeInt *i2 = t2->is_int();
- if( !i1->is_con() || !i2->is_con() ) {
- if( i1->_lo >= 0 && i2->_lo >= 0 )
- return TypeInt::POS;
- // If both numbers are not constants, we know little.
- return TypeInt::INT;
- }
// Mod by zero? Throw exception at runtime!
- if( !i2->get_con() ) return TypeInt::POS;
+ if (t2 == TypeInteger::zero(bt)) {
+ return Type::TOP;
+ }
- // We must be modulo'ing 2 float constants.
- // Check for min_jint % '-1', result is defined to be '0'.
- if( i1->get_con() == min_jint && i2->get_con() == -1 )
- return TypeInt::ZERO;
+ const TypeInteger* i1 = t1->is_integer(bt);
+ const TypeInteger* i2 = t2->is_integer(bt);
+ if (i1->is_con() && i2->is_con()) {
+ // We must be modulo'ing 2 int constants.
+ // Special case: min_jlong % '-1' is UB, and e.g., x86 triggers a division error.
+ // Any value % -1 is 0, so we can return 0 and avoid that scenario.
+ if (i2->get_con_as_long(bt) == -1) {
+ return TypeInteger::zero(bt);
+ }
+ return TypeInteger::make(i1->get_con_as_long(bt) % i2->get_con_as_long(bt), bt);
+ }
+ // We checked that t2 is not the zero constant. Hence, at least i2->_lo or i2->_hi must be non-zero,
+ // and hence its absolute value is bigger than zero. Hence, the magnitude of the divisor (i.e. the
+ // largest absolute value for any value in i2) must be in the range [1, 2^31] or [1, 2^63], depending
+ // on the BasicType.
+ julong divisor_magnitude = MAX2(g_uabs(i2->lo_as_long()), g_uabs(i2->hi_as_long()));
+ // JVMS lrem bytecode: "the magnitude of the result is always less than the magnitude of the divisor"
+ // "less than" means we can subtract 1 to get an inclusive upper bound in [0, 2^31-1] or [0, 2^63-1], respectively
+ jlong hi = static_cast<jlong>(divisor_magnitude - 1);
+ jlong lo = -hi;
+ // JVMS lrem bytecode: "the result of the remainder operation can be negative only if the dividend
+ // is negative and can be positive only if the dividend is positive"
+ // Note that a dividend with bounds e.g. lo == -4 and hi == -1 can still result in values
+ // above hi; e.g., -3 % 3 == 0.
+ // That means we cannot restrict the bound that is closer to zero beyond knowing its sign (or zero).
+ if (i1->hi_as_long() <= 0) {
+ // all dividends are not positive, so the result is not positive
+ hi = 0;
+ // if the dividend is known to be closer to zero, use that as a lower limit
+ lo = MAX2(lo, i1->lo_as_long());
+ } else if (i1->lo_as_long() >= 0) {
+ // all dividends are not negative, so the result is not negative
+ lo = 0;
+ // if the dividend is known to be closer to zero, use that as an upper limit
+ hi = MIN2(hi, i1->hi_as_long());
+ } else {
+ // Mixed signs, so we don't know the sign of the result, but the result is
+ // either the dividend itself or a value closer to zero than the dividend,
+ // and it is closer to zero than the divisor.
+ // As we know i1->_lo < 0 and i1->_hi > 0, we can use these bounds directly.
+ lo = MAX2(lo, i1->lo_as_long());
+ hi = MIN2(hi, i1->hi_as_long());
+ }
+ return TypeInteger::make(lo, hi, MAX2(i1->_widen, i2->_widen), bt);
+}
- return TypeInt::make( i1->get_con() % i2->get_con() );
+const Type* ModINode::Value(PhaseGVN* phase) const {
+ return mod_value(phase, in(1), in(2), T_INT);
}
//=============================================================================
@@ -1464,43 +1496,7 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
//------------------------------Value------------------------------------------
const Type* ModLNode::Value(PhaseGVN* phase) const {
- // Either input is TOP ==> the result is TOP
- const Type *t1 = phase->type( in(1) );
- const Type *t2 = phase->type( in(2) );
- if( t1 == Type::TOP ) return Type::TOP;
- if( t2 == Type::TOP ) return Type::TOP;
-
- // We always generate the dynamic check for 0.
- // 0 MOD X is 0
- if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
- // X MOD X is 0
- if (in(1) == in(2)) {
- return TypeLong::ZERO;
- }
-
- // Either input is BOTTOM ==> the result is the local BOTTOM
- const Type *bot = bottom_type();
- if( (t1 == bot) || (t2 == bot) ||
- (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
- return bot;
-
- const TypeLong *i1 = t1->is_long();
- const TypeLong *i2 = t2->is_long();
- if( !i1->is_con() || !i2->is_con() ) {
- if( i1->_lo >= CONST64(0) && i2->_lo >= CONST64(0) )
- return TypeLong::POS;
- // If both numbers are not constants, we know little.
- return TypeLong::LONG;
- }
- // Mod by zero? Throw exception at runtime!
- if( !i2->get_con() ) return TypeLong::POS;
-
- // We must be modulo'ing 2 float constants.
- // Check for min_jint % '-1', result is defined to be '0'.
- if( i1->get_con() == min_jlong && i2->get_con() == -1 )
- return TypeLong::ZERO;
-
- return TypeLong::make( i1->get_con() % i2->get_con() );
+ return mod_value(phase, in(1), in(2), T_LONG);
}
Node *UModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
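
The bound derivation in mod_value() follows the JVMS remainder rules: the magnitude of the result is strictly less than the magnitude of the divisor, and the result's sign follows the dividend. Below is a standalone restatement of that interval arithmetic on plain int64_t bounds, with a worked case; Range and mod_range are illustrative helpers, and the sketch assumes the divisor magnitude fits comfortably in a signed 64-bit value.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Illustrative re-statement of the interval reasoning in mod_value(),
    // on plain int64_t bounds rather than C2 TypeInteger values.
    struct Range { int64_t lo, hi; };

    Range mod_range(Range dividend, Range divisor) {
      uint64_t magnitude = std::max<uint64_t>(std::llabs(divisor.lo), std::llabs(divisor.hi));
      int64_t hi = (int64_t)(magnitude - 1);   // |result| < |divisor|
      int64_t lo = -hi;
      if (dividend.hi <= 0) {                  // non-positive dividends -> non-positive result
        hi = 0;
        lo = std::max(lo, dividend.lo);
      } else if (dividend.lo >= 0) {           // non-negative dividends -> non-negative result
        lo = 0;
        hi = std::min(hi, dividend.hi);
      } else {                                 // mixed signs: clamp both ends by the dividend
        lo = std::max(lo, dividend.lo);
        hi = std::min(hi, dividend.hi);
      }
      return {lo, hi};
    }

    int main() {
      // Dividend in [-10, 5], divisor in [3, 7]: magnitude 7, so the raw bound
      // is [-6, 6]; the mixed-sign dividend then clamps it to [-6, 5].
      Range r = mod_range({-10, 5}, {3, 7});
      assert(r.lo == -6 && r.hi == 5);
      return 0;
    }
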
diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp
index eff482350b6..e6a593770b4 100644
--- a/src/hotspot/share/opto/escape.cpp
+++ b/src/hotspot/share/opto/escape.cpp
@@ -3129,6 +3129,14 @@ void ConnectionGraph::find_scalar_replaceable_allocs(GrowableArrayis_LocalVar()) {
+ Node* phi = use->ideal_node();
+ if (phi->Opcode() == Op_Phi && reducible_merges.member(phi) && !can_reduce_phi(phi->as_Phi())) {
+ set_not_scalar_replaceable(jobj NOT_PRODUCT(COMMA "is merged in a non-reducible phi"));
+ reducible_merges.yank(phi);
+ found_nsr_alloc = true;
+ break;
+ }
}
}
}
diff --git a/src/hotspot/share/opto/idealGraphPrinter.cpp b/src/hotspot/share/opto/idealGraphPrinter.cpp
index cbf972166c2..19eaf1b369e 100644
--- a/src/hotspot/share/opto/idealGraphPrinter.cpp
+++ b/src/hotspot/share/opto/idealGraphPrinter.cpp
@@ -448,6 +448,11 @@ void IdealGraphPrinter::visit_node(Node* n, bool edges) {
}
}
}
+ if (n->adr_type() != nullptr) {
+ stringStream adr_type_stream;
+ n->adr_type()->dump_on(&adr_type_stream);
+ print_prop("adr_type", adr_type_stream.freeze());
+ }
if (C->cfg() != nullptr) {
Block* block = C->cfg()->get_block_for_node(node);
diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp
index 5ecc038954d..f5f7a4231f2 100644
--- a/src/hotspot/share/opto/node.cpp
+++ b/src/hotspot/share/opto/node.cpp
@@ -1216,6 +1216,9 @@ bool Node::has_special_unique_user() const {
} else if ((is_IfFalse() || is_IfTrue()) && n->is_If()) {
// See IfNode::fold_compares
return true;
+ } else if (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) {
+ // Condition for XorVMask(VectorMaskCmp(x,y,cond), MaskAll(true)) ==> VectorMaskCmp(x,y,ncond)
+ return true;
} else {
return false;
}
diff --git a/src/hotspot/share/opto/parse2.cpp b/src/hotspot/share/opto/parse2.cpp
index 04b6e49b620..8b44d8b3491 100644
--- a/src/hotspot/share/opto/parse2.cpp
+++ b/src/hotspot/share/opto/parse2.cpp
@@ -2779,7 +2779,7 @@ void Parse::do_one_bytecode() {
if (C->should_print_igv(perBytecode)) {
IdealGraphPrinter* printer = C->igv_printer();
char buffer[256];
- jio_snprintf(buffer, sizeof(buffer), "Bytecode %d: %s", bci(), Bytecodes::name(bc()));
+ jio_snprintf(buffer, sizeof(buffer), "Bytecode %d: %s, map: %d", bci(), Bytecodes::name(bc()), map() == nullptr ? -1 : map()->_idx);
bool old = printer->traverse_outs();
printer->set_traverse_outs(true);
printer->print_graph(buffer);
diff --git a/src/hotspot/share/opto/subnode.hpp b/src/hotspot/share/opto/subnode.hpp
index 57a501ecbc3..5acf31b45c4 100644
--- a/src/hotspot/share/opto/subnode.hpp
+++ b/src/hotspot/share/opto/subnode.hpp
@@ -328,7 +328,9 @@ struct BoolTest {
// a simple char array where each element is the ASCII version of a 'mask'
// enum from above.
mask commute( ) const { return mask("032147658"[_test]-'0'); }
- mask negate( ) const { return mask(_test^4); }
+ mask negate( ) const { return negate_mask(_test); }
+ // Return the negative mask for the given mask, for both signed and unsigned comparison.
+ static mask negate_mask(mask btm) { return mask(btm ^ 4); }
bool is_canonical( ) const { return (_test == BoolTest::ne || _test == BoolTest::lt || _test == BoolTest::le || _test == BoolTest::overflow); }
bool is_less( ) const { return _test == BoolTest::lt || _test == BoolTest::le; }
bool is_greater( ) const { return _test == BoolTest::gt || _test == BoolTest::ge; }
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index 2153a12c402..ae9ef552df4 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -2268,6 +2268,99 @@ Node* OrVNode::Identity(PhaseGVN* phase) {
return redundant_logical_identity(this);
}
+// Returns whether (XorV (VectorMaskCmp) -1) can be optimized by negating the
+// comparison operation.
+bool VectorMaskCmpNode::predicate_can_be_negated() {
+ switch (_predicate) {
+ case BoolTest::eq:
+ case BoolTest::ne:
+ // eq and ne also apply to floating-point special values like NaN and infinities.
+ return true;
+ case BoolTest::le:
+ case BoolTest::ge:
+ case BoolTest::lt:
+ case BoolTest::gt:
+ case BoolTest::ule:
+ case BoolTest::uge:
+ case BoolTest::ult:
+ case BoolTest::ugt: {
+ BasicType bt = vect_type()->element_basic_type();
+ // For float and double, we don't know if either comparison operand is a
+ // NaN; NaN {le|ge|lt|gt} anything is false, resulting in inconsistent
+ // results before and after negation.
+ return is_integral_type(bt);
+ }
+ default:
+ return false;
+ }
+}
+
+// This function transforms the following patterns:
+//
+// For integer types:
+// (XorV (VectorMaskCmp src1 src2 cond) (Replicate -1))
+// => (VectorMaskCmp src1 src2 ncond)
+// (XorVMask (VectorMaskCmp src1 src2 cond) (MaskAll m1))
+// => (VectorMaskCmp src1 src2 ncond)
+// (XorV (VectorMaskCast (VectorMaskCmp src1 src2 cond)) (Replicate -1))
+// => (VectorMaskCast (VectorMaskCmp src1 src2 ncond))
+// (XorVMask (VectorMaskCast (VectorMaskCmp src1 src2 cond)) (MaskAll m1))
+// => (VectorMaskCast (VectorMaskCmp src1 src2 ncond))
+// cond can be eq, ne, le, ge, lt, gt, ule, uge, ult and ugt.
+// ncond is the negative comparison of cond.
+//
+// For float and double types:
+// (XorV (VectorMaskCast (VectorMaskCmp src1 src2 cond)) (Replicate -1))
+// => (VectorMaskCast (VectorMaskCmp src1 src2 ncond))
+// (XorVMask (VectorMaskCast (VectorMaskCmp src1 src2 cond)) (MaskAll m1))
+// => (VectorMaskCast (VectorMaskCmp src1 src2 ncond))
+// cond can be eq or ne.
+Node* XorVNode::Ideal_XorV_VectorMaskCmp(PhaseGVN* phase, bool can_reshape) {
+ Node* in1 = in(1);
+ Node* in2 = in(2);
+ // Transformations for predicated vectors are not supported for now.
+ if (is_predicated_vector() ||
+ in1->is_predicated_vector() ||
+ in2->is_predicated_vector()) {
+ return nullptr;
+ }
+
+ // XorV/XorVMask is commutative, swap VectorMaskCmp/VectorMaskCast to in1.
+ if (VectorNode::is_all_ones_vector(in1)) {
+ swap(in1, in2);
+ }
+
+ bool with_vector_mask_cast = false;
+ // Required conditions:
+ // 1. VectorMaskCast and VectorMaskCmp should only have a single use,
+ // otherwise the optimization may be unprofitable.
+ // 2. The predicate of VectorMaskCmp should be negatable.
+ // 3. The second input should be an all true vector mask.
+ if (in1->Opcode() == Op_VectorMaskCast) {
+ if (in1->outcnt() != 1) {
+ return nullptr;
+ }
+ with_vector_mask_cast = true;
+ in1 = in1->in(1);
+ }
+ if (in1->Opcode() != Op_VectorMaskCmp ||
+ in1->outcnt() != 1 ||
+ !in1->as_VectorMaskCmp()->predicate_can_be_negated() ||
+ !VectorNode::is_all_ones_vector(in2)) {
+ return nullptr;
+ }
+
+ BoolTest::mask neg_cond = BoolTest::negate_mask((in1->as_VectorMaskCmp())->get_predicate());
+ ConINode* predicate_node = phase->intcon(neg_cond);
+ const TypeVect* vt = in1->as_Vector()->vect_type();
+ Node* res = new VectorMaskCmpNode(neg_cond, in1->in(1), in1->in(2), predicate_node, vt);
+ if (with_vector_mask_cast) {
+ // We optimized out a VectorMaskCast, regenerate one to ensure type correctness.
+ res = new VectorMaskCastNode(phase->transform(res), vect_type());
+ }
+ return res;
+}
+
Node* XorVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
// (XorV src src) => (Replicate zero)
// (XorVMask src src) => (MaskAll zero)
@@ -2281,6 +2374,11 @@ Node* XorVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* zero = phase->transform(phase->zerocon(bt));
return VectorNode::scalar2vector(zero, length(), bt, bottom_type()->isa_vectmask() != nullptr);
}
+
+ Node* res = Ideal_XorV_VectorMaskCmp(phase, can_reshape);
+ if (res != nullptr) {
+ return res;
+ }
return VectorNode::Ideal(phase, can_reshape);
}
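
The float/double restriction in predicate_can_be_negated() comes from NaN semantics: an ordered comparison involving NaN and its sign-flipped counterpart are both false, so rewriting XorV(cmp, all-ones) as the opposite comparison would change NaN lanes. A scalar illustration with plain doubles (not vector nodes):

    #include <cassert>
    #include <limits>

    int main() {
      double nan = std::numeric_limits<double>::quiet_NaN();
      double x = 1.0;

      // Logical negation of (nan < x) is true ...
      assert(!(nan < x));
      // ... but the "negated" ordered predicate (nan >= x) is still false,
      // so XorV(VectorMaskCmp(lt), all-ones) != VectorMaskCmp(ge) for NaN lanes.
      assert(!(nan >= x));

      // eq/ne remain exact complements even for NaN, which is why the
      // transformation above still allows them for float and double.
      assert((nan == x) == !(nan != x));
      return 0;
    }
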
diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp
index 463680d0a52..53778b61d0e 100644
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@@ -1013,6 +1013,7 @@ class XorVNode : public VectorNode {
XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+ Node* Ideal_XorV_VectorMaskCmp(PhaseGVN* phase, bool can_reshape);
};
//------------------------------XorReductionVNode--------------------------------------
@@ -1676,6 +1677,7 @@ class VectorMaskCmpNode : public VectorNode {
virtual bool cmp( const Node &n ) const {
return VectorNode::cmp(n) && _predicate == ((VectorMaskCmpNode&)n)._predicate;
}
+ bool predicate_can_be_negated();
BoolTest::mask get_predicate() { return _predicate; }
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
diff --git a/src/hotspot/share/prims/jvm.cpp b/src/hotspot/share/prims/jvm.cpp
index 2cbe764994d..ade9b45e2eb 100644
--- a/src/hotspot/share/prims/jvm.cpp
+++ b/src/hotspot/share/prims/jvm.cpp
@@ -2253,12 +2253,26 @@ JVM_END
// Reflection for the verifier /////////////////////////////////////////////////////////////////
// RedefineClasses support: bug 6214132 caused verification to fail.
-// All functions from this section should call the jvmtiThreadSate function:
-// Klass* class_to_verify_considering_redefinition(Klass* klass).
-// The function returns a Klass* of the _scratch_class if the verifier
-// was invoked in the middle of the class redefinition.
-// Otherwise it returns its argument value which is the _the_class Klass*.
-// Please, refer to the description in the jvmtiThreadState.hpp.
+// All functions from this section, unless noted otherwise, should call the functions
+// get_klass_considering_redefinition(), or
+// get_instance_klass_considering_redefinition()
+// These functions return JvmtiThreadState::_scratch_class if the verifier
+// was invoked in the middle of the redefinition of cls.
+// See jvmtiThreadState.hpp for details.
+
+inline Klass* get_klass_considering_redefinition(jclass cls, JavaThread* thread) {
+ Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
+ if (k->is_instance_klass()) {
+ return JvmtiThreadState::class_to_verify_considering_redefinition(InstanceKlass::cast(k), thread);
+ } else {
+ return k;
+ }
+}
+
+inline InstanceKlass* get_instance_klass_considering_redefinition(jclass cls, JavaThread* thread) {
+ InstanceKlass* ik = java_lang_Class::as_InstanceKlass(JNIHandles::resolve_non_null(cls));
+ return JvmtiThreadState::class_to_verify_considering_redefinition(ik, thread);
+}
JVM_ENTRY(jboolean, JVM_IsInterface(JNIEnv *env, jclass cls))
oop mirror = JNIHandles::resolve_non_null(cls);
@@ -2266,26 +2280,24 @@ JVM_ENTRY(jboolean, JVM_IsInterface(JNIEnv *env, jclass cls))
return JNI_FALSE;
}
Klass* k = java_lang_Class::as_Klass(mirror);
- // This isn't necessary since answer is the same since redefinition
+ // This isn't necessary since the answer is the same because redefinition
// has already checked this matches for the scratch class.
- // k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
+ // k = get_klass_considering_redefinition(cls, thread)
jboolean result = k->is_interface();
assert(!result || k->is_instance_klass(),
"all interfaces are instance types");
return result;
JVM_END
-
JVM_ENTRY(const char*, JVM_GetClassNameUTF(JNIEnv *env, jclass cls))
+ // No need to call get_klass_considering_redefinition() as redefinition cannot change a class's name.
Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
return k->name()->as_utf8();
JVM_END
JVM_ENTRY(void, JVM_GetClassCPTypes(JNIEnv *env, jclass cls, unsigned char *types))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
+ Klass* k = get_klass_considering_redefinition(cls, thread);
// types will have length zero if this is not an InstanceKlass
// (length is determined by call to JVM_GetClassCPEntriesCount)
if (k->is_instance_klass()) {
@@ -2299,22 +2311,19 @@ JVM_END
JVM_ENTRY(jint, JVM_GetClassCPEntriesCount(JNIEnv *env, jclass cls))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
+ Klass* k = get_klass_considering_redefinition(cls, thread);
return (!k->is_instance_klass()) ? 0 : InstanceKlass::cast(k)->constants()->length();
JVM_END
JVM_ENTRY(jint, JVM_GetClassFieldsCount(JNIEnv *env, jclass cls))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
+ Klass* k = get_klass_considering_redefinition(cls, thread);
return (!k->is_instance_klass()) ? 0 : InstanceKlass::cast(k)->java_fields_count();
JVM_END
JVM_ENTRY(jint, JVM_GetClassMethodsCount(JNIEnv *env, jclass cls))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
+ Klass* k = get_klass_considering_redefinition(cls, thread);
return (!k->is_instance_klass()) ? 0 : InstanceKlass::cast(k)->methods()->length();
JVM_END
@@ -2325,9 +2334,8 @@ JVM_END
// by the results of JVM_GetClass{Fields,Methods}Count, which return
// zero for arrays.
JVM_ENTRY(void, JVM_GetMethodIxExceptionIndexes(JNIEnv *env, jclass cls, jint method_index, unsigned short *exceptions))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
int length = method->checked_exceptions_length();
if (length > 0) {
CheckedExceptionElement* table= method->checked_exceptions_start();
@@ -2339,33 +2347,29 @@ JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxExceptionsCount(JNIEnv *env, jclass cls, jint method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->checked_exceptions_length();
JVM_END
JVM_ENTRY(void, JVM_GetMethodIxByteCode(JNIEnv *env, jclass cls, jint method_index, unsigned char *code))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
memcpy(code, method->code_base(), method->code_size());
JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxByteCodeLength(JNIEnv *env, jclass cls, jint method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->code_size();
JVM_END
JVM_ENTRY(void, JVM_GetMethodIxExceptionTableEntry(JNIEnv *env, jclass cls, jint method_index, jint entry_index, JVM_ExceptionTableEntryType *entry))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
ExceptionTable extable(method);
entry->start_pc = extable.start_pc(entry_index);
entry->end_pc = extable.end_pc(entry_index);
@@ -2375,81 +2379,71 @@ JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxExceptionTableLength(JNIEnv *env, jclass cls, int method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->exception_table_length();
JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxModifiers(JNIEnv *env, jclass cls, int method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->access_flags().as_method_flags();
JVM_END
JVM_ENTRY(jint, JVM_GetFieldIxModifiers(JNIEnv *env, jclass cls, int field_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- return InstanceKlass::cast(k)->field_access_flags(field_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ return ik->field_access_flags(field_index);
JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxLocalsCount(JNIEnv *env, jclass cls, int method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->max_locals();
JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxArgsSize(JNIEnv *env, jclass cls, int method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->size_of_parameters();
JVM_END
JVM_ENTRY(jint, JVM_GetMethodIxMaxStack(JNIEnv *env, jclass cls, int method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->verifier_max_stack();
JVM_END
JVM_ENTRY(jboolean, JVM_IsConstructorIx(JNIEnv *env, jclass cls, int method_index))
ResourceMark rm(THREAD);
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->name() == vmSymbols::object_initializer_name();
JVM_END
JVM_ENTRY(jboolean, JVM_IsVMGeneratedMethodIx(JNIEnv *env, jclass cls, int method_index))
ResourceMark rm(THREAD);
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->is_overpass();
JVM_END
JVM_ENTRY(const char*, JVM_GetMethodIxNameUTF(JNIEnv *env, jclass cls, jint method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->name()->as_utf8();
JVM_END
JVM_ENTRY(const char*, JVM_GetMethodIxSignatureUTF(JNIEnv *env, jclass cls, jint method_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- Method* method = InstanceKlass::cast(k)->methods()->at(method_index);
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ Method* method = ik->methods()->at(method_index);
return method->signature()->as_utf8();
JVM_END
@@ -2462,9 +2456,8 @@ JVM_END
* constant pool, so we must use cp->uncached_x methods when appropriate.
*/
JVM_ENTRY(const char*, JVM_GetCPFieldNameUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_Fieldref:
return cp->uncached_name_ref_at(cp_index)->as_utf8();
@@ -2477,9 +2470,8 @@ JVM_END
JVM_ENTRY(const char*, JVM_GetCPMethodNameUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_InterfaceMethodref:
case JVM_CONSTANT_Methodref:
@@ -2493,9 +2485,8 @@ JVM_END
JVM_ENTRY(const char*, JVM_GetCPMethodSignatureUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_InterfaceMethodref:
case JVM_CONSTANT_Methodref:
@@ -2509,9 +2500,8 @@ JVM_END
JVM_ENTRY(const char*, JVM_GetCPFieldSignatureUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_Fieldref:
return cp->uncached_signature_ref_at(cp_index)->as_utf8();
@@ -2524,18 +2514,16 @@ JVM_END
JVM_ENTRY(const char*, JVM_GetCPClassNameUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
Symbol* classname = cp->klass_name_at(cp_index);
return classname->as_utf8();
JVM_END
JVM_ENTRY(const char*, JVM_GetCPFieldClassNameUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_Fieldref: {
int class_index = cp->uncached_klass_ref_index_at(cp_index);
@@ -2551,9 +2539,8 @@ JVM_END
JVM_ENTRY(const char*, JVM_GetCPMethodClassNameUTF(JNIEnv *env, jclass cls, jint cp_index))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_Methodref:
case JVM_CONSTANT_InterfaceMethodref: {
@@ -2570,18 +2557,15 @@ JVM_END
JVM_ENTRY(jint, JVM_GetCPFieldModifiers(JNIEnv *env, jclass cls, int cp_index, jclass called_cls))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- Klass* k_called = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(called_cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- k_called = JvmtiThreadState::class_to_verify_considering_redefinition(k_called, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
- ConstantPool* cp_called = InstanceKlass::cast(k_called)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ InstanceKlass* ik_called = get_instance_klass_considering_redefinition(called_cls, thread);
+ ConstantPool* cp = ik->constants();
+ ConstantPool* cp_called = ik_called->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_Fieldref: {
Symbol* name = cp->uncached_name_ref_at(cp_index);
Symbol* signature = cp->uncached_signature_ref_at(cp_index);
- InstanceKlass* ik = InstanceKlass::cast(k_called);
- for (JavaFieldStream fs(ik); !fs.done(); fs.next()) {
+ for (JavaFieldStream fs(ik_called); !fs.done(); fs.next()) {
if (fs.name() == name && fs.signature() == signature) {
return fs.access_flags().as_field_flags();
}
@@ -2597,17 +2581,15 @@ JVM_END
JVM_ENTRY(jint, JVM_GetCPMethodModifiers(JNIEnv *env, jclass cls, int cp_index, jclass called_cls))
- Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
- Klass* k_called = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(called_cls));
- k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
- k_called = JvmtiThreadState::class_to_verify_considering_redefinition(k_called, thread);
- ConstantPool* cp = InstanceKlass::cast(k)->constants();
+ InstanceKlass* ik = get_instance_klass_considering_redefinition(cls, thread);
+ InstanceKlass* ik_called = get_instance_klass_considering_redefinition(called_cls, thread);
+ ConstantPool* cp = ik->constants();
switch (cp->tag_at(cp_index).value()) {
case JVM_CONSTANT_Methodref:
case JVM_CONSTANT_InterfaceMethodref: {
Symbol* name = cp->uncached_name_ref_at(cp_index);
Symbol* signature = cp->uncached_signature_ref_at(cp_index);
- Array<Method*>* methods = InstanceKlass::cast(k_called)->methods();
+ Array<Method*>* methods = ik_called->methods();
int methods_count = methods->length();
for (int i = 0; i < methods_count; i++) {
Method* method = methods->at(i);
diff --git a/src/hotspot/share/prims/jvmtiAgent.cpp b/src/hotspot/share/prims/jvmtiAgent.cpp
index 16a47042a69..66cb68b44b0 100644
--- a/src/hotspot/share/prims/jvmtiAgent.cpp
+++ b/src/hotspot/share/prims/jvmtiAgent.cpp
@@ -576,25 +576,14 @@ static bool invoke_Agent_OnAttach(JvmtiAgent* agent, outputStream* st) {
}
#if INCLUDE_CDS
-// CDS dumping does not support native JVMTI agent.
-// CDS dumping supports Java agent if the AllowArchivingWithJavaAgent diagnostic option is specified.
static void check_cds_dump(JvmtiAgent* agent) {
if (CDSConfig::new_aot_flags_used()) { // JEP 483
// Agents are allowed with -XX:AOTMode=record and -XX:AOTMode=on/auto.
- // Agents are completely disabled when -XX:AOTMode=create
+ // Agents are completely disabled when -XX:AOTMode=create (see cdsConfig.cpp)
assert(!CDSConfig::is_dumping_final_static_archive(), "agents should have been disabled with -XX:AOTMode=create");
- return;
- }
-
- // This is classic CDS limitations -- we disallow agents by default. They can be used
- // with -XX:+AllowArchivingWithJavaAgent, but that should be used for diagnostic purposes only.
- assert(agent != nullptr, "invariant");
- if (!agent->is_instrument_lib()) {
- vm_exit_during_cds_dumping("CDS dumping does not support native JVMTI agent, name", agent->name());
- }
- if (!AllowArchivingWithJavaAgent) {
- vm_exit_during_cds_dumping(
- "Must enable AllowArchivingWithJavaAgent in order to run Java agent during CDS dumping");
+ } else if (CDSConfig::is_dumping_classic_static_archive() || CDSConfig::is_dumping_dynamic_archive()) {
+ // Classic CDS (static or dynamic dump). Disallow agents.
+ vm_exit_during_cds_dumping("JVMTI agents are not allowed when dumping CDS archives");
}
}
#endif // INCLUDE_CDS
diff --git a/src/hotspot/share/prims/jvmtiEnv.cpp b/src/hotspot/share/prims/jvmtiEnv.cpp
index 3eb507ba5e3..5642cd9ff8f 100644
--- a/src/hotspot/share/prims/jvmtiEnv.cpp
+++ b/src/hotspot/share/prims/jvmtiEnv.cpp
@@ -38,6 +38,7 @@
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
+#include "oops/fieldStreams.inline.hpp"
#include "oops/instanceKlass.hpp"
#include "oops/klass.inline.hpp"
#include "oops/objArrayOop.inline.hpp"
@@ -68,7 +69,6 @@
#include "runtime/objectMonitor.inline.hpp"
#include "runtime/os.hpp"
#include "runtime/osThread.hpp"
-#include "runtime/reflectionUtils.hpp"
#include "runtime/signature.hpp"
#include "runtime/threadHeapSampler.hpp"
#include "runtime/threads.hpp"
@@ -2841,9 +2841,9 @@ JvmtiEnv::GetClassFields(oop k_mirror, jint* field_count_ptr, jfieldID** fields_
InstanceKlass* ik = InstanceKlass::cast(k);
- FilteredJavaFieldStream flds(ik);
+ JavaFieldStream flds(ik);
- int result_count = flds.field_count();
+ int result_count = ik->java_fields_count();
// Allocate the result and fill it in.
jfieldID* result_list = (jfieldID*)jvmtiMalloc(result_count * sizeof(jfieldID));
diff --git a/src/hotspot/share/prims/jvmtiRedefineClasses.cpp b/src/hotspot/share/prims/jvmtiRedefineClasses.cpp
index 4d841592501..74192d724f6 100644
--- a/src/hotspot/share/prims/jvmtiRedefineClasses.cpp
+++ b/src/hotspot/share/prims/jvmtiRedefineClasses.cpp
@@ -99,7 +99,7 @@ VM_RedefineClasses::VM_RedefineClasses(jint class_count,
static inline InstanceKlass* get_ik(jclass def) {
oop mirror = JNIHandles::resolve_non_null(def);
- return InstanceKlass::cast(java_lang_Class::as_Klass(mirror));
+ return java_lang_Class::as_InstanceKlass(mirror);
}
// If any of the classes are being redefined, wait
@@ -1310,12 +1310,12 @@ int VM_RedefineClasses::find_new_operand_index(int old_index) {
class RedefineVerifyMark : public StackObj {
private:
JvmtiThreadState* _state;
- Klass* _scratch_class;
+ InstanceKlass* _scratch_class;
OopHandle _scratch_mirror;
public:
- RedefineVerifyMark(Klass* the_class, Klass* scratch_class,
+ RedefineVerifyMark(InstanceKlass* the_class, InstanceKlass* scratch_class,
JvmtiThreadState* state) : _state(state), _scratch_class(scratch_class)
{
_state->set_class_versions_map(the_class, scratch_class);
diff --git a/src/hotspot/share/prims/jvmtiTagMap.cpp b/src/hotspot/share/prims/jvmtiTagMap.cpp
index 4febb4f3125..a69c7cb7142 100644
--- a/src/hotspot/share/prims/jvmtiTagMap.cpp
+++ b/src/hotspot/share/prims/jvmtiTagMap.cpp
@@ -36,6 +36,7 @@
#include "oops/access.inline.hpp"
#include "oops/arrayOop.hpp"
#include "oops/constantPool.inline.hpp"
+#include "oops/fieldStreams.inline.hpp"
#include "oops/instanceMirrorKlass.hpp"
#include "oops/klass.inline.hpp"
#include "oops/objArrayKlass.hpp"
@@ -58,7 +59,6 @@
#include "runtime/jniHandles.inline.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
-#include "runtime/reflectionUtils.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/threadSMR.hpp"
#include "runtime/timerTrace.hpp"
@@ -429,8 +429,8 @@ int ClassFieldMap::interfaces_field_count(InstanceKlass* ik) {
const Array<InstanceKlass*>* interfaces = ik->transitive_interfaces();
int count = 0;
for (int i = 0; i < interfaces->length(); i++) {
- FilteredJavaFieldStream fld(interfaces->at(i));
- count += fld.field_count();
+ count += interfaces->at(i)->java_fields_count();
+
}
return count;
}
@@ -452,11 +452,10 @@ ClassFieldMap* ClassFieldMap::create_map_of_static_fields(Klass* k) {
// Need to calculate start index of this class fields: number of fields in all interfaces and superclasses.
int index = interfaces_field_count(ik);
for (InstanceKlass* super_klass = ik->super(); super_klass != nullptr; super_klass = super_klass->super()) {
- FilteredJavaFieldStream super_fld(super_klass);
- index += super_fld.field_count();
+ index += super_klass->java_fields_count();
}
- for (FilteredJavaFieldStream fld(ik); !fld.done(); fld.next(), index++) {
+ for (JavaFieldStream fld(ik); !fld.done(); fld.next(), index++) {
// ignore instance fields
if (!fld.access_flags().is_static()) {
continue;
@@ -479,13 +478,12 @@ ClassFieldMap* ClassFieldMap::create_map_of_instance_fields(oop obj) {
// fields of the superclasses are reported first, so need to know total field number to calculate field indices
int total_field_number = interfaces_field_count(ik);
for (InstanceKlass* klass = ik; klass != nullptr; klass = klass->super()) {
- FilteredJavaFieldStream fld(klass);
- total_field_number += fld.field_count();
+ total_field_number += klass->java_fields_count();
}
for (InstanceKlass* klass = ik; klass != nullptr; klass = klass->super()) {
- FilteredJavaFieldStream fld(klass);
- int start_index = total_field_number - fld.field_count();
+ JavaFieldStream fld(klass);
+ int start_index = total_field_number - klass->java_fields_count();
for (int index = 0; !fld.done(); fld.next(), index++) {
// ignore static fields
if (fld.access_flags().is_static()) {
diff --git a/src/hotspot/share/prims/jvmtiThreadState.cpp b/src/hotspot/share/prims/jvmtiThreadState.cpp
index 75b3a0f0157..00a48dec111 100644
--- a/src/hotspot/share/prims/jvmtiThreadState.cpp
+++ b/src/hotspot/share/prims/jvmtiThreadState.cpp
@@ -714,8 +714,7 @@ JvmtiVTSuspender::register_all_vthreads_resume() {
}
void
-JvmtiVTSuspender::register_vthread_suspend(oop vt) {
- int64_t id = java_lang_Thread::thread_id(vt);
+JvmtiVTSuspender::register_vthread_suspend(int64_t id) {
MutexLocker ml(JvmtiVThreadSuspend_lock, Mutex::_no_safepoint_check_flag);
if (_SR_mode == SR_all) {
@@ -730,6 +729,12 @@ JvmtiVTSuspender::register_vthread_suspend(oop vt) {
}
}
+void
+JvmtiVTSuspender::register_vthread_suspend(oop vt) {
+ int64_t id = java_lang_Thread::thread_id(vt);
+ register_vthread_suspend(id);
+}
+
void
JvmtiVTSuspender::register_vthread_resume(oop vt) {
int64_t id = java_lang_Thread::thread_id(vt);
diff --git a/src/hotspot/share/prims/jvmtiThreadState.hpp b/src/hotspot/share/prims/jvmtiThreadState.hpp
index 89d4107e216..17bdae4662e 100644
--- a/src/hotspot/share/prims/jvmtiThreadState.hpp
+++ b/src/hotspot/share/prims/jvmtiThreadState.hpp
@@ -27,6 +27,7 @@
#include "jvmtifiles/jvmti.h"
#include "memory/allocation.hpp"
+#include "oops/instanceKlass.hpp"
#include "oops/oopHandle.hpp"
#include "prims/jvmtiEventController.hpp"
#include "prims/jvmtiExport.hpp"
@@ -167,6 +168,7 @@ class JvmtiVTSuspender : AllStatic {
public:
static void register_all_vthreads_suspend();
static void register_all_vthreads_resume();
+ static void register_vthread_suspend(int64_t id);
static void register_vthread_suspend(oop vt);
static void register_vthread_resume(oop vt);
static bool is_vthread_suspended(oop vt);
@@ -208,7 +210,7 @@ class JvmtiThreadState : public CHeapObj<mtInternal> {
// Used to send class being redefined/retransformed and kind of transform
// info to the class file load hook event handler.
- Klass* _class_being_redefined;
+ InstanceKlass* _class_being_redefined;
JvmtiClassLoadKind _class_load_kind;
GrowableArray<Klass*>* _classes_being_redefined;
@@ -371,7 +373,7 @@ class JvmtiThreadState : public CHeapObj<mtInternal> {
// when class file load hook event is posted.
// It is set while loading redefined class and cleared before the
// class file load hook event is posted.
- inline void set_class_being_redefined(Klass* k, JvmtiClassLoadKind kind) {
+ inline void set_class_being_redefined(InstanceKlass* k, JvmtiClassLoadKind kind) {
_class_being_redefined = k;
_class_load_kind = kind;
}
@@ -381,7 +383,7 @@ class JvmtiThreadState : public CHeapObj {
_class_load_kind = jvmti_class_load_kind_load;
}
- inline Klass* get_class_being_redefined() {
+ inline InstanceKlass* get_class_being_redefined() {
return _class_being_redefined;
}
@@ -420,12 +422,12 @@ class JvmtiThreadState : public CHeapObj<mtInternal>