Merge branch 'master' into JDK-8370863-mask-cast-opt

This commit is contained in:
erfang 2025-12-11 11:27:06 +00:00
commit d05bd365e5
583 changed files with 22392 additions and 8181 deletions

View File

@ -59,7 +59,7 @@ on:
jobs:
build-linux:
name: build
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
container:
image: alpine:3.20

View File

@ -48,7 +48,7 @@ on:
jobs:
build-cross-compile:
name: build
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
strategy:
fail-fast: false

View File

@ -75,7 +75,7 @@ on:
jobs:
build-linux:
name: build
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
strategy:
fail-fast: false
@ -115,9 +115,21 @@ jobs:
if [[ '${{ inputs.apt-architecture }}' != '' ]]; then
sudo dpkg --add-architecture ${{ inputs.apt-architecture }}
fi
sudo apt-get update
sudo apt-get install --only-upgrade apt
sudo apt-get install gcc-${{ inputs.gcc-major-version }}${{ inputs.gcc-package-suffix }} g++-${{ inputs.gcc-major-version }}${{ inputs.gcc-package-suffix }} libxrandr-dev${{ steps.arch.outputs.suffix }} libxtst-dev${{ steps.arch.outputs.suffix }} libcups2-dev${{ steps.arch.outputs.suffix }} libasound2-dev${{ steps.arch.outputs.suffix }} ${{ inputs.apt-extra-packages }}
sudo apt update
sudo apt install --only-upgrade apt
sudo apt install \
gcc-${{ inputs.gcc-major-version }}${{ inputs.gcc-package-suffix }} \
g++-${{ inputs.gcc-major-version }}${{ inputs.gcc-package-suffix }} \
libasound2-dev${{ steps.arch.outputs.suffix }} \
libcups2-dev${{ steps.arch.outputs.suffix }} \
libfontconfig1-dev${{ steps.arch.outputs.suffix }} \
libx11-dev${{ steps.arch.outputs.suffix }} \
libxext-dev${{ steps.arch.outputs.suffix }} \
libxrandr-dev${{ steps.arch.outputs.suffix }} \
libxrender-dev${{ steps.arch.outputs.suffix }} \
libxt-dev${{ steps.arch.outputs.suffix }} \
libxtst-dev${{ steps.arch.outputs.suffix }} \
${{ inputs.apt-extra-packages }}
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${{ inputs.gcc-major-version }} 100 --slave /usr/bin/g++ g++ /usr/bin/g++-${{ inputs.gcc-major-version }}
- name: 'Configure'

View File

@ -57,7 +57,7 @@ jobs:
prepare:
name: 'Prepare the run'
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
env:
# List of platforms to exclude by default
EXCLUDED_PLATFORMS: 'alpine-linux-x64'
@ -405,7 +405,7 @@ jobs:
with:
platform: linux-x64
bootjdk-platform: linux-x64
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
debug-suffix: -debug
@ -419,7 +419,7 @@ jobs:
with:
platform: linux-x64
bootjdk-platform: linux-x64
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
static-suffix: "-static"

View File

@ -1,7 +1,7 @@
[general]
project=jdk
jbs=JDK
version=26
version=27
[checks]
error=author,committer,reviewers,merge,issues,executable,symlink,message,hg-tag,whitespace,problemlists,copyright

View File

@ -1,7 +1,7 @@
# Welcome to the JDK!
For build instructions please see the
[online documentation](https://openjdk.org/groups/build/doc/building.html),
[online documentation](https://git.openjdk.org/jdk/blob/master/doc/building.md),
or either of these files:
- [doc/building.html](doc/building.html) (html version)

View File

@ -541,6 +541,11 @@ href="#apple-xcode">Apple Xcode</a> on some strategies to deal with
this.</p>
<p>It is recommended that you use at least macOS 14 and Xcode 15.4, but
earlier versions may also work.</p>
<p>Starting with Xcode 26, introduced in macOS 26, the Metal toolchain
no longer comes bundled with Xcode, so it needs to be installed
separately. This can be done either via Xcode's Settings/Components UI,
or on the command line by calling
<code>xcodebuild -downloadComponent metalToolchain</code>.</p>
<p>The standard macOS environment contains the basic tooling needed to
build, but for external libraries a package manager is recommended. The
JDK uses <a href="https://brew.sh/">homebrew</a> in the examples, but

View File

@ -352,6 +352,11 @@ on some strategies to deal with this.
It is recommended that you use at least macOS 14 and Xcode 15.4, but
earlier versions may also work.
Starting with Xcode 26, introduced in macOS 26, the Metal toolchain no longer
comes bundled with Xcode, so it needs to be installed separately. This can
be done either via Xcode's Settings/Components UI, or on the command line by
calling `xcodebuild -downloadComponent metalToolchain`.
The standard macOS environment contains the basic tooling needed to build, but
for external libraries a package manager is recommended. The JDK uses
[homebrew](https://brew.sh/) in the examples, but feel free to use whatever

View File

@ -1037,8 +1037,8 @@ running destructors at exit can lead to problems.</p>
<p>Some of the approaches used in HotSpot to avoid dynamic
initialization include:</p>
<ul>
<li><p>Use the <code>Deferred&lt;T&gt;</code> class template. Add a call
to its initialization function at an appropriate place during VM
<li><p>Use the <code>DeferredStatic&lt;T&gt;</code> class template. Add
a call to its initialization function at an appropriate place during VM
initialization. The underlying object is never destroyed.</p></li>
<li><p>For objects of class type, use a variable whose value is a
pointer to the class, initialized to <code>nullptr</code>. Provide an

View File

@ -954,7 +954,7 @@ destructors at exit can lead to problems.
Some of the approaches used in HotSpot to avoid dynamic initialization
include:
* Use the `Deferred<T>` class template. Add a call to its initialization
* Use the `DeferredStatic<T>` class template. Add a call to its initialization
function at an appropriate place during VM initialization. The underlying
object is never destroyed.
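For readers not familiar with the pattern being renamed here (Deferred<T> to DeferredStatic<T>), below is a minimal sketch of the technique the style guide describes; it is illustrative only and assumes nothing about HotSpot's actual implementation: raw aligned storage, explicit initialization at a well-defined point during VM startup, and no destruction at exit.

```c++
// Minimal sketch of the "deferred static" technique; not HotSpot's actual
// DeferredStatic<T> implementation.
#include <cstddef>
#include <new>
#include <utility>

template <typename T>
class DeferredStaticSketch {
  // Raw, suitably aligned storage. No constructor runs at load time and no
  // destructor is ever registered, so there is no dynamic initialization and
  // nothing runs at exit.
  alignas(T) unsigned char _storage[sizeof(T)];

 public:
  template <typename... Args>
  void initialize(Args&&... args) {
    // Construct the object explicitly, at a chosen point during startup.
    ::new (static_cast<void*>(_storage)) T(std::forward<Args>(args)...);
  }

  T* get()        { return reinterpret_cast<T*>(_storage); }
  T* operator->() { return get(); }
  T& operator*()  { return *get(); }
};

// Usage sketch: a file-scope instance is only zero-initialized at load time,
// then explicitly initialized once during VM startup and never destroyed.
//   DeferredStaticSketch<SomeTable> _table;
//   void init_globals() { _table.initialize(/* ctor args */); }
```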

View File

@ -119,6 +119,9 @@ cover the new source version</li>
and
<code>test/langtools/tools/javac/preview/classReaderTest/Client.preview.out</code>:
update expected messages for preview errors and warnings</li>
<li><code>test/langtools/tools/javac/versions/Versions.java</code>: add
new source version to the set of valid sources and add new enum constant
for the new class file version.</li>
</ul>
</body>
</html>

View File

@ -125,13 +125,6 @@ define SetupBundleFileBody
&& $(TAR) cf - -$(TAR_INCLUDE_PARAM) $$($1_$$d_LIST_FILE) \
$(TAR_IGNORE_EXIT_VALUE) ) \
| ( $(CD) $(SUPPORT_OUTPUTDIR)/bundles/$1/$$($1_SUBDIR) && $(TAR) xf - )$$(NEWLINE) )
# Rename stripped pdb files
ifeq ($(call isTargetOs, windows)+$(SHIP_DEBUG_SYMBOLS), true+public)
for f in `$(FIND) $(SUPPORT_OUTPUTDIR)/bundles/$1/$$($1_SUBDIR) -name "*.stripped.pdb"`; do \
$(ECHO) Renaming $$$${f} to $$$${f%stripped.pdb}pdb $(LOG_INFO); \
$(MV) $$$${f} $$$${f%stripped.pdb}pdb; \
done
endif
# Unzip any zipped debuginfo files
ifeq ($$($1_UNZIP_DEBUGINFO), true)
for f in `$(FIND) $(SUPPORT_OUTPUTDIR)/bundles/$1/$$($1_SUBDIR) -name "*.diz"`; do \
@ -222,14 +215,6 @@ ifneq ($(filter product-bundles% legacy-bundles, $(MAKECMDGOALS)), )
ifeq ($(call isTargetOs, windows), true)
ifeq ($(SHIP_DEBUG_SYMBOLS), )
JDK_SYMBOLS_EXCLUDE_PATTERN := %.pdb
else
ifeq ($(SHIP_DEBUG_SYMBOLS), public)
JDK_SYMBOLS_EXCLUDE_PATTERN := \
$(filter-out \
%.stripped.pdb, \
$(filter %.pdb, $(ALL_JDK_FILES)) \
)
endif
endif
endif
@ -244,10 +229,7 @@ ifneq ($(filter product-bundles% legacy-bundles, $(MAKECMDGOALS)), )
)
JDK_SYMBOLS_BUNDLE_FILES := \
$(filter-out \
%.stripped.pdb, \
$(call FindFiles, $(SYMBOLS_IMAGE_DIR)) \
)
$(call FindFiles, $(SYMBOLS_IMAGE_DIR))
TEST_DEMOS_BUNDLE_FILES := $(filter $(JDK_DEMOS_IMAGE_HOMEDIR)/demo/%, \
$(ALL_JDK_DEMOS_FILES))
@ -267,14 +249,6 @@ ifneq ($(filter product-bundles% legacy-bundles, $(MAKECMDGOALS)), )
ifeq ($(call isTargetOs, windows), true)
ifeq ($(SHIP_DEBUG_SYMBOLS), )
JRE_SYMBOLS_EXCLUDE_PATTERN := %.pdb
else
ifeq ($(SHIP_DEBUG_SYMBOLS), public)
JRE_SYMBOLS_EXCLUDE_PATTERN := \
$(filter-out \
%.stripped.pdb, \
$(filter %.pdb, $(ALL_JRE_FILES)) \
)
endif
endif
endif

View File

@ -282,29 +282,33 @@ else
endif
CMDS_TARGET_SUBDIR := bin
# Param 1 - either JDK or JRE
# Copy debug info files into symbols bundle.
# In case of Windows and --with-external-symbols-in-bundles=public, take care to remove *.stripped.pdb files
SetupCopyDebuginfo = \
$(foreach m, $(ALL_$1_MODULES), \
$(eval dbgfiles := $(call FindDebuginfoFiles, $(SUPPORT_OUTPUTDIR)/modules_libs/$m)) \
$(eval dbgfiles := $(if $(filter true+public,$(call isTargetOs,windows)+$(SHIP_DEBUG_SYMBOLS)), \
$(filter-out %.stripped.pdb,$(dbgfiles)),$(dbgfiles)) \
) \
$(eval $(call SetupCopyFiles, COPY_$1_LIBS_DEBUGINFO_$m, \
SRC := $(SUPPORT_OUTPUTDIR)/modules_libs/$m, \
DEST := $($1_IMAGE_DIR)/$(LIBS_TARGET_SUBDIR), \
FILES := $(call FindDebuginfoFiles, \
$(SUPPORT_OUTPUTDIR)/modules_libs/$m), \
FILES := $(dbgfiles), \
)) \
$(eval $1_TARGETS += $$(COPY_$1_LIBS_DEBUGINFO_$m)) \
$(eval dbgfiles := $(call FindDebuginfoFiles, $(SUPPORT_OUTPUTDIR)/modules_cmds/$m)) \
$(eval dbgfiles := $(if $(filter true+public,$(call isTargetOs,windows)+$(SHIP_DEBUG_SYMBOLS)), \
$(filter-out %.stripped.pdb,$(dbgfiles)),$(dbgfiles)) \
) \
$(eval $(call SetupCopyFiles, COPY_$1_CMDS_DEBUGINFO_$m, \
SRC := $(SUPPORT_OUTPUTDIR)/modules_cmds/$m, \
DEST := $($1_IMAGE_DIR)/$(CMDS_TARGET_SUBDIR), \
FILES := $(call FindDebuginfoFiles, \
$(SUPPORT_OUTPUTDIR)/modules_cmds/$m), \
FILES := $(dbgfiles), \
)) \
$(eval $1_TARGETS += $$(COPY_$1_CMDS_DEBUGINFO_$m)) \
)
# No space before argument to avoid having to put $(strip ) everywhere in
# implementation above.
$(call SetupCopyDebuginfo,JDK)
$(call SetupCopyDebuginfo,JRE)
# No space before argument to avoid having to put $(strip ) everywhere in implementation above.
$(call SetupCopyDebuginfo,SYMBOLS)
################################################################################

View File

@ -873,7 +873,7 @@ define SetupRunJtregTestBody
$1_JTREG_BASIC_OPTIONS += -testThreadFactoryPath:$$(JTREG_TEST_THREAD_FACTORY_JAR)
$1_JTREG_BASIC_OPTIONS += -testThreadFactory:$$(JTREG_TEST_THREAD_FACTORY)
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-$$(JTREG_TEST_THREAD_FACTORY).txt) \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-$$(JTREG_TEST_THREAD_FACTORY).txt) \
))
endif
@ -881,8 +881,8 @@ define SetupRunJtregTestBody
AGENT := $$(LIBRARY_PREFIX)JvmtiStressAgent$$(SHARED_LIBRARY_SUFFIX)=$$(JTREG_JVMTI_STRESS_AGENT)
$1_JTREG_BASIC_OPTIONS += -javaoption:'-agentpath:$(TEST_IMAGE_DIR)/hotspot/jtreg/native/$$(AGENT)'
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-jvmti-stress-agent.txt) \
))
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-jvmti-stress-agent.txt) \
))
endif
@ -1092,7 +1092,7 @@ define SetupRunJtregTestBody
$$(call MakeDir, $$($1_TEST_RESULTS_DIR) $$($1_TEST_SUPPORT_DIR) \
$$($1_TEST_TMP_DIR))
$$(call ExecuteWithLog, $$($1_TEST_SUPPORT_DIR)/jtreg, \
$$(COV_ENVIRONMENT) $$($1_COMMAND_LINE) \
$$(COV_ENVIRONMENT) $$($1_COMMAND_LINE) \
)
$1_RESULT_FILE := $$($1_TEST_RESULTS_DIR)/text/stats.txt
@ -1102,11 +1102,11 @@ define SetupRunJtregTestBody
$$(call LogWarn, Test report is stored in $$(strip \
$$(subst $$(TOPDIR)/, , $$($1_TEST_RESULTS_DIR))))
# Read the jtreg documentation to learn about the test stats categories:
# https://github.com/openjdk/jtreg/blob/master/src/share/doc/javatest/regtest/faq.md#what-do-all-those-numbers-in-the-test-results-line-mean
# In jtreg, the "skipped:" category accounts for tests that threw jtreg.SkippedException at runtime.
# At the same time these tests also contribute to the "passed:" count.
# We don't want that here, so we subtract the number of "skipped:" from "passed:".
# Read the jtreg documentation to learn about the test stats categories:
# https://github.com/openjdk/jtreg/blob/master/src/share/doc/javatest/regtest/faq.md#what-do-all-those-numbers-in-the-test-results-line-mean
# In jtreg, the "skipped:" category accounts for tests that threw jtreg.SkippedException at runtime.
# At the same time these tests also contribute to the "passed:" count.
# We don't want that here, so we subtract the number of "skipped:" from "passed:".
$$(if $$(wildcard $$($1_RESULT_FILE)), \
$$(eval $1_PASSED_AND_RUNTIME_SKIPPED := $$(shell $$(AWK) '{ gsub(/[,;]/, ""); \

View File

@ -353,7 +353,12 @@ AC_DEFUN_ONCE([BASIC_SETUP_DEVKIT],
[set up toolchain on Mac OS using a path to an Xcode installation])])
UTIL_DEPRECATED_ARG_WITH(sys-root)
UTIL_DEPRECATED_ARG_WITH(tools-dir)
AC_ARG_WITH([tools-dir], [AS_HELP_STRING([--with-tools-dir],
[Point to a nonstandard Visual Studio installation location on Windows by
specifying any existing directory 2 or 3 levels below the installation
root.])]
)
if test "x$with_xcode_path" != x; then
if test "x$OPENJDK_BUILD_OS" = "xmacosx"; then

View File

@ -34,7 +34,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS],
FLAGS_SETUP_LDFLAGS_CPU_DEP([TARGET])
# Setup the build toolchain
FLAGS_SETUP_LDFLAGS_CPU_DEP([BUILD], [OPENJDK_BUILD_])
FLAGS_SETUP_LDFLAGS_CPU_DEP([BUILD], [OPENJDK_BUILD_], [BUILD_])
AC_SUBST(ADLC_LDFLAGS)
])
@ -52,11 +52,6 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
# add --no-as-needed to disable default --as-needed link flag on some GCC toolchains
# add --icf=all (Identical Code Folding, which merges identical functions)
BASIC_LDFLAGS="-Wl,-z,defs -Wl,-z,relro -Wl,-z,now -Wl,--no-as-needed -Wl,--exclude-libs,ALL"
if test "x$LINKER_TYPE" = "xgold"; then
if test x$DEBUG_LEVEL = xrelease; then
BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,--icf=all"
fi
fi
# Linux : remove unused code+data in link step
if test "x$ENABLE_LINKTIME_GC" = xtrue; then
@ -108,6 +103,9 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
# Setup OS-dependent LDFLAGS
if test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang; then
if test x$DEBUG_LEVEL = xrelease; then
BASIC_LDFLAGS_JDK_ONLY="$BASIC_LDFLAGS_JDK_ONLY -Wl,-dead_strip"
fi
# FIXME: We should really generalize SetSharedLibraryOrigin instead.
OS_LDFLAGS_JVM_ONLY="-Wl,-rpath,@loader_path/. -Wl,-rpath,@loader_path/.."
OS_LDFLAGS="-mmacosx-version-min=$MACOSX_VERSION_MIN -Wl,-reproducible"
@ -166,7 +164,8 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
################################################################################
# $1 - Either BUILD or TARGET to pick the correct OS/CPU variables to check
# conditionals against.
# $2 - Optional prefix for each variable defined.
# $2 - Optional prefix for each variable defined (OPENJDK_BUILD_ or nothing).
# $3 - Optional prefix for toolchain variables (BUILD_ or nothing).
AC_DEFUN([FLAGS_SETUP_LDFLAGS_CPU_DEP],
[
# Setup CPU-dependent basic LDFLAGS. These can differ between the target and
@ -200,6 +199,12 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_CPU_DEP],
fi
fi
if test "x${$3LD_TYPE}" = "xgold"; then
if test x$DEBUG_LEVEL = xrelease; then
$1_CPU_LDFLAGS="${$1_CPU_LDFLAGS} -Wl,--icf=all"
fi
fi
# Export variables according to old definitions, prefix with $2 if present.
LDFLAGS_JDK_COMMON="$BASIC_LDFLAGS $BASIC_LDFLAGS_JDK_ONLY \
$OS_LDFLAGS $DEBUGLEVEL_LDFLAGS_JDK_ONLY ${$2EXTRA_LDFLAGS}"

View File

@ -516,7 +516,7 @@ AC_DEFUN([TOOLCHAIN_EXTRACT_LD_VERSION],
if [ [[ "$LINKER_VERSION_STRING" == *gold* ]] ]; then
[ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
$SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*) .*/\1/'` ]
LINKER_TYPE=gold
$1_TYPE=gold
else
[ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
$SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*/\1/'` ]

View File

@ -229,6 +229,14 @@ define SetupLinkerFlags
# TOOLCHAIN_TYPE plus OPENJDK_TARGET_OS
ifeq ($$($1_LINK_TIME_OPTIMIZATION), true)
$1_EXTRA_LDFLAGS += $(LDFLAGS_LTO)
# Instruct the ld64 linker not to delete the temporary object file
# generated during Link Time Optimization
ifeq ($(call isTargetOs, macosx), true)
$1_EXTRA_LDFLAGS += -Wl,-object_path_lto,$$($1_OBJECT_DIR)/$$($1_NAME)_lto_helper.o
endif
ifeq ($(TOOLCHAIN_TYPE), microsoft)
$1_EXTRA_LDFLAGS += -LTCGOUT:$$($1_OBJECT_DIR)/$$($1_NAME).iobj
endif
endif
$1_EXTRA_LDFLAGS += $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS)) \

View File

@ -1192,8 +1192,8 @@ var getJibProfilesDependencies = function (input, common) {
server: "jpg",
product: "jcov",
version: "3.0",
build_number: "3",
file: "bundles/jcov-3.0+3.zip",
build_number: "5",
file: "bundles/jcov-3.0+5.zip",
environment_name: "JCOV_HOME",
},

View File

@ -26,17 +26,17 @@
# Default version, product, and vendor information to use,
# unless overridden by configure
DEFAULT_VERSION_FEATURE=26
DEFAULT_VERSION_FEATURE=27
DEFAULT_VERSION_INTERIM=0
DEFAULT_VERSION_UPDATE=0
DEFAULT_VERSION_PATCH=0
DEFAULT_VERSION_EXTRA1=0
DEFAULT_VERSION_EXTRA2=0
DEFAULT_VERSION_EXTRA3=0
DEFAULT_VERSION_DATE=2026-03-17
DEFAULT_VERSION_CLASSFILE_MAJOR=70 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
DEFAULT_VERSION_DATE=2026-09-15
DEFAULT_VERSION_CLASSFILE_MAJOR=71 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
DEFAULT_VERSION_CLASSFILE_MINOR=0
DEFAULT_VERSION_DOCS_API_SINCE=11
DEFAULT_ACCEPTABLE_BOOT_VERSIONS="25 26"
DEFAULT_JDK_SOURCE_TARGET_VERSION=26
DEFAULT_ACCEPTABLE_BOOT_VERSIONS="25 26 27"
DEFAULT_JDK_SOURCE_TARGET_VERSION=27
DEFAULT_PROMOTED_VERSION_PRE=ea
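
As the inline `$EXPR` comment above indicates, the class-file major version is derived from the feature release by adding 44 (class-file version 45 corresponded to JDK 1.1), so JDK 27 emits major version 71. A trivial, hypothetical check of that relationship, not part of the build:

```c++
// Hypothetical helper mirroring the $EXPR comment above.
constexpr int classfile_major(int feature_version) {
  return feature_version + 44;  // class-file version 45 was JDK 1.1
}
static_assert(classfile_major(27) == 71, "JDK 27 class files use major version 71");
static_assert(classfile_major(26) == 70, "JDK 26 class files use major version 70");
```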

View File

@ -151,6 +151,12 @@ JVM_STRIPFLAGS ?= $(STRIPFLAGS)
# This source set is reused so save in cache.
$(call FillFindCache, $(JVM_SRC_DIRS))
ifeq ($(SHIP_DEBUG_SYMBOLS), full)
CFLAGS_SHIP_DEBUGINFO := -DSHIP_DEBUGINFO_FULL
else ifeq ($(SHIP_DEBUG_SYMBOLS), public)
CFLAGS_SHIP_DEBUGINFO := -DSHIP_DEBUGINFO_PUBLIC
endif
ifeq ($(call isTargetOs, windows), true)
ifeq ($(STATIC_LIBS), true)
WIN_EXPORT_FILE := $(JVM_OUTPUTDIR)/static-win-exports.def
@ -158,10 +164,6 @@ ifeq ($(call isTargetOs, windows), true)
WIN_EXPORT_FILE := $(JVM_OUTPUTDIR)/win-exports.def
endif
ifeq ($(SHIP_DEBUG_SYMBOLS), public)
CFLAGS_STRIPPED_DEBUGINFO := -DHAS_STRIPPED_DEBUGINFO
endif
JVM_LDFLAGS += -def:$(WIN_EXPORT_FILE)
endif
@ -187,7 +189,7 @@ $(eval $(call SetupJdkLibrary, BUILD_LIBJVM, \
CFLAGS := $(JVM_CFLAGS), \
abstract_vm_version.cpp_CXXFLAGS := $(CFLAGS_VM_VERSION), \
arguments.cpp_CXXFLAGS := $(CFLAGS_VM_VERSION), \
whitebox.cpp_CXXFLAGS := $(CFLAGS_STRIPPED_DEBUGINFO), \
whitebox.cpp_CXXFLAGS := $(CFLAGS_SHIP_DEBUGINFO), \
DISABLED_WARNINGS_gcc := $(DISABLED_WARNINGS_gcc), \
DISABLED_WARNINGS_gcc_ad_$(HOTSPOT_TARGET_CPU_ARCH).cpp := nonnull, \
DISABLED_WARNINGS_gcc_bytecodeInterpreter.cpp := unused-label, \

View File

@ -164,6 +164,24 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
ifeq ($(USE_EXTERNAL_LIBPNG), false)
LIBSPLASHSCREEN_HEADER_DIRS += libsplashscreen/libpng
LIBSPLASHSCREEN_CFLAGS += -DPNG_NO_MMX_CODE -DPNG_ARM_NEON_OPT=0
-DPNG_ARM_NEON_IMPLEMENTATION=0 -DPNG_LOONGARCH_LSX_OPT=0
ifeq ($(call isTargetOs, linux)+$(call isTargetCpuArch, ppc), true+true)
LIBSPLASHSCREEN_CFLAGS += -DPNG_POWERPC_VSX_OPT=0
endif
# The libpng bundled with the JDK is a reduced version which does not
# contain .png_init_filter_functions_vsx.
# Therefore we need to disable PNG_POWERPC_VSX_OPT explicitly by setting
# it to 0. If this define is not set, it would be automatically set to 2,
# because
# "#if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)"
# expands to true. This would result in
# .png_init_filter_functions_vsx being required by libpng.
ifeq ($(call isTargetOs, aix), true)
LIBSPLASHSCREEN_CFLAGS += -DPNG_POWERPC_VSX_OPT=0
endif
else
LIBSPLASHSCREEN_EXCLUDES += libpng
endif
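
The comment block above refers to libpng's own configuration default: if PNG_POWERPC_VSX_OPT is left undefined on a VSX-capable PowerPC build, libpng enables the optimization and then expects png_init_filter_functions_vsx to be present. A rough sketch of that logic, not copied from the bundled pngpriv.h:

```c++
/* Illustrative sketch of the libpng default described above; the exact
 * wording in pngpriv.h may differ. */
#ifndef PNG_POWERPC_VSX_OPT
#  if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
#    define PNG_POWERPC_VSX_OPT 2   /* auto-enable the VSX fast paths */
#  else
#    define PNG_POWERPC_VSX_OPT 0
#  endif
#endif

#if PNG_POWERPC_VSX_OPT > 0
/* With the optimization enabled, libpng references this function, which the
 * reduced copy bundled with the JDK does not provide; hence the explicit
 * -DPNG_POWERPC_VSX_OPT=0 above. */
void png_init_filter_functions_vsx(/* png_structrp pp, unsigned int bpp */);
#endif
```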
@ -176,25 +194,7 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
LIBSPLASHSCREEN_STATIC_LIB_EXCLUDE_OBJS += $(LIBZIP_OBJS)
endif
LIBSPLASHSCREEN_CFLAGS += -DSPLASHSCREEN -DPNG_NO_MMX_CODE \
-DPNG_ARM_NEON_OPT=0 -DPNG_ARM_NEON_IMPLEMENTATION=0 \
-DPNG_LOONGARCH_LSX_OPT=0
ifeq ($(call isTargetOs, linux)+$(call isTargetCpuArch, ppc), true+true)
LIBSPLASHSCREEN_CFLAGS += -DPNG_POWERPC_VSX_OPT=0
endif
# The external libpng submitted in the jdk is a reduced version
# which does not contain .png_init_filter_functions_vsx.
# Therefore we need to disable PNG_POWERPC_VSX_OPT explicitly by setting
# it to 0. If this define is not set, it would be automatically set to 2,
# because
# "#if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)"
# expands to true. This would results in the fact that
# .png_init_filter_functions_vsx is needed in libpng.
ifeq ($(call isTargetOs, aix), true)
LIBSPLASHSCREEN_CFLAGS += -DPNG_POWERPC_VSX_OPT=0
endif
LIBSPLASHSCREEN_CFLAGS += -DSPLASHSCREEN
ifeq ($(call isTargetOs, macosx), true)
# libsplashscreen on macosx does not use the unix code
@ -226,7 +226,6 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
EXCLUDE_FILES := imageioJPEG.c jpegdecoder.c pngtest.c, \
EXCLUDES := $(LIBSPLASHSCREEN_EXCLUDES), \
OPTIMIZATION := SIZE, \
LINK_TIME_OPTIMIZATION := true, \
CFLAGS := $(LIBSPLASHSCREEN_CFLAGS) \
$(GIFLIB_CFLAGS) $(LIBJPEG_CFLAGS) $(PNG_CFLAGS) $(LIBZ_CFLAGS) \
$(ICONV_CFLAGS), \

View File

@ -2003,6 +2003,9 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
uint ireg = ideal_reg();
DEBUG_ONLY(int algm = MIN2(RegMask::num_registers(ireg), (int)Matcher::stack_alignment_in_slots()) * VMRegImpl::stack_slot_size);
assert((src_lo_rc != rc_stack) || is_aligned(src_offset, algm), "unaligned vector spill sp offset %d (src)", src_offset);
assert((dst_lo_rc != rc_stack) || is_aligned(dst_offset, algm), "unaligned vector spill sp offset %d (dst)", dst_offset);
if (ireg == Op_VecA && masm) {
int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {

View File

@ -695,7 +695,7 @@ instruct getAndSetP(indirect mem, iRegP newval, iRegPNoSp oldval) %{
instruct getAndSetIAcq(indirect mem, iRegI newval, iRegINoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set oldval (GetAndSetI mem newval));
ins_cost(2*VOLATILE_REF_COST);
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchgw_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgalw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
@ -706,7 +706,7 @@ instruct getAndSetIAcq(indirect mem, iRegI newval, iRegINoSp oldval) %{
instruct getAndSetLAcq(indirect mem, iRegL newval, iRegLNoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set oldval (GetAndSetL mem newval));
ins_cost(2*VOLATILE_REF_COST);
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgal($oldval$$Register, $newval$$Register, as_Register($mem$$base));
@ -717,7 +717,7 @@ instruct getAndSetLAcq(indirect mem, iRegL newval, iRegLNoSp oldval) %{
instruct getAndSetNAcq(indirect mem, iRegN newval, iRegNNoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (GetAndSetN mem newval));
ins_cost(2*VOLATILE_REF_COST);
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchgw_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgalw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
@ -728,7 +728,7 @@ instruct getAndSetNAcq(indirect mem, iRegN newval, iRegNNoSp oldval) %{
instruct getAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set oldval (GetAndSetP mem newval));
ins_cost(2*VOLATILE_REF_COST);
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgal($oldval$$Register, $newval$$Register, as_Register($mem$$base));

View File

@ -187,7 +187,7 @@ ifelse($1$3,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_Lo
$3,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set oldval (GetAndSet$1 mem newval));
ins_cost(`'ifelse($4,Acq,,2*)VOLATILE_REF_COST);
ins_cost(`'ifelse($3,Acq,,2*)VOLATILE_REF_COST);
format %{ "atomic_xchg$2`'ifelse($3,Acq,_acq) $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchg`'ifelse($3,Acq,al)$2($oldval$$Register, $newval$$Register, as_Register($mem$$base));

View File

@ -346,8 +346,14 @@ source %{
}
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
// Only SVE has partial vector operations
if (UseSVE == 0) {
// 1. Only SVE requires partial vector operations.
// 2. The vector size in bytes must be smaller than MaxVectorSize.
// 3. Predicated vectors have a mask input, which guarantees that
// out-of-bounds lanes remain inactive.
int length_in_bytes = vt->length_in_bytes();
if (UseSVE == 0 ||
length_in_bytes == MaxVectorSize ||
node->is_predicated_vector()) {
return false;
}
@ -370,21 +376,22 @@ source %{
return !node->in(1)->is_Con();
case Op_LoadVector:
case Op_StoreVector:
// We use NEON load/store instructions if the vector length is <= 128 bits.
return vt->length_in_bytes() > 16;
case Op_AddReductionVI:
case Op_AddReductionVL:
// We may prefer using NEON instructions rather than SVE partial operations.
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
// For these ops, we prefer using NEON instructions rather than SVE
// predicated instructions for better performance.
return !VM_Version::use_neon_for_vector(length_in_bytes);
case Op_MinReductionV:
case Op_MaxReductionV:
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
// instructions rather than SVE partial operations.
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
// instructions rather than SVE predicated instructions for
// better performance.
return vt->element_basic_type() == T_LONG ||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
!VM_Version::use_neon_for_vector(length_in_bytes);
default:
// For other ops whose vector size is smaller than the max vector size, a
// full-sized unpredicated operation does not impact the final vector result.
// For other ops whose vector size is smaller than the max vector
// size, a full-sized unpredicated operation does not impact the
// vector result.
return false;
}
}

View File

@ -336,8 +336,14 @@ source %{
}
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
// Only SVE has partial vector operations
if (UseSVE == 0) {
// 1. Only SVE requires partial vector operations.
// 2. The vector size in bytes must be smaller than MaxVectorSize.
// 3. Predicated vectors have a mask input, which guarantees that
// out-of-bounds lanes remain inactive.
int length_in_bytes = vt->length_in_bytes();
if (UseSVE == 0 ||
length_in_bytes == MaxVectorSize ||
node->is_predicated_vector()) {
return false;
}
@ -360,21 +366,22 @@ source %{
return !node->in(1)->is_Con();
case Op_LoadVector:
case Op_StoreVector:
// We use NEON load/store instructions if the vector length is <= 128 bits.
return vt->length_in_bytes() > 16;
case Op_AddReductionVI:
case Op_AddReductionVL:
// We may prefer using NEON instructions rather than SVE partial operations.
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
// For these ops, we prefer using NEON instructions rather than SVE
// predicated instructions for better performance.
return !VM_Version::use_neon_for_vector(length_in_bytes);
case Op_MinReductionV:
case Op_MaxReductionV:
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
// instructions rather than SVE partial operations.
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
// instructions rather than SVE predicated instructions for
// better performance.
return vt->element_basic_type() == T_LONG ||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
!VM_Version::use_neon_for_vector(length_in_bytes);
default:
// For other ops whose vector size is smaller than the max vector size, a
// full-sized unpredicated operation does not impact the final vector result.
// For other ops whose vector size is smaller than the max vector
// size, a full-sized unpredicated operation does not impact the
// vector result.
return false;
}
}

View File

@ -5379,7 +5379,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
assert(! Universe::heap()->is_in(k), "should not be an oop");
InstructionMark im(this);
RelocationHolder rspec = metadata_Relocation::spec(index);

View File

@ -85,7 +85,7 @@ void Relocation::pd_set_call_destination(address x) {
} else {
MacroAssembler::pd_patch_instruction(addr(), x);
}
assert(pd_call_destination(addr()) == x, "fail in reloc");
guarantee(pd_call_destination(addr()) == x, "fail in reloc");
}
void trampoline_stub_Relocation::pd_fix_owner_after_move() {

View File

@ -1795,10 +1795,13 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
return size; // Self copy, no move.
if (bottom_type()->isa_vect() != nullptr && ideal_reg() == Op_VecX) {
int src_offset = ra_->reg2offset(src_lo);
int dst_offset = ra_->reg2offset(dst_lo);
DEBUG_ONLY(int algm = MIN2(RegMask::num_registers(ideal_reg()), (int)Matcher::stack_alignment_in_slots()) * VMRegImpl::stack_slot_size);
assert((src_lo_rc != rc_stack) || is_aligned(src_offset, algm), "unaligned vector spill sp offset %d (src)", src_offset);
assert((dst_lo_rc != rc_stack) || is_aligned(dst_offset, algm), "unaligned vector spill sp offset %d (dst)", dst_offset);
// Memory->Memory Spill.
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
int src_offset = ra_->reg2offset(src_lo);
int dst_offset = ra_->reg2offset(dst_lo);
if (masm) {
__ ld(R0, src_offset, R1_SP);
__ std(R0, dst_offset, R1_SP);
@ -1806,26 +1809,20 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
__ std(R0, dst_offset+8, R1_SP);
}
size += 16;
#ifndef PRODUCT
if (st != nullptr) {
st->print("%-7s [R1_SP + #%d] -> [R1_SP + #%d] \t// vector spill copy", "SPILL", src_offset, dst_offset);
}
#endif // !PRODUCT
}
// VectorRegister->Memory Spill.
else if (src_lo_rc == rc_vec && dst_lo_rc == rc_stack) {
VectorSRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]).to_vsr();
int dst_offset = ra_->reg2offset(dst_lo);
if (PowerArchitecturePPC64 >= 9) {
if (is_aligned(dst_offset, 16)) {
if (masm) {
__ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16_Power9
}
size += 4;
} else {
// Other alignment can be used by Vector API (VectorPayload in rearrangeOp,
// observed with VectorRearrangeTest.java on Power9).
if (masm) {
__ addi(R0, R1_SP, dst_offset);
__ stxvx(Rsrc, R0); // matches storeV16_Power9 (regarding element ordering)
}
size += 8;
if (masm) {
__ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16_Power9
}
size += 4;
} else {
if (masm) {
__ addi(R0, R1_SP, dst_offset);
@ -1833,24 +1830,25 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
}
size += 8;
}
#ifndef PRODUCT
if (st != nullptr) {
if (PowerArchitecturePPC64 >= 9) {
st->print("%-7s %s, [R1_SP + #%d] \t// vector spill copy", "STXV", Matcher::regName[src_lo], dst_offset);
} else {
st->print("%-7s R0, R1_SP, %d \t// vector spill copy\n\t"
"%-7s %s, [R0] \t// vector spill copy", "ADDI", dst_offset, "STXVD2X", Matcher::regName[src_lo]);
}
}
#endif // !PRODUCT
}
// Memory->VectorRegister Spill.
else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vec) {
VectorSRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]).to_vsr();
int src_offset = ra_->reg2offset(src_lo);
if (PowerArchitecturePPC64 >= 9) {
if (is_aligned(src_offset, 16)) {
if (masm) {
__ lxv(Rdst, src_offset, R1_SP);
}
size += 4;
} else {
if (masm) {
__ addi(R0, R1_SP, src_offset);
__ lxvx(Rdst, R0);
}
size += 8;
if (masm) {
__ lxv(Rdst, src_offset, R1_SP);
}
size += 4;
} else {
if (masm) {
__ addi(R0, R1_SP, src_offset);
@ -1858,6 +1856,16 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
}
size += 8;
}
#ifndef PRODUCT
if (st != nullptr) {
if (PowerArchitecturePPC64 >= 9) {
st->print("%-7s %s, [R1_SP + #%d] \t// vector spill copy", "LXV", Matcher::regName[dst_lo], src_offset);
} else {
st->print("%-7s R0, R1_SP, %d \t// vector spill copy\n\t"
"%-7s %s, [R0] \t// vector spill copy", "ADDI", src_offset, "LXVD2X", Matcher::regName[dst_lo]);
}
}
#endif // !PRODUCT
}
// VectorRegister->VectorRegister.
else if (src_lo_rc == rc_vec && dst_lo_rc == rc_vec) {
@ -1867,6 +1875,12 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
__ xxlor(Rdst, Rsrc, Rsrc);
}
size += 4;
#ifndef PRODUCT
if (st != nullptr) {
st->print("%-7s %s, %s, %s\t// vector spill copy",
"XXLOR", Matcher::regName[dst_lo], Matcher::regName[src_lo], Matcher::regName[src_lo]);
}
#endif // !PRODUCT
}
else {
ShouldNotReachHere(); // No VR spill.
@ -6321,8 +6335,36 @@ instruct loadConD_Ex(regD dst, immD src) %{
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
// Special prefetch versions which use the dcbz instruction.
instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
match(PrefetchAllocation (AddP mem src));
predicate(AllocatePrefetchStyle == 3);
ins_cost(MEMORY_REF_COST);
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
size(4);
ins_encode %{
__ dcbz($src$$Register, $mem$$base$$Register);
%}
ins_pipe(pipe_class_memory);
%}
instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
match(PrefetchAllocation mem);
predicate(AllocatePrefetchStyle == 3);
ins_cost(MEMORY_REF_COST);
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
size(4);
ins_encode %{
__ dcbz($mem$$base$$Register);
%}
ins_pipe(pipe_class_memory);
%}
instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
match(PrefetchAllocation (AddP mem src));
predicate(AllocatePrefetchStyle != 3);
ins_cost(MEMORY_REF_COST);
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
@ -6335,6 +6377,7 @@ instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
instruct prefetch_alloc_no_offset(indirectMemory mem) %{
match(PrefetchAllocation mem);
predicate(AllocatePrefetchStyle != 3);
ins_cost(MEMORY_REF_COST);
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}

View File

@ -2067,6 +2067,83 @@ void C2_MacroAssembler::enc_cmove_cmp_fp(int cmpFlag, FloatRegister op1, FloatRe
}
}
void C2_MacroAssembler::enc_cmove_fp_cmp(int cmpFlag, Register op1, Register op2,
FloatRegister dst, FloatRegister src, bool is_single) {
bool is_unsigned = (cmpFlag & unsigned_branch_mask) == unsigned_branch_mask;
int op_select = cmpFlag & (~unsigned_branch_mask);
switch (op_select) {
case BoolTest::eq:
cmov_fp_eq(op1, op2, dst, src, is_single);
break;
case BoolTest::ne:
cmov_fp_ne(op1, op2, dst, src, is_single);
break;
case BoolTest::le:
if (is_unsigned) {
cmov_fp_leu(op1, op2, dst, src, is_single);
} else {
cmov_fp_le(op1, op2, dst, src, is_single);
}
break;
case BoolTest::ge:
if (is_unsigned) {
cmov_fp_geu(op1, op2, dst, src, is_single);
} else {
cmov_fp_ge(op1, op2, dst, src, is_single);
}
break;
case BoolTest::lt:
if (is_unsigned) {
cmov_fp_ltu(op1, op2, dst, src, is_single);
} else {
cmov_fp_lt(op1, op2, dst, src, is_single);
}
break;
case BoolTest::gt:
if (is_unsigned) {
cmov_fp_gtu(op1, op2, dst, src, is_single);
} else {
cmov_fp_gt(op1, op2, dst, src, is_single);
}
break;
default:
assert(false, "unsupported compare condition");
ShouldNotReachHere();
}
}
void C2_MacroAssembler::enc_cmove_fp_cmp_fp(int cmpFlag,
FloatRegister op1, FloatRegister op2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
int op_select = cmpFlag & (~unsigned_branch_mask);
switch (op_select) {
case BoolTest::eq:
cmov_fp_cmp_fp_eq(op1, op2, dst, src, cmp_single, cmov_single);
break;
case BoolTest::ne:
cmov_fp_cmp_fp_ne(op1, op2, dst, src, cmp_single, cmov_single);
break;
case BoolTest::le:
cmov_fp_cmp_fp_le(op1, op2, dst, src, cmp_single, cmov_single);
break;
case BoolTest::ge:
cmov_fp_cmp_fp_ge(op1, op2, dst, src, cmp_single, cmov_single);
break;
case BoolTest::lt:
cmov_fp_cmp_fp_lt(op1, op2, dst, src, cmp_single, cmov_single);
break;
case BoolTest::gt:
cmov_fp_cmp_fp_gt(op1, op2, dst, src, cmp_single, cmov_single);
break;
default:
assert(false, "unsupported compare condition");
ShouldNotReachHere();
}
}
// Set dst to NaN if any NaN input.
void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
FLOAT_TYPE ft, bool is_min) {

View File

@ -132,6 +132,13 @@
FloatRegister op1, FloatRegister op2,
Register dst, Register src, bool is_single);
void enc_cmove_fp_cmp(int cmpFlag, Register op1, Register op2,
FloatRegister dst, FloatRegister src, bool is_single);
void enc_cmove_fp_cmp_fp(int cmpFlag, FloatRegister op1, FloatRegister op2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single);
void spill(Register r, bool is64, int offset) {
is64 ? sd(r, Address(sp, offset))
: sw(r, Address(sp, offset));

View File

@ -1233,7 +1233,119 @@ void MacroAssembler::cmov_gtu(Register cmp1, Register cmp2, Register dst, Regist
bind(no_set);
}
// ----------- cmove, compare float -----------
// ----------- cmove float/double -----------
void MacroAssembler::cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bne(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
beq(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bgt(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bgtu(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
blt(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bltu(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bge(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bgeu(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
ble(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
Label no_set;
bleu(cmp1, cmp2, no_set);
if (is_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
// ----------- cmove, compare float/double -----------
//
// For CmpF/D + CMoveI/L, the ordered cases are quite straightforward,
// so we just list the behaviour of the unordered ones as follows.
@ -1391,6 +1503,148 @@ void MacroAssembler::cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Regi
bind(no_set);
}
// ----------- cmove float/double, compare float/double -----------
// Move src to dst only if cmp1 == cmp2,
// otherwise leave dst unchanged, including the case where one of them is NaN.
// Clarification:
// java code : cmp1 != cmp2 ? dst : src
// transformed to : CMove dst, (cmp1 eq cmp2), dst, src
void MacroAssembler::cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
Label no_set;
if (cmp_single) {
// jump if cmp1 != cmp2, including the case of NaN
// not jump (i.e. move src to dst) if cmp1 == cmp2
float_bne(cmp1, cmp2, no_set);
} else {
double_bne(cmp1, cmp2, no_set);
}
if (cmov_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
// Keep dst unchanged only if cmp1 == cmp2,
// otherwise move src to dst, including the case where one of them is NaN.
// Clarification:
// java code : cmp1 == cmp2 ? dst : src
// transformed to : CMove dst, (cmp1 ne cmp2), dst, src
void MacroAssembler::cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
Label no_set;
if (cmp_single) {
// jump if cmp1 == cmp2
// not jump (i.e. move src to dst) if cmp1 != cmp2, including the case of NaN
float_beq(cmp1, cmp2, no_set);
} else {
double_beq(cmp1, cmp2, no_set);
}
if (cmov_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
// When cmp1 <= cmp2 or either of them is NaN, then dst = src; otherwise dst = dst.
// Clarification
// scenario 1:
// java code : cmp2 < cmp1 ? dst : src
// transformed to : CMove dst, (cmp1 le cmp2), dst, src
// scenario 2:
// java code : cmp1 > cmp2 ? dst : src
// transformed to : CMove dst, (cmp1 le cmp2), dst, src
void MacroAssembler::cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
Label no_set;
if (cmp_single) {
// jump if cmp1 > cmp2
// not jump (i.e. move src to dst) if cmp1 <= cmp2 or either is NaN
float_bgt(cmp1, cmp2, no_set);
} else {
double_bgt(cmp1, cmp2, no_set);
}
if (cmov_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
Label no_set;
if (cmp_single) {
// jump if cmp1 < cmp2 or either is NaN
// not jump (i.e. move src to dst) if cmp1 >= cmp2
float_blt(cmp1, cmp2, no_set, false, true);
} else {
double_blt(cmp1, cmp2, no_set, false, true);
}
if (cmov_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
// When cmp1 < cmp2 or either of them is NaN, then dst = src; otherwise dst = dst.
// Clarification
// scenario 1:
// java code : cmp2 <= cmp1 ? dst : src
// transformed to : CMove dst, (cmp1 lt cmp2), dst, src
// scenario 2:
// java code : cmp1 >= cmp2 ? dst : src
// transformed to : CMove dst, (cmp1 lt cmp2), dst, src
void MacroAssembler::cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
Label no_set;
if (cmp_single) {
// jump if cmp1 >= cmp2
// not jump (i.e. move src to dst) if cmp1 < cmp2 or either is NaN
float_bge(cmp1, cmp2, no_set);
} else {
double_bge(cmp1, cmp2, no_set);
}
if (cmov_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
void MacroAssembler::cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single) {
Label no_set;
if (cmp_single) {
// jump if cmp1 <= cmp2 or either is NaN
// not jump (i.e. move src to dst) if cmp1 > cmp2
float_ble(cmp1, cmp2, no_set, false, true);
} else {
double_ble(cmp1, cmp2, no_set, false, true);
}
if (cmov_single) {
fmv_s(dst, src);
} else {
fmv_d(dst, src);
}
bind(no_set);
}
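
To make the "java code / transformed to" comments above concrete, here is an illustrative (non-JDK) example for the eq case; C++ float comparisons have the same unordered-NaN behaviour as Java's:

```c++
// (a != b) is true whenever either operand is NaN, so this ternary keeps r
// (the current dst) for unordered inputs and selects s only when a == b.
// C2 expresses it as CMove(dst, (a eq b), dst, src); cmov_fp_cmp_fp_eq
// implements that by branching away on float_bne (taken for NaN) and falling
// through to the fmv move otherwise.
static float cmove_eq_example(float a, float b, float r, float s) {
  return (a != b) ? r : s;
}
```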
// Float compare branch instructions
#define INSN(NAME, FLOATCMP, BRANCH) \
@ -4933,7 +5187,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
assert(!Universe::heap()->is_in(k), "should not be an oop");
narrowKlass nk = CompressedKlassPointers::encode(k);
relocate(metadata_Relocation::spec(index), [&] {

View File

@ -665,6 +665,24 @@ class MacroAssembler: public Assembler {
void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
public:
// We try to follow risc-v asm mnemonics.
// But as we don't layout a reachable GOT,

View File

@ -1924,8 +1924,6 @@ bool Matcher::match_rule_supported(int opcode) {
case Op_SubHF:
return UseZfh;
case Op_CMoveF:
case Op_CMoveD:
case Op_CMoveP:
case Op_CMoveN:
return false;
@ -10466,6 +10464,286 @@ instruct cmovL_cmpP(iRegLNoSp dst, iRegL src, iRegP op1, iRegP op2, cmpOpU cop)
ins_pipe(pipe_class_compare);
%}
// --------- CMoveF ---------
instruct cmovF_cmpI(fRegF dst, fRegF src, iRegI op1, iRegI op2, cmpOp cop) %{
match(Set dst (CMoveF (Binary cop (CmpI op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpI\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpU(fRegF dst, fRegF src, iRegI op1, iRegI op2, cmpOpU cop) %{
match(Set dst (CMoveF (Binary cop (CmpU op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpU\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpL(fRegF dst, fRegF src, iRegL op1, iRegL op2, cmpOp cop) %{
match(Set dst (CMoveF (Binary cop (CmpL op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpL\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpUL(fRegF dst, fRegF src, iRegL op1, iRegL op2, cmpOpU cop) %{
match(Set dst (CMoveF (Binary cop (CmpUL op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpUL\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpF(fRegF dst, fRegF src, fRegF op1, fRegF op2, cmpOp cop) %{
match(Set dst (CMoveF (Binary cop (CmpF op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpF\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp_fp($cop$$cmpcode,
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
true /* cmp_single */, true /* cmov_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpD(fRegF dst, fRegF src, fRegD op1, fRegD op2, cmpOp cop) %{
match(Set dst (CMoveF (Binary cop (CmpD op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpD\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask,
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
false /* cmp_single */, true /* cmov_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpN(fRegF dst, fRegF src, iRegN op1, iRegN op2, cmpOp cop) %{
match(Set dst (CMoveF (Binary cop (CmpN op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpN\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovF_cmpP(fRegF dst, fRegF src, iRegP op1, iRegP op2, cmpOp cop) %{
match(Set dst (CMoveF (Binary cop (CmpP op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpP\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
// --------- CMoveD ---------
instruct cmovD_cmpI(fRegD dst, fRegD src, iRegI op1, iRegI op2, cmpOp cop) %{
match(Set dst (CMoveD (Binary cop (CmpI op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpI\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpU(fRegD dst, fRegD src, iRegI op1, iRegI op2, cmpOpU cop) %{
match(Set dst (CMoveD (Binary cop (CmpU op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpU\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpL(fRegD dst, fRegD src, iRegL op1, iRegL op2, cmpOp cop) %{
match(Set dst (CMoveD (Binary cop (CmpL op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpL\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpUL(fRegD dst, fRegD src, iRegL op1, iRegL op2, cmpOpU cop) %{
match(Set dst (CMoveD (Binary cop (CmpUL op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpUL\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpF(fRegD dst, fRegD src, fRegF op1, fRegF op2, cmpOp cop) %{
match(Set dst (CMoveD (Binary cop (CmpF op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpF\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp_fp($cop$$cmpcode,
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
true /* cmp_single */, false /* cmov_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpD(fRegD dst, fRegD src, fRegD op1, fRegD op2, cmpOp cop) %{
match(Set dst (CMoveD (Binary cop (CmpD op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpD\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask,
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
false /* cmp_single */, false /* cmov_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpN(fRegD dst, fRegD src, iRegN op1, iRegN op2, cmpOp cop) %{
match(Set dst (CMoveD (Binary cop (CmpN op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpN\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
instruct cmovD_cmpP(fRegD dst, fRegD src, iRegP op1, iRegP op2, cmpOp cop) %{
match(Set dst (CMoveD (Binary cop (CmpP op1 op2)) (Binary dst src)));
ins_cost(ALU_COST + BRANCH_COST);
format %{
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpP\n\t"
%}
ins_encode %{
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
as_Register($op1$$reg), as_Register($op2$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
%}
ins_pipe(pipe_class_compare);
%}
// ============================================================================
// Procedure Call/Return Instructions

View File

@ -2493,8 +2493,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);
__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
@ -2572,8 +2572,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);
__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
@ -2606,6 +2606,401 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
void cipherBlockChaining_encryptAESCrypt(int round, Register from, Register to, Register key,
Register rvec, Register input_len) {
const Register len = x29;
VectorRegister working_vregs[] = {
v1, v2, v3, v4, v5, v6, v7, v8,
v9, v10, v11, v12, v13, v14, v15
};
const unsigned int BLOCK_SIZE = 16;
__ mv(len, input_len);
// load init rvec
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(v16, rvec);
generate_aes_loadkeys(key, working_vregs, round);
Label L_enc_loop;
__ bind(L_enc_loop);
// Encrypt from source by block size
__ vle32_v(v17, from);
__ addi(from, from, BLOCK_SIZE);
__ vxor_vv(v16, v16, v17);
generate_aes_encrypt(v16, working_vregs, round);
__ vse32_v(v16, to);
__ addi(to, to, BLOCK_SIZE);
__ subi(len, len, BLOCK_SIZE);
__ bnez(len, L_enc_loop);
// save current rvec and return
__ vse32_v(v16, rvec);
__ mv(x10, input_len);
__ leave();
__ ret();
}
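
For readers less familiar with CBC mode, here is a scalar sketch of what the generated vector loop above computes; block_encrypt is a hypothetical placeholder for the AES rounds emitted by generate_aes_encrypt(), and none of these names are JDK functions:

```c++
#include <cstddef>
#include <cstring>

// Scalar sketch of CBC encryption: xor each plaintext block into the chaining
// value, encrypt it in place, emit it as ciphertext, and carry it forward.
void cbc_encrypt_sketch(const unsigned char* from, unsigned char* to,
                        std::size_t len, unsigned char rvec[16],
                        void (*block_encrypt)(unsigned char block[16])) {
  constexpr std::size_t BLOCK_SIZE = 16;
  for (std::size_t off = 0; off < len; off += BLOCK_SIZE) {
    for (std::size_t i = 0; i < BLOCK_SIZE; i++) {
      rvec[i] ^= from[off + i];              // xor plaintext into chaining value (v16 ^= v17)
    }
    block_encrypt(rvec);                     // AES rounds, in place
    std::memcpy(to + off, rvec, BLOCK_SIZE); // ciphertext block becomes the next chaining value
  }
  // rvec now holds the last ciphertext block, ready for a subsequent call.
}
```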
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// Output:
// x10 - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAESIntrinsics, "Must be");
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_encryptAESCrypt_id;
StubCodeMark mark(this, stub_id);
const Register from = c_rarg0;
const Register to = c_rarg1;
const Register key = c_rarg2;
const Register rvec = c_rarg3;
const Register input_len = c_rarg4;
const Register keylen = x28;
address start = __ pc();
__ enter();
Label L_aes128, L_aes192;
// Compute #rounds for AES based on the length of the key array
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ mv(t0, 52);
__ bltu(keylen, t0, L_aes128);
__ beq(keylen, t0, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
// Note: the following function performs key += 15*16
cipherBlockChaining_encryptAESCrypt(15, from, to, key, rvec, input_len);
// Note: the following function performs key += 11*16
__ bind(L_aes128);
cipherBlockChaining_encryptAESCrypt(11, from, to, key, rvec, input_len);
// Note: the following function performs key += 13*16
__ bind(L_aes192);
cipherBlockChaining_encryptAESCrypt(13, from, to, key, rvec, input_len);
return start;
}
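Editor note: a small sketch of the key-length dispatch used above, assuming the expanded key array length in 32-bit words is the only discriminator; here "round" counts 128-bit round keys (Nr + 1), matching the 11/13/15 values passed to the helpers.

// Number of 128-bit round keys as a function of the expanded key length in
// 32-bit words: 44 -> 11 (AES-128), 52 -> 13 (AES-192), 60 -> 15 (AES-256).
static int rounds_for_keylen(int keylen_in_ints) {
  if (keylen_in_ints < 52) return 11;   // AES-128
  if (keylen_in_ints == 52) return 13;  // AES-192
  return 15;                            // AES-256
}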
void cipherBlockChaining_decryptAESCrypt(int round, Register from, Register to, Register key,
Register rvec, Register input_len) {
const Register len = x29;
VectorRegister working_vregs[] = {
v1, v2, v3, v4, v5, v6, v7, v8,
v9, v10, v11, v12, v13, v14, v15
};
const unsigned int BLOCK_SIZE = 16;
__ mv(len, input_len);
// load init rvec
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(v16, rvec);
generate_aes_loadkeys(key, working_vregs, round);
Label L_dec_loop;
// Decrypt the source, one block at a time
__ bind(L_dec_loop);
__ vle32_v(v17, from);
__ addi(from, from, BLOCK_SIZE);
__ vmv_v_v(v18, v17);
generate_aes_decrypt(v17, working_vregs, round);
__ vxor_vv(v17, v17, v16);
__ vse32_v(v17, to);
__ vmv_v_v(v16, v18);
__ addi(to, to, BLOCK_SIZE);
__ subi(len, len, BLOCK_SIZE);
__ bnez(len, L_dec_loop);
// save current rvec and return
__ vse32_v(v16, rvec);
__ mv(x10, input_len);
__ leave();
__ ret();
}
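Editor note: the matching scalar sketch for CBC decryption, again with a generic decrypt_block callback as an assumption; it shows why the ciphertext block must be saved before decryption (the stub keeps it in v18), which also makes in-place operation safe.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>

// CBC decryption: decrypt each ciphertext block, then XOR with the previous
// ciphertext block (the chaining value). The original ciphertext is saved
// first so it can become the next chaining value.
void cbc_decrypt(const uint8_t* from, uint8_t* to, size_t len, uint8_t rvec[16],
                 const std::function<void(const uint8_t in[16], uint8_t out[16])>& decrypt_block) {
  for (size_t off = 0; off < len; off += 16) {
    uint8_t saved_ct[16];
    std::memcpy(saved_ct, from + off, 16);   // keep ciphertext for the next iteration
    uint8_t plain[16];
    decrypt_block(from + off, plain);
    for (int i = 0; i < 16; i++) {
      to[off + i] = plain[i] ^ rvec[i];      // undo the chaining XOR
    }
    std::memcpy(rvec, saved_ct, 16);
  }
}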
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - r vector byte array address
// c_rarg4 - input length
//
// Output:
// x10 - input length
//
address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAESIntrinsics, "Must be");
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_decryptAESCrypt_id;
StubCodeMark mark(this, stub_id);
const Register from = c_rarg0;
const Register to = c_rarg1;
const Register key = c_rarg2;
const Register rvec = c_rarg3;
const Register input_len = c_rarg4;
const Register keylen = x28;
address start = __ pc();
__ enter();
Label L_aes128, L_aes192, L_aes128_loop, L_aes192_loop, L_aes256_loop;
// Compute #rounds for AES based on the length of the key array
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ mv(t0, 52);
__ bltu(keylen, t0, L_aes128);
__ beq(keylen, t0, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
// Note: the following function performs key += 15*16
cipherBlockChaining_decryptAESCrypt(15, from, to, key, rvec, input_len);
// Note: the following function performs key += 11*16
__ bind(L_aes128);
cipherBlockChaining_decryptAESCrypt(11, from, to, key, rvec, input_len);
// Note: the following function performs key += 13*16
__ bind(L_aes192);
cipherBlockChaining_decryptAESCrypt(13, from, to, key, rvec, input_len);
return start;
}
// Load big-endian 128-bit from memory.
void be_load_counter_128(Register counter_hi, Register counter_lo, Register counter) {
__ ld(counter_lo, Address(counter, 8)); // Load 128-bits from counter
__ ld(counter_hi, Address(counter));
__ rev8(counter_lo, counter_lo); // Convert big-endian to little-endian
__ rev8(counter_hi, counter_hi);
}
// Little-endian 128-bit + 64-bit -> 128-bit addition.
void add_counter_128(Register counter_hi, Register counter_lo) {
assert_different_registers(counter_hi, counter_lo, t0);
__ addi(counter_lo, counter_lo, 1);
__ seqz(t0, counter_lo); // Check for result overflow
__ add(counter_hi, counter_hi, t0); // Add 1 if overflow otherwise 0
}
// Store big-endian 128-bit to memory.
void be_store_counter_128(Register counter_hi, Register counter_lo, Register counter) {
assert_different_registers(counter_hi, counter_lo, t0, t1);
__ rev8(t0, counter_lo); // Convert little-endian to big-endian
__ rev8(t1, counter_hi);
__ sd(t0, Address(counter, 8)); // Store 128-bits to counter
__ sd(t1, Address(counter));
}
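Editor note: a C++ sketch of what the three helpers above do together, assuming a little-endian host (as on the RISC-V target) and the GCC/Clang __builtin_bswap64 builtin in place of rev8; this is an illustration, not the stub code.

#include <cstdint>
#include <cstring>

// Increment a 16-byte big-endian counter by one, mirroring
// be_load_counter_128 / add_counter_128 / be_store_counter_128.
void increment_be_counter_128(uint8_t counter[16]) {
  uint64_t hi, lo;
  std::memcpy(&hi, counter, 8);        // big-endian high half
  std::memcpy(&lo, counter + 8, 8);    // big-endian low half
  hi = __builtin_bswap64(hi);          // convert to native integer values
  lo = __builtin_bswap64(lo);
  lo += 1;
  hi += (lo == 0) ? 1 : 0;             // carry into the high half on wrap (seqz + add in the stub)
  hi = __builtin_bswap64(hi);          // back to big-endian byte order
  lo = __builtin_bswap64(lo);
  std::memcpy(counter, &hi, 8);
  std::memcpy(counter + 8, &lo, 8);
}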
void counterMode_AESCrypt(int round, Register in, Register out, Register key, Register counter,
Register input_len, Register saved_encrypted_ctr, Register used_ptr) {
// Algorithm:
//
// generate_aes_loadkeys();
// load_counter_128(counter_hi, counter_lo, counter);
//
// L_next:
// if (used >= BLOCK_SIZE) goto L_main_loop;
//
// L_encrypt_next:
// *out = *in ^ saved_encrypted_ctr[used];
// out++; in++; used++; len--;
// if (len == 0) goto L_exit;
// goto L_next;
//
// L_main_loop:
// if (len == 0) goto L_exit;
// saved_encrypted_ctr = generate_aes_encrypt(counter);
//
// add_counter_128(counter_hi, counter_lo);
// be_store_counter_128(counter_hi, counter_lo, counter);
// used = 0;
//
// if(len < BLOCK_SIZE) goto L_encrypt_next;
//
// v_in = load_16Byte(in);
// v_out = load_16Byte(out);
// v_saved_encrypted_ctr = load_16Byte(saved_encrypted_ctr);
// v_out = v_in ^ v_saved_encrypted_ctr;
// out += BLOCK_SIZE;
// in += BLOCK_SIZE;
// len -= BLOCK_SIZE;
// used = BLOCK_SIZE;
// goto L_main_loop;
//
//
// L_exit:
// store(used);
// result = input_len
// return result;
const Register used = x28;
const Register len = x29;
const Register counter_hi = x30;
const Register counter_lo = x31;
const Register block_size = t2;
const unsigned int BLOCK_SIZE = 16;
VectorRegister working_vregs[] = {
v1, v2, v3, v4, v5, v6, v7, v8,
v9, v10, v11, v12, v13, v14, v15
};
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ lwu(used, Address(used_ptr));
__ mv(len, input_len);
__ mv(block_size, BLOCK_SIZE);
// load keys to working_vregs according to round
generate_aes_loadkeys(key, working_vregs, round);
// 128-bit big-endian load
be_load_counter_128(counter_hi, counter_lo, counter);
Label L_next, L_encrypt_next, L_main_loop, L_exit;
// Check how much of the last saved_encrypted_ctr has been used; we fall through
// to L_encrypt_next when the used value is lower than block_size
__ bind(L_next);
__ bgeu(used, block_size, L_main_loop);
// Data shorter than block_size remains after L_main_loop or from the last
// partially used keystream block; encrypt it one byte at a time.
__ bind(L_encrypt_next);
__ add(t0, saved_encrypted_ctr, used);
__ lbu(t1, Address(t0));
__ lbu(t0, Address(in));
__ xorr(t1, t1, t0);
__ sb(t1, Address(out));
__ addi(in, in, 1);
__ addi(out, out, 1);
__ addi(used, used, 1);
__ subi(len, len, 1);
__ beqz(len, L_exit);
__ j(L_next);
// While len is not zero, compute the next saved_encrypted_ctr and encrypt the data
// block by block until less than a full block remains
__ bind(L_main_loop);
__ beqz(len, L_exit);
__ vle32_v(v16, counter);
// encrypt counter according to round
generate_aes_encrypt(v16, working_vregs, round);
__ vse32_v(v16, saved_encrypted_ctr);
// 128-bit little-endian increment
add_counter_128(counter_hi, counter_lo);
// 128-bit big-endian store
be_store_counter_128(counter_hi, counter_lo, counter);
__ mv(used, 0);
// Check if we have a full block_size
__ bltu(len, block_size, L_encrypt_next);
// We have at least one full block to encrypt
__ vle32_v(v17, in);
__ vxor_vv(v16, v16, v17);
__ vse32_v(v16, out);
__ add(out, out, block_size);
__ add(in, in, block_size);
__ sub(len, len, block_size);
__ mv(used, block_size);
__ j(L_main_loop);
__ bind(L_exit);
__ sw(used, Address(used_ptr));
__ mv(x10, input_len);
__ leave();
__ ret();
}
// CTR AES crypt.
// Arguments:
//
// Inputs:
// c_rarg0 - source byte array address
// c_rarg1 - destination byte array address
// c_rarg2 - K (key) in little endian int array
// c_rarg3 - counter vector byte array address
// c_rarg4 - input length
// c_rarg5 - saved encryptedCounter start
// c_rarg6 - saved used length
//
// Output:
// x10 - input length
//
address generate_counterMode_AESCrypt() {
assert(UseAESCTRIntrinsics, "Must be");
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
assert(UseZbb, "need basic bit manipulation (Zbb extension) support");
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
StubCodeMark mark(this, stub_id);
const Register in = c_rarg0;
const Register out = c_rarg1;
const Register key = c_rarg2;
const Register counter = c_rarg3;
const Register input_len = c_rarg4;
const Register saved_encrypted_ctr = c_rarg5;
const Register used_len_ptr = c_rarg6;
const Register keylen = c_rarg7; // temporary register
const address start = __ pc();
__ enter();
Label L_exit;
__ beqz(input_len, L_exit);
Label L_aes128, L_aes192;
// Compute #rounds for AES based on the length of the key array
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ mv(t0, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t0, L_aes128);
__ beq(keylen, t0, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)
// Note: the following function performs crypt with key += 15*16
counterMode_AESCrypt(15, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
// Note: the following function performs crypt with key += 13*16
__ bind(L_aes192);
counterMode_AESCrypt(13, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
// Note: the following function performs crypt with key += 11*16
__ bind(L_aes128);
counterMode_AESCrypt(11, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
__ bind(L_exit);
__ mv(x10, input_len);
__ leave();
__ ret();
return start;
}
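Editor note: a byte-at-a-time C++ sketch of the CTR bookkeeping described in the algorithm comment above (it omits the full-block fast path). The encrypt_block callback and increment_be_counter_128 from the earlier sketch are stand-ins for the vector AES and counter code, not part of the original source.

#include <cstddef>
#include <cstdint>
#include <functional>

void increment_be_counter_128(uint8_t counter[16]);  // from the sketch above

// CTR-mode crypt with keystream carry-over. `used` counts how many bytes of
// saved_encrypted_ctr have already been consumed; it is read on entry and
// written back through the pointer, matching the stub's used_ptr handling.
size_t ctr_crypt(const uint8_t* in, uint8_t* out, size_t len,
                 uint8_t counter[16], uint8_t saved_encrypted_ctr[16], unsigned* used,
                 const std::function<void(const uint8_t in[16], uint8_t out[16])>& encrypt_block) {
  const unsigned BLOCK_SIZE = 16;
  for (size_t done = 0; done < len; done++) {
    if (*used >= BLOCK_SIZE) {
      // Need fresh keystream: encrypt the counter, then bump it (big-endian).
      encrypt_block(counter, saved_encrypted_ctr);
      increment_be_counter_128(counter);
      *used = 0;
    }
    out[done] = in[done] ^ saved_encrypted_ctr[(*used)++];
  }
  return len;  // the stub returns input_len in x10
}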
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
void compare_string_8_x_LU(Register tmpL, Register tmpU,
Register strL, Register strU, Label& DIFF) {
@ -6824,6 +7219,12 @@ static const int64_t right_3_bits = right_n_bits(3);
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}
if (UseAESCTRIntrinsics) {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
}
if (UsePoly1305Intrinsics) {

View File

@ -434,6 +434,15 @@ void VM_Version::c2_initialize() {
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
UseAES = true;
}
if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics) && UseZbb) {
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
}
if (UseAESCTRIntrinsics && !UseZbb) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZbb support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
} else {
if (UseAES) {
warning("AES instructions are not available on this CPU");
@ -443,11 +452,10 @@ void VM_Version::c2_initialize() {
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}
if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
if (UseAESCTRIntrinsics) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZvkn support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
}
}

View File

@ -1715,6 +1715,8 @@ bool Matcher::match_rule_supported(int opcode) {
switch (opcode) {
case Op_ReverseBytesI:
case Op_ReverseBytesL:
case Op_ReverseBytesS:
case Op_ReverseBytesUS:
return UseByteReverseInstruction;
case Op_PopCountI:
case Op_PopCountL:
@ -11615,6 +11617,38 @@ instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
// Byte reverse
instruct bytes_reverse_short(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesS src));
predicate(UseByteReverseInstruction);
ins_cost(2 * DEFAULT_COST);
size(8);
format %{ "LRVR $dst, $src\n\t # byte reverse int"
"SRA $dst, 0x0010\t # right shift by 16, sign extended" %}
ins_encode %{
__ z_lrvr($dst$$Register, $src$$Register);
__ z_sra($dst$$Register, 0x0010);
%}
ins_pipe(pipe_class_dummy);
%}
instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesUS src));
predicate(UseByteReverseInstruction);
ins_cost(2 * DEFAULT_COST);
size(8);
format %{ "LRVR $dst, $src\n\t # byte reverse int"
"SRL $dst, 0x0010\t # right shift by 16, zero extended" %}
ins_encode %{
__ z_lrvr($dst$$Register, $src$$Register);
__ z_srl($dst$$Register, 0x0010);
%}
ins_pipe(pipe_class_dummy);
%}
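Editor note: the two rules above reverse the whole 32-bit register (LRVR) and then shift right by 16 with sign or zero extension; a C++ sketch of the same trick, using the GCC/Clang __builtin_bswap32 builtin as an assumption outside the source.

#include <cstdint>

// ReverseBytesS: reverse the two low-order bytes, result sign-extended.
int32_t reverse_bytes_short(int32_t x) {
  // Reversing all four bytes moves the reversed short into the high half;
  // an arithmetic shift right by 16 brings it back down, sign-extended (LRVR + SRA).
  return (int32_t)__builtin_bswap32((uint32_t)x) >> 16;
}

// ReverseBytesUS: same trick with a logical shift, result zero-extended (LRVR + SRL).
int32_t reverse_bytes_unsigned_short(int32_t x) {
  return (int32_t)(__builtin_bswap32((uint32_t)x) >> 16);
}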
instruct bytes_reverse_int(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesI src));
predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported

View File

@ -73,7 +73,7 @@
do_arch_blob, \
do_arch_entry, \
do_arch_entry_init) \
do_arch_blob(compiler, 109000 WINDOWS_ONLY(+2000)) \
do_arch_blob(compiler, 120000 WINDOWS_ONLY(+2000)) \
do_stub(compiler, vector_float_sign_mask) \
do_arch_entry(x86, compiler, vector_float_sign_mask, \
vector_float_sign_mask, vector_float_sign_mask) \

View File

@ -3386,6 +3386,11 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
return false;
}
break;
case Op_VectorBlend:
if (UseAVX == 0 && size_in_bits < 128) {
return false;
}
break;
case Op_VectorTest:
if (UseSSE < 4) {
return false; // Implementation limitation

View File

@ -2333,8 +2333,8 @@ int os::open(const char *path, int oflag, int mode) {
if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

View File

@ -2277,8 +2277,8 @@ int os::open(const char *path, int oflag, int mode) {
if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

View File

@ -4305,7 +4305,7 @@ OSReturn os::get_native_priority(const Thread* const thread,
// For reference, please, see IEEE Std 1003.1-2004:
// http://www.unix.org/single_unix_specification
jlong os::Linux::total_thread_cpu_time(clockid_t clockid) {
jlong os::Linux::thread_cpu_time(clockid_t clockid) {
struct timespec tp;
int status = clock_gettime(clockid, &tp);
assert(status == 0, "clock_gettime error: %s", os::strerror(errno));
@ -4932,8 +4932,8 @@ int os::open(const char *path, int oflag, int mode) {
if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {
@ -4960,20 +4960,42 @@ int os::open(const char *path, int oflag, int mode) {
return fd;
}
// Since kernel v2.6.12 the Linux ABI has had support for encoding the clock
// types in the last three bits. Bit 2 indicates whether a cpu clock refers to a
// thread or a process. Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or
// FD=3. The clock CPUCLOCK_VIRT (0b001) reports the thread's consumed user
// time. POSIX compliant implementations of pthread_getcpuclockid return the
// clock CPUCLOCK_SCHED (0b010) which reports the thread's consumed system+user
// time (as mandated by the POSIX standard POSIX.1-2024/IEEE Std 1003.1-2024
// §3.90).
static bool get_thread_clockid(Thread* thread, clockid_t* clockid, bool total) {
constexpr clockid_t CLOCK_TYPE_MASK = 3;
constexpr clockid_t CPUCLOCK_VIRT = 1;
int rc = pthread_getcpuclockid(thread->osthread()->pthread_id(), clockid);
if (rc != 0) {
// It's possible to encounter a terminated native thread that failed
// to detach itself from the VM - which should result in ESRCH.
assert_status(rc == ESRCH, rc, "pthread_getcpuclockid failed");
return false;
}
if (!total) {
clockid_t clockid_tmp = *clockid;
clockid_tmp = (clockid_tmp & ~CLOCK_TYPE_MASK) | CPUCLOCK_VIRT;
*clockid = clockid_tmp;
}
return true;
}
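Editor note: a standalone Linux-specific sketch of how the clockid rewrite above is used to read user-only CPU time, relying on the CPUCLOCK encoding described in the comment; the function name and structure are illustrative, not the HotSpot API.

#include <pthread.h>
#include <time.h>
#include <cstdint>

// Return a thread's consumed user CPU time in nanoseconds, or -1 on error.
// pthread_getcpuclockid yields a CPUCLOCK_SCHED (user+system) clock; rewriting
// the type bits (bits 0-1) to CPUCLOCK_VIRT selects user time only.
int64_t user_cpu_time_ns(pthread_t thread) {
  clockid_t clockid;
  if (pthread_getcpuclockid(thread, &clockid) != 0) {
    return -1;
  }
  const clockid_t CLOCK_TYPE_MASK = 3;  // bits 0-1: PROF=0, VIRT=1, SCHED=2, FD=3
  const clockid_t CPUCLOCK_VIRT = 1;    // user time only
  clockid = (clockid & ~CLOCK_TYPE_MASK) | CPUCLOCK_VIRT;
  struct timespec tp;
  if (clock_gettime(clockid, &tp) != 0) {
    return -1;
  }
  return (int64_t)tp.tv_sec * 1000000000 + tp.tv_nsec;
}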
static jlong user_thread_cpu_time(Thread *thread);
static jlong total_thread_cpu_time(Thread *thread) {
clockid_t clockid;
int rc = pthread_getcpuclockid(thread->osthread()->pthread_id(),
&clockid);
if (rc == 0) {
return os::Linux::total_thread_cpu_time(clockid);
} else {
// It's possible to encounter a terminated native thread that failed
// to detach itself from the VM - which should result in ESRCH.
assert_status(rc == ESRCH, rc, "pthread_getcpuclockid failed");
return -1;
}
clockid_t clockid;
bool success = get_thread_clockid(thread, &clockid, true);
return success ? os::Linux::thread_cpu_time(clockid) : -1;
}
// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
@ -4984,7 +5006,7 @@ static jlong total_thread_cpu_time(Thread *thread) {
// the fast estimate available on the platform.
jlong os::current_thread_cpu_time() {
return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
return os::Linux::thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
}
jlong os::thread_cpu_time(Thread* thread) {
@ -4993,7 +5015,7 @@ jlong os::thread_cpu_time(Thread* thread) {
jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
if (user_sys_cpu_time) {
return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
return os::Linux::thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
} else {
return user_thread_cpu_time(Thread::current());
}
@ -5007,46 +5029,11 @@ jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
}
}
// -1 on error.
static jlong user_thread_cpu_time(Thread *thread) {
pid_t tid = thread->osthread()->thread_id();
char *s;
char stat[2048];
size_t statlen;
char proc_name[64];
int count;
long sys_time, user_time;
char cdummy;
int idummy;
long ldummy;
FILE *fp;
clockid_t clockid;
bool success = get_thread_clockid(thread, &clockid, false);
os::snprintf_checked(proc_name, 64, "/proc/self/task/%d/stat", tid);
fp = os::fopen(proc_name, "r");
if (fp == nullptr) return -1;
statlen = fread(stat, 1, 2047, fp);
stat[statlen] = '\0';
fclose(fp);
// Skip pid and the command string. Note that we could be dealing with
// weird command names, e.g. user could decide to rename java launcher
// to "java 1.4.2 :)", then the stat file would look like
// 1234 (java 1.4.2 :)) R ... ...
// We don't really need to know the command string, just find the last
// occurrence of ")" and then start parsing from there. See bug 4726580.
s = strrchr(stat, ')');
if (s == nullptr) return -1;
// Skip blank chars
do { s++; } while (s && isspace((unsigned char) *s));
count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
&cdummy, &idummy, &idummy, &idummy, &idummy, &idummy,
&ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
&user_time, &sys_time);
if (count != 13) return -1;
return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
return success ? os::Linux::thread_cpu_time(clockid) : -1;
}
void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {

View File

@ -142,7 +142,7 @@ class os::Linux {
static bool manually_expand_stack(JavaThread * t, address addr);
static void expand_stack_to(address bottom);
static jlong total_thread_cpu_time(clockid_t clockid);
static jlong thread_cpu_time(clockid_t clockid);
static jlong sendfile(int out_fd, int in_fd, jlong* offset, jlong count);

View File

@ -1028,6 +1028,7 @@ char* os::realpath(const char* filename, char* outbuf, size_t outbuflen) {
} else {
errno = ENAMETOOLONG;
}
ErrnoPreserver ep;
permit_forbidden_function::free(p); // *not* os::free
} else {
// Fallback for platforms struggling with modern Posix standards (AIX 5.3, 6.1). If realpath

View File

@ -1645,7 +1645,7 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
// Save and restore errno to avoid confusing native code with EINTR
// after sigsuspend.
int old_errno = errno;
ErrnoPreserver ep;
PosixSignals::unblock_error_signals();
@ -1727,7 +1727,6 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
// ignore
}
errno = old_errno;
}
static int SR_initialize() {

View File

@ -4782,8 +4782,8 @@ int os::stat(const char *path, struct stat *sbuf) {
path_to_target = get_path_to_target(wide_path);
if (path_to_target == nullptr) {
// it is a symbolic link, but we failed to resolve it
errno = ENOENT;
os::free(wide_path);
errno = ENOENT;
return -1;
}
}
@ -4794,14 +4794,14 @@ int os::stat(const char *path, struct stat *sbuf) {
// if getting attributes failed, GetLastError should be called immediately after that
if (!bret) {
DWORD errcode = ::GetLastError();
log_debug(os)("os::stat() failed to GetFileAttributesExW: GetLastError->%lu.", errcode);
os::free(wide_path);
os::free(path_to_target);
if (errcode == ERROR_FILE_NOT_FOUND || errcode == ERROR_PATH_NOT_FOUND) {
errno = ENOENT;
} else {
errno = 0;
}
log_debug(os)("os::stat() failed to GetFileAttributesExW: GetLastError->%lu.", errcode);
os::free(wide_path);
os::free(path_to_target);
return -1;
}
@ -5000,8 +5000,8 @@ int os::open(const char *path, int oflag, int mode) {
path_to_target = get_path_to_target(wide_path);
if (path_to_target == nullptr) {
// it is a symbolic link, but we failed to resolve it
errno = ENOENT;
os::free(wide_path);
errno = ENOENT;
return -1;
}
}
@ -5275,6 +5275,7 @@ char* os::realpath(const char* filename, char* outbuf, size_t outbuflen) {
} else {
errno = ENAMETOOLONG;
}
ErrnoPreserver ep;
permit_forbidden_function::free(p); // *not* os::free
}
return result;

View File

@ -90,7 +90,7 @@ typedef CodeBuffer::csize_t csize_t; // file-local definition
// External buffer, in a predefined CodeBlob.
// Important: The code_start must be taken exactly, and not realigned.
CodeBuffer::CodeBuffer(CodeBlob* blob) DEBUG_ONLY(: Scrubber(this, sizeof(*this))) {
CodeBuffer::CodeBuffer(const CodeBlob* blob) DEBUG_ONLY(: Scrubber(this, sizeof(*this))) {
// Provide code buffer with meaningful name
initialize_misc(blob->name());
initialize(blob->content_begin(), blob->content_size());

View File

@ -672,7 +672,7 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
}
// (2) CodeBuffer referring to pre-allocated CodeBlob.
CodeBuffer(CodeBlob* blob);
CodeBuffer(const CodeBlob* blob);
// (3) code buffer allocating codeBlob memory for code & relocation
// info but with lazy initialization. The name must be something

View File

@ -86,9 +86,9 @@ void AOTMappedHeapWriter::init() {
if (CDSConfig::is_dumping_heap()) {
Universe::heap()->collect(GCCause::_java_lang_system_gc);
_buffer_offset_to_source_obj_table = new BufferOffsetToSourceObjectTable(/*size (prime)*/36137, /*max size*/1 * M);
_buffer_offset_to_source_obj_table = new (mtClassShared) BufferOffsetToSourceObjectTable(/*size (prime)*/36137, /*max size*/1 * M);
_dumped_interned_strings = new (mtClass)DumpedInternedStrings(INITIAL_TABLE_SIZE, MAX_TABLE_SIZE);
_fillers = new FillersTable();
_fillers = new (mtClassShared) FillersTable();
_requested_bottom = nullptr;
_requested_top = nullptr;

View File

@ -96,6 +96,7 @@
#include "runtime/vmOperations.hpp"
#include "runtime/vmThread.hpp"
#include "sanitizers/leak.hpp"
#include "services/management.hpp"
#include "utilities/align.hpp"
#include "utilities/bitMap.inline.hpp"
#include "utilities/defaultStream.hpp"

View File

@ -357,7 +357,7 @@ InstanceKlass* LambdaProxyClassDictionary::load_and_init_lambda_proxy_class(Inst
InstanceKlass* nest_host = caller_ik->nest_host(THREAD);
assert(nest_host == shared_nest_host, "mismatched nest host");
EventClassLoad class_load_start_event;
EventClassLoad class_load_event;
// Add to class hierarchy, and do possible deoptimizations.
lambda_ik->add_to_hierarchy(THREAD);
@ -368,8 +368,8 @@ InstanceKlass* LambdaProxyClassDictionary::load_and_init_lambda_proxy_class(Inst
if (JvmtiExport::should_post_class_load()) {
JvmtiExport::post_class_load(THREAD, lambda_ik);
}
if (class_load_start_event.should_commit()) {
SystemDictionary::post_class_load_event(&class_load_start_event, lambda_ik, ClassLoaderData::class_loader_data(class_loader()));
if (class_load_event.should_commit()) {
JFR_ONLY(SystemDictionary::post_class_load_event(&class_load_event, lambda_ik, ClassLoaderData::class_loader_data(class_loader()));)
}
lambda_ik->initialize(CHECK_NULL);

View File

@ -149,6 +149,10 @@ public:
assert(is_loaded(), "must be loaded");
return _flags;
}
// Fetch Klass::access_flags.
jint access_flags() { return flags().as_int(); }
bool has_finalizer() {
assert(is_loaded(), "must be loaded");
return _has_finalizer; }

View File

@ -216,15 +216,6 @@ jint ciKlass::modifier_flags() {
)
}
// ------------------------------------------------------------------
// ciKlass::access_flags
jint ciKlass::access_flags() {
assert(is_loaded(), "not loaded");
GUARDED_VM_ENTRY(
return get_Klass()->access_flags().as_unsigned_short();
)
}
// ------------------------------------------------------------------
// ciKlass::misc_flags
klass_flags_t ciKlass::misc_flags() {

View File

@ -122,9 +122,6 @@ public:
// Fetch modifier flags.
jint modifier_flags();
// Fetch Klass::access_flags.
jint access_flags();
// Fetch Klass::misc_flags.
klass_flags_t misc_flags();

View File

@ -89,9 +89,6 @@
#if INCLUDE_CDS
#include "classfile/systemDictionaryShared.hpp"
#endif
#if INCLUDE_JFR
#include "jfr/support/jfrTraceIdExtension.hpp"
#endif
// We generally try to create the oops directly when parsing, rather than
// allocating temporary data structures and copying the bytes twice. A
@ -157,6 +154,8 @@
#define JAVA_26_VERSION 70
#define JAVA_27_VERSION 71
void ClassFileParser::set_class_bad_constant_seen(short bad_constant) {
assert((bad_constant == JVM_CONSTANT_Module ||
bad_constant == JVM_CONSTANT_Package) && _major_version >= JAVA_9_VERSION,
@ -5272,8 +5271,6 @@ void ClassFileParser::fill_instance_klass(InstanceKlass* ik,
}
}
JFR_ONLY(INIT_ID(ik);)
// If we reach here, all is well.
// Now remove the InstanceKlass* from the _klass_to_deallocate field
// in order for it to not be destroyed in the ClassFileParser destructor.

View File

@ -500,6 +500,8 @@ class ClassFileParser {
InstanceKlass* create_instance_klass(bool cf_changed_in_CFLH, const ClassInstanceInfo& cl_inst_info, TRAPS);
const ClassFileStream& stream() const { return *_stream; }
const ClassFileStream* clone_stream() const;
void set_klass_to_deallocate(InstanceKlass* klass);

View File

@ -439,7 +439,7 @@ class MethodFamily : public ResourceObj {
StreamIndentor si(str, indent * 2);
str->print("Selected method: ");
print_method(str, _selected_target);
Klass* method_holder = _selected_target->method_holder();
InstanceKlass* method_holder = _selected_target->method_holder();
if (!method_holder->is_interface()) {
str->print(" : in superclass");
}

View File

@ -1091,10 +1091,6 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
// Set the modifiers flag.
u2 computed_modifiers = k->compute_modifier_flags();
set_modifiers(mirror(), computed_modifiers);
// Set the raw access_flags, this is used by reflection instead of modifier flags.
// The Java code for array classes gets the access flags from the element type.
assert(!k->is_array_klass() || k->access_flags().as_unsigned_short() == 0, "access flags are not set for arrays");
set_raw_access_flags(mirror(), k->access_flags().as_unsigned_short());
InstanceMirrorKlass* mk = InstanceMirrorKlass::cast(mirror->klass());
assert(oop_size(mirror()) == mk->instance_size(k), "should have been set");
@ -1103,6 +1099,8 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
// It might also have a component mirror. This mirror must already exist.
if (k->is_array_klass()) {
// The Java code for array classes gets the access flags from the element type.
set_raw_access_flags(mirror(), 0);
if (k->is_typeArray_klass()) {
BasicType type = TypeArrayKlass::cast(k)->element_type();
if (is_scratch) {
@ -1129,6 +1127,8 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
// and java_mirror in this klass.
} else {
assert(k->is_instance_klass(), "Must be");
// Set the raw access_flags, this is used by reflection instead of modifier flags.
set_raw_access_flags(mirror(), InstanceKlass::cast(k)->access_flags().as_unsigned_short());
initialize_mirror_fields(InstanceKlass::cast(k), mirror, protection_domain, classData, THREAD);
if (HAS_PENDING_EXCEPTION) {
// If any of the fields throws an exception like OOM remove the klass field
@ -1684,8 +1684,8 @@ int java_lang_Thread::_name_offset;
int java_lang_Thread::_contextClassLoader_offset;
int java_lang_Thread::_eetop_offset;
int java_lang_Thread::_jvmti_thread_state_offset;
int java_lang_Thread::_jvmti_VTMS_transition_disable_count_offset;
int java_lang_Thread::_jvmti_is_in_VTMS_transition_offset;
int java_lang_Thread::_vthread_transition_disable_count_offset;
int java_lang_Thread::_is_in_vthread_transition_offset;
int java_lang_Thread::_interrupted_offset;
int java_lang_Thread::_interruptLock_offset;
int java_lang_Thread::_tid_offset;
@ -1745,34 +1745,34 @@ void java_lang_Thread::set_jvmti_thread_state(oop java_thread, JvmtiThreadState*
java_thread->address_field_put(_jvmti_thread_state_offset, (address)state);
}
int java_lang_Thread::VTMS_transition_disable_count(oop java_thread) {
return java_thread->int_field(_jvmti_VTMS_transition_disable_count_offset);
int java_lang_Thread::vthread_transition_disable_count(oop java_thread) {
jint* addr = java_thread->field_addr<jint>(_vthread_transition_disable_count_offset);
return AtomicAccess::load(addr);
}
void java_lang_Thread::inc_VTMS_transition_disable_count(oop java_thread) {
assert(JvmtiVTMSTransition_lock->owned_by_self(), "Must be locked");
int val = VTMS_transition_disable_count(java_thread);
java_thread->int_field_put(_jvmti_VTMS_transition_disable_count_offset, val + 1);
void java_lang_Thread::inc_vthread_transition_disable_count(oop java_thread) {
assert(VThreadTransition_lock->owned_by_self(), "Must be locked");
jint* addr = java_thread->field_addr<jint>(_vthread_transition_disable_count_offset);
int val = AtomicAccess::load(addr);
AtomicAccess::store(addr, val + 1);
}
void java_lang_Thread::dec_VTMS_transition_disable_count(oop java_thread) {
assert(JvmtiVTMSTransition_lock->owned_by_self(), "Must be locked");
int val = VTMS_transition_disable_count(java_thread);
assert(val > 0, "VTMS_transition_disable_count should never be negative");
java_thread->int_field_put(_jvmti_VTMS_transition_disable_count_offset, val - 1);
void java_lang_Thread::dec_vthread_transition_disable_count(oop java_thread) {
assert(VThreadTransition_lock->owned_by_self(), "Must be locked");
jint* addr = java_thread->field_addr<jint>(_vthread_transition_disable_count_offset);
int val = AtomicAccess::load(addr);
AtomicAccess::store(addr, val - 1);
}
bool java_lang_Thread::is_in_VTMS_transition(oop java_thread) {
return java_thread->bool_field_volatile(_jvmti_is_in_VTMS_transition_offset);
bool java_lang_Thread::is_in_vthread_transition(oop java_thread) {
jboolean* addr = java_thread->field_addr<jboolean>(_is_in_vthread_transition_offset);
return AtomicAccess::load(addr);
}
void java_lang_Thread::set_is_in_VTMS_transition(oop java_thread, bool val) {
assert(is_in_VTMS_transition(java_thread) != val, "already %s transition", val ? "inside" : "outside");
java_thread->bool_field_put_volatile(_jvmti_is_in_VTMS_transition_offset, val);
}
int java_lang_Thread::is_in_VTMS_transition_offset() {
return _jvmti_is_in_VTMS_transition_offset;
void java_lang_Thread::set_is_in_vthread_transition(oop java_thread, bool val) {
assert(is_in_vthread_transition(java_thread) != val, "already %s transition", val ? "inside" : "outside");
jboolean* addr = java_thread->field_addr<jboolean>(_is_in_vthread_transition_offset);
AtomicAccess::store(addr, (jboolean)val);
}
void java_lang_Thread::clear_scopedValueBindings(oop java_thread) {

View File

@ -375,8 +375,8 @@ class java_lang_Class : AllStatic {
#define THREAD_INJECTED_FIELDS(macro) \
macro(java_lang_Thread, jvmti_thread_state, intptr_signature, false) \
macro(java_lang_Thread, jvmti_VTMS_transition_disable_count, int_signature, false) \
macro(java_lang_Thread, jvmti_is_in_VTMS_transition, bool_signature, false) \
macro(java_lang_Thread, vthread_transition_disable_count, int_signature, false) \
macro(java_lang_Thread, is_in_vthread_transition, bool_signature, false) \
JFR_ONLY(macro(java_lang_Thread, jfr_epoch, short_signature, false))
class java_lang_Thread : AllStatic {
@ -390,8 +390,8 @@ class java_lang_Thread : AllStatic {
static int _contextClassLoader_offset;
static int _eetop_offset;
static int _jvmti_thread_state_offset;
static int _jvmti_VTMS_transition_disable_count_offset;
static int _jvmti_is_in_VTMS_transition_offset;
static int _vthread_transition_disable_count_offset;
static int _is_in_vthread_transition_offset;
static int _interrupted_offset;
static int _interruptLock_offset;
static int _tid_offset;
@ -444,12 +444,15 @@ class java_lang_Thread : AllStatic {
static JvmtiThreadState* jvmti_thread_state(oop java_thread);
static void set_jvmti_thread_state(oop java_thread, JvmtiThreadState* state);
static int VTMS_transition_disable_count(oop java_thread);
static void inc_VTMS_transition_disable_count(oop java_thread);
static void dec_VTMS_transition_disable_count(oop java_thread);
static bool is_in_VTMS_transition(oop java_thread);
static void set_is_in_VTMS_transition(oop java_thread, bool val);
static int is_in_VTMS_transition_offset();
static int vthread_transition_disable_count(oop java_thread);
static void inc_vthread_transition_disable_count(oop java_thread);
static void dec_vthread_transition_disable_count(oop java_thread);
static int vthread_transition_disable_count_offset() { return _vthread_transition_disable_count_offset; }
static bool is_in_vthread_transition(oop java_thread);
static void set_is_in_vthread_transition(oop java_thread, bool val);
static int is_in_vthread_transition_offset() { return _is_in_vthread_transition_offset; }
// Clear all scoped value bindings on error
static void clear_scopedValueBindings(oop java_thread);

View File

@ -37,7 +37,7 @@
#include "runtime/handles.inline.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_JFR
#include "jfr/support/jfrKlassExtension.hpp"
#include "jfr/jfr.hpp"
#endif
@ -99,6 +99,9 @@ InstanceKlass* KlassFactory::check_shared_class_file_load_hook(
new_ik->set_classpath_index(path_index);
}
JFR_ONLY(Jfr::on_klass_creation(new_ik, parser, THREAD);)
return new_ik;
}
}
@ -213,7 +216,7 @@ InstanceKlass* KlassFactory::create_from_stream(ClassFileStream* stream,
result->set_cached_class_file(cached_class_file);
}
JFR_ONLY(ON_KLASS_CREATION(result, parser, THREAD);)
JFR_ONLY(Jfr::on_klass_creation(result, parser, THREAD);)
#if INCLUDE_CDS
if (CDSConfig::is_dumping_archive()) {

View File

@ -560,15 +560,6 @@ static InstanceKlass* handle_parallel_loading(JavaThread* current,
return nullptr;
}
void SystemDictionary::post_class_load_event(EventClassLoad* event, const InstanceKlass* k, const ClassLoaderData* init_cld) {
assert(event != nullptr, "invariant");
assert(k != nullptr, "invariant");
event->set_loadedClass(k);
event->set_definingClassLoader(k->class_loader_data());
event->set_initiatingClassLoader(init_cld);
event->commit();
}
// SystemDictionary::resolve_instance_class_or_null is the main function for class name resolution.
// After checking if the InstanceKlass already exists, it checks for ClassCircularityError and
// whether the thread must wait for loading in parallel. It eventually calls load_instance_class,
@ -582,7 +573,7 @@ InstanceKlass* SystemDictionary::resolve_instance_class_or_null(Symbol* name,
assert(name != nullptr && !Signature::is_array(name) &&
!Signature::has_envelope(name), "invalid class name: %s", name == nullptr ? "nullptr" : name->as_C_string());
EventClassLoad class_load_start_event;
EventClassLoad class_load_event;
HandleMark hm(THREAD);
@ -713,8 +704,8 @@ InstanceKlass* SystemDictionary::resolve_instance_class_or_null(Symbol* name,
return nullptr;
}
if (class_load_start_event.should_commit()) {
post_class_load_event(&class_load_start_event, loaded_class, loader_data);
if (class_load_event.should_commit()) {
JFR_ONLY(post_class_load_event(&class_load_event, loaded_class, loader_data);)
}
// Make sure we have the right class in the dictionary
@ -789,7 +780,7 @@ InstanceKlass* SystemDictionary::resolve_hidden_class_from_stream(
const ClassLoadInfo& cl_info,
TRAPS) {
EventClassLoad class_load_start_event;
EventClassLoad class_load_event;
ClassLoaderData* loader_data;
// - for hidden classes that are not strong: create a new CLD that has a class holder and
@ -819,15 +810,16 @@ InstanceKlass* SystemDictionary::resolve_hidden_class_from_stream(
k->add_to_hierarchy(THREAD);
// But, do not add to dictionary.
if (class_load_event.should_commit()) {
JFR_ONLY(post_class_load_event(&class_load_event, k, loader_data);)
}
k->link_class(CHECK_NULL);
// notify jvmti
if (JvmtiExport::should_post_class_load()) {
JvmtiExport::post_class_load(THREAD, k);
}
if (class_load_start_event.should_commit()) {
post_class_load_event(&class_load_start_event, k, loader_data);
}
return k;
}
@ -1182,6 +1174,8 @@ void SystemDictionary::preload_class(Handle class_loader, InstanceKlass* ik, TRA
}
#endif
EventClassLoad class_load_event;
ClassLoaderData* loader_data = ClassLoaderData::class_loader_data(class_loader());
oop java_mirror = ik->archived_java_mirror();
precond(java_mirror != nullptr);
@ -1203,11 +1197,26 @@ void SystemDictionary::preload_class(Handle class_loader, InstanceKlass* ik, TRA
update_dictionary(THREAD, ik, loader_data);
}
if (class_load_event.should_commit()) {
JFR_ONLY(post_class_load_event(&class_load_event, ik, loader_data);)
}
assert(ik->is_loaded(), "Must be in at least loaded state");
}
#endif // INCLUDE_CDS
#if INCLUDE_JFR
void SystemDictionary::post_class_load_event(EventClassLoad* event, const InstanceKlass* k, const ClassLoaderData* init_cld) {
assert(event != nullptr, "invariant");
assert(k != nullptr, "invariant");
event->set_loadedClass(k);
event->set_definingClassLoader(k->class_loader_data());
event->set_initiatingClassLoader(init_cld);
event->commit();
}
#endif // INCLUDE_JFR
InstanceKlass* SystemDictionary::load_instance_class_impl(Symbol* class_name, Handle class_loader, TRAPS) {
if (class_loader.is_null()) {
@ -1380,15 +1389,6 @@ InstanceKlass* SystemDictionary::load_instance_class(Symbol* name,
return loaded_class;
}
static void post_class_define_event(InstanceKlass* k, const ClassLoaderData* def_cld) {
EventClassDefine event;
if (event.should_commit()) {
event.set_definedClass(k);
event.set_definingClassLoader(def_cld);
event.commit();
}
}
void SystemDictionary::define_instance_class(InstanceKlass* k, Handle class_loader, TRAPS) {
ClassLoaderData* loader_data = k->class_loader_data();
@ -1440,7 +1440,6 @@ void SystemDictionary::define_instance_class(InstanceKlass* k, Handle class_load
if (JvmtiExport::should_post_class_load()) {
JvmtiExport::post_class_load(THREAD, k);
}
post_class_define_event(k, loader_data);
}
// Support parallel classloading
@ -2173,9 +2172,10 @@ static bool is_always_visible_class(oop mirror) {
return true; // primitive array
}
assert(klass->is_instance_klass(), "%s", klass->external_name());
return klass->is_public() &&
(InstanceKlass::cast(klass)->is_same_class_package(vmClasses::Object_klass()) || // java.lang
InstanceKlass::cast(klass)->is_same_class_package(vmClasses::MethodHandle_klass())); // java.lang.invoke
InstanceKlass* ik = InstanceKlass::cast(klass);
return ik->is_public() &&
(ik->is_same_class_package(vmClasses::Object_klass()) || // java.lang
ik->is_same_class_package(vmClasses::MethodHandle_klass())); // java.lang.invoke
}
// Find or construct the Java mirror (java.lang.Class instance) for

View File

@ -326,11 +326,10 @@ private:
static void restore_archived_method_handle_intrinsics_impl(TRAPS) NOT_CDS_RETURN;
protected:
// Used by AOTLinkedClassBulkLoader, LambdaProxyClassDictionary, and SystemDictionaryShared
// Used by AOTLinkedClassBulkLoader, LambdaProxyClassDictionary, VMClasses and SystemDictionaryShared
static bool add_loader_constraint(Symbol* name, Klass* klass_being_linked, Handle loader1,
Handle loader2);
static void post_class_load_event(EventClassLoad* event, const InstanceKlass* k, const ClassLoaderData* init_cld);
static InstanceKlass* load_shared_class(InstanceKlass* ik,
Handle class_loader,
Handle protection_domain,
@ -342,6 +341,9 @@ protected:
static InstanceKlass* find_or_define_instance_class(Symbol* class_name,
Handle class_loader,
InstanceKlass* k, TRAPS);
JFR_ONLY(static void post_class_load_event(EventClassLoad* event,
const InstanceKlass* k,
const ClassLoaderData* init_cld);)
public:
static bool is_system_class_loader(oop class_loader);

View File

@ -35,6 +35,7 @@
#include "classfile/vmClasses.hpp"
#include "classfile/vmSymbols.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "jfr/jfrEvents.hpp"
#include "memory/metaspaceClosure.hpp"
#include "memory/universe.hpp"
#include "oops/instanceKlass.hpp"
@ -240,6 +241,8 @@ void vmClasses::resolve_shared_class(InstanceKlass* klass, ClassLoaderData* load
return;
}
EventClassLoad class_load_event;
// add super and interfaces first
InstanceKlass* super = klass->super();
if (super != nullptr && super->class_loader_data() == nullptr) {
@ -261,6 +264,10 @@ void vmClasses::resolve_shared_class(InstanceKlass* klass, ClassLoaderData* load
dictionary->add_klass(THREAD, klass->name(), klass);
klass->add_to_hierarchy(THREAD);
assert(klass->is_loaded(), "Must be in at least loaded state");
if (class_load_event.should_commit()) {
JFR_ONLY(SystemDictionary::post_class_load_event(&class_load_event, klass, loader_data);)
}
}
#endif // INCLUDE_CDS

View File

@ -649,10 +649,10 @@ class methodHandle;
do_intrinsic(_Continuation_unpin, jdk_internal_vm_Continuation, unpin_name, void_method_signature, F_SN) \
\
/* java/lang/VirtualThread */ \
do_intrinsic(_notifyJvmtiVThreadStart, java_lang_VirtualThread, notifyJvmtiStart_name, void_method_signature, F_RN) \
do_intrinsic(_notifyJvmtiVThreadEnd, java_lang_VirtualThread, notifyJvmtiEnd_name, void_method_signature, F_RN) \
do_intrinsic(_notifyJvmtiVThreadMount, java_lang_VirtualThread, notifyJvmtiMount_name, bool_void_signature, F_RN) \
do_intrinsic(_notifyJvmtiVThreadUnmount, java_lang_VirtualThread, notifyJvmtiUnmount_name, bool_void_signature, F_RN) \
do_intrinsic(_vthreadEndFirstTransition, java_lang_VirtualThread, endFirstTransition_name, void_method_signature, F_RN) \
do_intrinsic(_vthreadStartFinalTransition, java_lang_VirtualThread, startFinalTransition_name, void_method_signature, F_RN) \
do_intrinsic(_vthreadStartTransition, java_lang_VirtualThread, startTransition_name, bool_void_signature, F_RN) \
do_intrinsic(_vthreadEndTransition, java_lang_VirtualThread, endTransition_name, bool_void_signature, F_RN) \
do_intrinsic(_notifyJvmtiVThreadDisableSuspend, java_lang_VirtualThread, notifyJvmtiDisableSuspend_name, bool_void_signature, F_SN) \
\
/* support for UnsafeConstants */ \

View File

@ -395,10 +395,10 @@ class SerializeClosure;
template(run_finalization_name, "runFinalization") \
template(dispatchUncaughtException_name, "dispatchUncaughtException") \
template(loadClass_name, "loadClass") \
template(notifyJvmtiStart_name, "notifyJvmtiStart") \
template(notifyJvmtiEnd_name, "notifyJvmtiEnd") \
template(notifyJvmtiMount_name, "notifyJvmtiMount") \
template(notifyJvmtiUnmount_name, "notifyJvmtiUnmount") \
template(startTransition_name, "startTransition") \
template(endTransition_name, "endTransition") \
template(startFinalTransition_name, "startFinalTransition") \
template(endFirstTransition_name, "endFirstTransition") \
template(notifyJvmtiDisableSuspend_name, "notifyJvmtiDisableSuspend") \
template(doYield_name, "doYield") \
template(enter_name, "enter") \
@ -497,8 +497,8 @@ class SerializeClosure;
template(java_lang_Boolean_signature, "Ljava/lang/Boolean;") \
template(url_code_signer_array_void_signature, "(Ljava/net/URL;[Ljava/security/CodeSigner;)V") \
template(jvmti_thread_state_name, "jvmti_thread_state") \
template(jvmti_VTMS_transition_disable_count_name, "jvmti_VTMS_transition_disable_count") \
template(jvmti_is_in_VTMS_transition_name, "jvmti_is_in_VTMS_transition") \
template(vthread_transition_disable_count_name, "vthread_transition_disable_count") \
template(is_in_vthread_transition_name, "is_in_vthread_transition") \
template(module_entry_name, "module_entry") \
template(resolved_references_name, "<resolved_references>") \
template(init_lock_name, "<init_lock>") \

View File

@ -1346,18 +1346,16 @@ void AOTCodeAddressTable::init_extrs() {
SET_ADDRESS(_extrs, OptoRuntime::multianewarray4_C);
SET_ADDRESS(_extrs, OptoRuntime::multianewarray5_C);
SET_ADDRESS(_extrs, OptoRuntime::multianewarrayN_C);
#if INCLUDE_JVMTI
SET_ADDRESS(_extrs, SharedRuntime::notify_jvmti_vthread_start);
SET_ADDRESS(_extrs, SharedRuntime::notify_jvmti_vthread_end);
SET_ADDRESS(_extrs, SharedRuntime::notify_jvmti_vthread_mount);
SET_ADDRESS(_extrs, SharedRuntime::notify_jvmti_vthread_unmount);
#endif
SET_ADDRESS(_extrs, OptoRuntime::complete_monitor_locking_C);
SET_ADDRESS(_extrs, OptoRuntime::monitor_notify_C);
SET_ADDRESS(_extrs, OptoRuntime::monitor_notifyAll_C);
SET_ADDRESS(_extrs, OptoRuntime::rethrow_C);
SET_ADDRESS(_extrs, OptoRuntime::slow_arraycopy_C);
SET_ADDRESS(_extrs, OptoRuntime::register_finalizer_C);
SET_ADDRESS(_extrs, OptoRuntime::vthread_end_first_transition_C);
SET_ADDRESS(_extrs, OptoRuntime::vthread_start_final_transition_C);
SET_ADDRESS(_extrs, OptoRuntime::vthread_start_transition_C);
SET_ADDRESS(_extrs, OptoRuntime::vthread_end_transition_C);
#if defined(AARCH64)
SET_ADDRESS(_extrs, JavaThread::verify_cross_modify_fence_failure);
#endif // AARCH64

View File

@ -227,11 +227,6 @@ void CodeCache::initialize_heaps() {
if (!non_nmethod.set) {
non_nmethod.size += compiler_buffer_size;
// Further down, just before FLAG_SET_ERGO(), all segment sizes are
// aligned down to the next lower multiple of min_size. For large page
// sizes, this may result in (non_nmethod.size == 0) which is not acceptable.
// Therefore, force non_nmethod.size to at least min_size.
non_nmethod.size = MAX2(non_nmethod.size, min_size);
}
if (!profiled.set && !non_profiled.set) {
@ -307,11 +302,10 @@ void CodeCache::initialize_heaps() {
// Note: if large page support is enabled, min_size is at least the large
// page size. This ensures that the code cache is covered by large pages.
non_profiled.size += non_nmethod.size & alignment_mask(min_size);
non_profiled.size += profiled.size & alignment_mask(min_size);
non_nmethod.size = align_down(non_nmethod.size, min_size);
profiled.size = align_down(profiled.size, min_size);
non_profiled.size = align_down(non_profiled.size, min_size);
non_nmethod.size = align_up(non_nmethod.size, min_size);
profiled.size = align_up(profiled.size, min_size);
non_profiled.size = align_up(non_profiled.size, min_size);
cache_size = non_nmethod.size + profiled.size + non_profiled.size;
FLAG_SET_ERGO(NonNMethodCodeHeapSize, non_nmethod.size);
FLAG_SET_ERGO(ProfiledCodeHeapSize, profiled.size);

View File

@ -1498,6 +1498,40 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
// - OOP table
memcpy(consts_begin(), nm.consts_begin(), nm.data_end() - nm.consts_begin());
// Fix relocation
RelocIterator iter(this);
CodeBuffer src(&nm);
CodeBuffer dst(this);
while (iter.next()) {
#ifdef USE_TRAMPOLINE_STUB_FIX_OWNER
// After an nmethod is moved, some direct call sites may end up out of range.
// CallRelocation::fix_relocation_after_move() assumes the target is always
// reachable and does not check branch range. Calling it without range checks
// could cause us to write an offset too large for the instruction.
//
// If a call site has a trampoline, we skip the normal call relocation. The
// associated trampoline_stub_Relocation will handle the call and the
// trampoline, including range checks and updating the branch as needed.
//
// If no trampoline exists, we can assume the call target is always
// reachable and therefore within direct branch range, so calling
// CallRelocation::fix_relocation_after_move() is safe.
if (iter.reloc()->is_call()) {
address trampoline = trampoline_stub_Relocation::get_trampoline_for(iter.reloc()->addr(), this);
if (trampoline != nullptr) {
continue;
}
}
#endif
iter.reloc()->fix_relocation_after_move(&src, &dst);
}
{
MutexLocker ml(NMethodState_lock, Mutex::_no_safepoint_check_flag);
clear_inline_caches();
}
post_init();
}
@ -1521,25 +1555,6 @@ nmethod* nmethod::relocate(CodeBlobType code_blob_type) {
return nullptr;
}
// Fix relocation
RelocIterator iter(nm_copy);
CodeBuffer src(this);
CodeBuffer dst(nm_copy);
while (iter.next()) {
#ifdef USE_TRAMPOLINE_STUB_FIX_OWNER
// Direct calls may no longer be in range and the use of a trampoline may now be required.
// Instead, allow trampoline relocations to update their owners and perform the necessary checks.
if (iter.reloc()->is_call()) {
address trampoline = trampoline_stub_Relocation::get_trampoline_for(iter.reloc()->addr(), nm_copy);
if (trampoline != nullptr) {
continue;
}
}
#endif
iter.reloc()->fix_relocation_after_move(&src, &dst);
}
// To make dependency checking during class loading fast, record
// the nmethod dependencies in the classes it is dependent on.
// This allows the dependency checking code to simply walk the
@ -1569,8 +1584,6 @@ nmethod* nmethod::relocate(CodeBlobType code_blob_type) {
if (!is_marked_for_deoptimization() && is_in_use()) {
assert(method() != nullptr && method()->code() == this, "should be if is in use");
nm_copy->clear_inline_caches();
// Attempt to start using the copy
if (nm_copy->make_in_use()) {
ICache::invalidate_range(nm_copy->code_begin(), nm_copy->code_size());
@ -1578,7 +1591,7 @@ nmethod* nmethod::relocate(CodeBlobType code_blob_type) {
methodHandle mh(Thread::current(), nm_copy->method());
nm_copy->method()->set_code(mh, nm_copy);
make_not_used();
make_not_entrant(InvalidationReason::RELOCATED);
nm_copy->post_compiled_method_load_event();

View File

@ -499,6 +499,7 @@ public:
UNCOMMON_TRAP,
WHITEBOX_DEOPTIMIZATION,
ZOMBIE,
RELOCATED,
INVALIDATION_REASONS_COUNT
};
@ -543,6 +544,8 @@ public:
return "whitebox deoptimization";
case InvalidationReason::ZOMBIE:
return "zombie";
case InvalidationReason::RELOCATED:
return "relocated";
default: {
assert(false, "Unhandled reason");
return "Unknown";

View File

@ -58,8 +58,6 @@
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
PSYoungGen* ParallelScavengeHeap::_young_gen = nullptr;
PSOldGen* ParallelScavengeHeap::_old_gen = nullptr;
PSAdaptiveSizePolicy* ParallelScavengeHeap::_size_policy = nullptr;
GCPolicyCounters* ParallelScavengeHeap::_gc_policy_counters = nullptr;
size_t ParallelScavengeHeap::_desired_page_size = 0;
@ -143,9 +141,9 @@ void ParallelScavengeHeap::initialize_serviceability() {
"PS Survivor Space",
false /* support_usage_threshold */);
_old_pool = new PSGenerationPool(_old_gen,
"PS Old Gen",
true /* support_usage_threshold */);
_old_pool = new PSOldGenerationPool(_old_gen,
"PS Old Gen",
true /* support_usage_threshold */);
_young_manager = new GCMemoryManager("PS Scavenge");
_old_manager = new GCMemoryManager("PS MarkSweep");
@ -891,9 +889,23 @@ void ParallelScavengeHeap::resize_after_young_gc(bool is_survivor_overflowing) {
// Consider if should shrink old-gen
if (!is_survivor_overflowing) {
// Upper bound for a single step shrink
size_t max_shrink_bytes = SpaceAlignment;
assert(old_gen()->capacity_in_bytes() >= old_gen()->min_gen_size(), "inv");
// Old gen min_gen_size constraint.
const size_t max_shrink_bytes_gen_size_constraint = old_gen()->capacity_in_bytes() - old_gen()->min_gen_size();
// Per-step delta to avoid too aggressive shrinking.
const size_t max_shrink_bytes_per_step_constraint = SpaceAlignment;
// Combining the above two constraints.
const size_t max_shrink_bytes = MIN2(max_shrink_bytes_gen_size_constraint,
max_shrink_bytes_per_step_constraint);
size_t shrink_bytes = _size_policy->compute_old_gen_shrink_bytes(old_gen()->free_in_bytes(), max_shrink_bytes);
assert(old_gen()->capacity_in_bytes() >= shrink_bytes, "inv");
assert(old_gen()->capacity_in_bytes() - shrink_bytes >= old_gen()->min_gen_size(), "inv");
if (shrink_bytes != 0) {
if (MinHeapFreeRatio != 0) {
size_t new_capacity = old_gen()->capacity_in_bytes() - shrink_bytes;

View File

@ -69,8 +69,8 @@ class ReservedSpace;
class ParallelScavengeHeap : public CollectedHeap {
friend class VMStructs;
private:
static PSYoungGen* _young_gen;
static PSOldGen* _old_gen;
PSYoungGen* _young_gen;
PSOldGen* _old_gen;
// Sizing policy for entire heap
static PSAdaptiveSizePolicy* _size_policy;
@ -160,8 +160,8 @@ public:
GrowableArray<GCMemoryManager*> memory_managers() override;
GrowableArray<MemoryPool*> memory_pools() override;
static PSYoungGen* young_gen() { return _young_gen; }
static PSOldGen* old_gen() { return _old_gen; }
PSYoungGen* young_gen() const { return _young_gen; }
PSOldGen* old_gen() const { return _old_gen; }
PSAdaptiveSizePolicy* size_policy() { return _size_policy; }

View File

@ -24,14 +24,14 @@
#include "gc/parallel/psMemoryPool.hpp"
PSGenerationPool::PSGenerationPool(PSOldGen* old_gen,
const char* name,
bool support_usage_threshold) :
PSOldGenerationPool::PSOldGenerationPool(PSOldGen* old_gen,
const char* name,
bool support_usage_threshold) :
CollectedMemoryPool(name, old_gen->capacity_in_bytes(),
old_gen->reserved().byte_size(), support_usage_threshold), _old_gen(old_gen) {
}
MemoryUsage PSGenerationPool::get_memory_usage() {
MemoryUsage PSOldGenerationPool::get_memory_usage() {
size_t maxSize = (available_for_allocation() ? max_size() : 0);
size_t used = used_in_bytes();
size_t committed = _old_gen->capacity_in_bytes();

View File

@ -31,12 +31,12 @@
#include "services/memoryPool.hpp"
#include "services/memoryUsage.hpp"
class PSGenerationPool : public CollectedMemoryPool {
class PSOldGenerationPool : public CollectedMemoryPool {
private:
PSOldGen* _old_gen;
public:
PSGenerationPool(PSOldGen* pool, const char* name, bool support_usage_threshold);
PSOldGenerationPool(PSOldGen* pool, const char* name, bool support_usage_threshold);
MemoryUsage get_memory_usage();
size_t used_in_bytes() { return _old_gen->used_in_bytes(); }

View File

@ -115,7 +115,7 @@ class PSScavenge: AllStatic {
}
static bool is_obj_in_to_space(oop o) {
return ParallelScavengeHeap::young_gen()->to_space()->contains(o);
return ParallelScavengeHeap::heap()->young_gen()->to_space()->contains(o);
}
};

View File

@ -64,8 +64,8 @@
nonstatic_field(PSOldGen, _max_gen_size, const size_t) \
\
\
static_field(ParallelScavengeHeap, _young_gen, PSYoungGen*) \
static_field(ParallelScavengeHeap, _old_gen, PSOldGen*) \
nonstatic_field(ParallelScavengeHeap, _young_gen, PSYoungGen*) \
nonstatic_field(ParallelScavengeHeap, _old_gen, PSOldGen*) \
\
#define VM_TYPES_PARALLELGC(declare_type, \

View File

@ -236,7 +236,10 @@ DefNewGeneration::DefNewGeneration(ReservedSpace rs,
// These values are exported as performance counters.
uintx size = _virtual_space.reserved_size();
_max_survivor_size = compute_survivor_size(size, SpaceAlignment);
_max_eden_size = size - (2*_max_survivor_size);
// Eden might grow to be almost as large as the entire young generation.
// We approximate this as the entire virtual space.
_max_eden_size = size;
// allocate the performance counters

View File

@ -91,14 +91,16 @@ SerialHeap::SerialHeap() :
CollectedHeap(),
_young_gen(nullptr),
_old_gen(nullptr),
_young_gen_saved_top(nullptr),
_old_gen_saved_top(nullptr),
_rem_set(nullptr),
_gc_policy_counters(new GCPolicyCounters("Copy:MSC", 2, 2)),
_young_manager(nullptr),
_old_manager(nullptr),
_is_heap_almost_full(false),
_eden_pool(nullptr),
_survivor_pool(nullptr),
_old_pool(nullptr) {
_old_pool(nullptr),
_is_heap_almost_full(false) {
_young_manager = new GCMemoryManager("Copy");
_old_manager = new GCMemoryManager("MarkSweepCompact");
GCLocker::initialize();
@ -147,7 +149,8 @@ GrowableArray<MemoryPool*> SerialHeap::memory_pools() {
HeapWord* SerialHeap::allocate_loaded_archive_space(size_t word_size) {
MutexLocker ml(Heap_lock);
return old_gen()->allocate(word_size);
HeapWord* const addr = old_gen()->allocate(word_size);
return addr != nullptr ? addr : old_gen()->expand_and_allocate(word_size);
}
void SerialHeap::complete_loaded_archive_space(MemRegion archive_space) {
@ -629,6 +632,14 @@ bool SerialHeap::requires_barriers(stackChunkOop obj) const {
// Returns "TRUE" iff "p" points into the committed areas of the heap.
bool SerialHeap::is_in(const void* p) const {
// precondition
verify_not_in_native_if_java_thread();
if (!is_in_reserved(p)) {
// If it's not even in reserved.
return false;
}
return _young_gen->is_in(p) || _old_gen->is_in(p);
}
@ -796,3 +807,12 @@ void SerialHeap::gc_epilogue(bool full) {
MetaspaceCounters::update_performance_counters();
};
#ifdef ASSERT
void SerialHeap::verify_not_in_native_if_java_thread() {
if (Thread::current()->is_Java_thread()) {
JavaThread* thread = JavaThread::current();
assert(thread->thread_state() != _thread_in_native, "precondition");
}
}
#endif
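
A note on the pattern used here: verify_not_in_native_if_java_thread() is declared with NOT_DEBUG_RETURN so release builds get an empty inline body, while the real check above is compiled only under ASSERT. The following is a stripped-down, hypothetical illustration of that debug-only-check idiom; the exact macro definition lives in HotSpot's utilities/macros.hpp and may differ in detail.

// Hypothetical, simplified version of the debug-only member-function pattern.
// In a debug (ASSERT) build the function has a real out-of-line body; in a
// release build the macro supplies an empty inline body and the call folds away.
#ifdef ASSERT
  #define NOT_DEBUG_RETURN        /* expands to nothing; next token must be ';' */
#else
  #define NOT_DEBUG_RETURN {}     /* empty inline body in release builds */
#endif

class ExampleHeap {
public:
  // Declaration-only in debug builds; empty inline body otherwise.
  static void verify_some_precondition() NOT_DEBUG_RETURN;
};

#ifdef ASSERT
void ExampleHeap::verify_some_precondition() {
  // Expensive precondition check, compiled only when ASSERT is defined.
}
#endif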

View File

@ -76,6 +76,8 @@ class SerialHeap : public CollectedHeap {
private:
DefNewGeneration* _young_gen;
TenuredGeneration* _old_gen;
// Used during young-gc
HeapWord* _young_gen_saved_top;
HeapWord* _old_gen_saved_top;
@ -94,6 +96,10 @@ private:
GCMemoryManager* _young_manager;
GCMemoryManager* _old_manager;
MemoryPool* _eden_pool;
MemoryPool* _survivor_pool;
MemoryPool* _old_pool;
// Indicate whether heap is almost or approaching full.
// Usually, there is some memory headroom for application/gc to run properly.
// However, in extreme cases, e.g. young-gen is non-empty after a full gc, we
@ -111,6 +117,21 @@ private:
void print_tracing_info() const override;
void stop() override {};
static void verify_not_in_native_if_java_thread() NOT_DEBUG_RETURN;
// Try to allocate space by expanding the heap.
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
void initialize_serviceability() override;
// Set the saved marks of generations, if that makes sense.
// In particular, if any generation might iterate over the oops
// in other generations, it should call this method.
void save_marks();
public:
// Returns JNI_OK on success
jint initialize() override;
@ -209,26 +230,6 @@ public:
// generations in a fully generational heap.
CardTableRS* rem_set() { return _rem_set; }
public:
// Set the saved marks of generations, if that makes sense.
// In particular, if any generation might iterate over the oops
// in other generations, it should call this method.
void save_marks();
private:
// Try to allocate space by expanding the heap.
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
MemoryPool* _eden_pool;
MemoryPool* _survivor_pool;
MemoryPool* _old_pool;
void initialize_serviceability() override;
public:
static SerialHeap* heap();
SerialHeap();

View File

@ -291,7 +291,7 @@
"size on systems with small physical memory size") \
range(0.0, 100.0) \
\
product(double, InitialRAMPercentage, 0.2, \
product(double, InitialRAMPercentage, 0.0, \
"Percentage of real memory used for initial heap size") \
range(0.0, 100.0) \
\

View File

@ -37,6 +37,7 @@
#include "utilities/copy.hpp"
size_t ThreadLocalAllocBuffer::_max_size = 0;
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
unsigned int ThreadLocalAllocBuffer::_target_refills = 0;
ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() :
@ -224,6 +225,30 @@ void ThreadLocalAllocBuffer::startup_initialization() {
// abort during VM initialization.
_target_refills = MAX2(_target_refills, 2U);
#ifdef COMPILER2
// If the C2 compiler is present, extra space is needed at the end of
// TLABs, otherwise prefetching instructions generated by the C2
// compiler will fault (due to accessing memory outside of heap).
// The amount of space is the max of the number of lines to
// prefetch for array and for instance allocations. (Extra space must be
// reserved to accommodate both types of allocations.)
//
// Only SPARC-specific BIS instructions are known to fault. (Those
// instructions are generated if AllocatePrefetchStyle==3 and
// AllocatePrefetchInstr==1). To be on the safe side, however,
// extra space is reserved for all combinations of
// AllocatePrefetchStyle and AllocatePrefetchInstr.
//
// If the C2 compiler is not present, no space is reserved.
// +1 for rounding up to next cache line, +1 to be safe
if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) {
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
_reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
(int)HeapWordSize;
}
#endif
// During jvm startup, the main thread is initialized
// before the heap is initialized. So reinitialize it now.
guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
@ -429,7 +454,8 @@ void ThreadLocalAllocStats::publish() {
}
size_t ThreadLocalAllocBuffer::end_reserve() {
return CollectedHeap::lab_alignment_reserve();
size_t reserve_size = CollectedHeap::lab_alignment_reserve();
return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
}
const HeapWord* ThreadLocalAllocBuffer::start_relaxed() const {
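
The reserve computed in the hunk above is easiest to see with concrete numbers. The sketch below is illustrative only: the flag values (AllocatePrefetchDistance, AllocatePrefetchStepSize, AllocatePrefetchLines, AllocateInstancePrefetchLines) are assumed, typical x86_64 defaults, not values taken from this change.

// Standalone sketch of the reserve calculation from
// ThreadLocalAllocBuffer::startup_initialization(), using assumed
// (illustrative) flag values rather than the real VM globals.
#include <algorithm>
#include <cstdio>

int main() {
  // Assumed illustrative defaults; the real values are CPU-dependent.
  const int AllocatePrefetchLines         = 3;   // lines prefetched for array allocations
  const int AllocateInstancePrefetchLines = 1;   // lines prefetched for instance allocations
  const int AllocatePrefetchDistance      = 192; // bytes ahead of the allocation top
  const int AllocatePrefetchStepSize      = 64;  // bytes between prefetched lines
  const int HeapWordSize                  = 8;   // 64-bit heap words

  // +1 for rounding up to the next cache line, +1 to be safe (as in the hunk above).
  const int lines = std::max(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;

  const int reserve_words =
      (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) / HeapWordSize;

  // With these numbers: (192 + 64 * 5) / 8 = 64 words, i.e. 512 bytes kept free
  // at the end of every TLAB so C2-generated prefetches stay inside the heap.
  std::printf("reserve = %d words (%d bytes)\n", reserve_words, reserve_words * HeapWordSize);
  return 0;
}

end_reserve() then returns the larger of this value and the LAB alignment reserve, so the prefetch head-room only matters when it exceeds the alignment requirement.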

View File

@ -58,6 +58,7 @@ private:
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
static size_t _max_size; // maximum size of any TLAB
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
static unsigned _target_refills; // expected number of refills between GCs
unsigned _number_of_refills;

View File

@ -1394,7 +1394,7 @@ void ShenandoahBarrierC2Support::pin_and_expand(PhaseIdealLoop* phase) {
}
if (addr->Opcode() == Op_AddP) {
Node* orig_base = addr->in(AddPNode::Base);
Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::StrongDependency);
Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::DependencyType::NonFloatingNarrowing);
phase->register_new_node(base, ctrl);
if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
// Field access

View File

@ -83,16 +83,15 @@ public:
return "PLAB";
default:
ShouldNotReachHere();
return "";
}
}
private:
// When ShenandoahElasticTLAB is enabled, the request cannot be made smaller than _min_size.
size_t _min_size;
size_t const _min_size;
// The size of the request in words.
size_t _requested_size;
size_t const _requested_size;
// The allocation may be increased for padding or decreased to fit in the remaining space of a region.
size_t _actual_size;
@ -104,7 +103,7 @@ private:
size_t _waste;
// This is the type of the request.
Type _alloc_type;
Type const _alloc_type;
#ifdef ASSERT
// Check that this is set before being read.
@ -209,6 +208,10 @@ public:
return (_alloc_type & bit_old_alloc) == 0;
}
inline bool is_cds() const {
return _alloc_type == _alloc_cds;
}
inline ShenandoahAffiliation affiliation() const {
return (_alloc_type & bit_old_alloc) == 0 ? YOUNG_GENERATION : OLD_GENERATION ;
}
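
The const fields above (_min_size, _requested_size, _actual_size, _waste) exist because LAB requests are "elastic": they may be satisfied with less than the requested size, but never with less than the minimum. The sketch below is a simplified, hypothetical illustration of that sizing rule; the names and structure are not the actual ShenandoahFreeSet code.

// Hypothetical sketch: satisfy an elastic LAB request with whatever fits in
// the candidate region, but never grant less than the request's minimum size.
#include <algorithm>
#include <cstddef>
#include <optional>

struct LabRequest {
  size_t min_size;        // words; hard lower bound for a LAB request
  size_t requested_size;  // words; what the thread asked for
  size_t actual_size = 0; // words; what was actually granted
};

// free_words: space left in the candidate region.
// Returns the granted size, or nothing if even min_size does not fit.
std::optional<size_t> try_satisfy(LabRequest& req, size_t free_words) {
  size_t grant = std::min(req.requested_size, free_words);
  if (grant < req.min_size) {
    return std::nullopt;   // region too full; the caller tries another region
  }
  req.actual_size = grant; // may be smaller than requested_size
  return grant;
}

Shared (non-LAB) requests have no such flexibility, which is why allocate_from_regions() in this change consults req.min_size() only when req.is_lab_alloc() is true.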

View File

@ -128,8 +128,8 @@ public:
void write_ref_array(HeapWord* start, size_t count);
private:
template <class T>
inline void arraycopy_marking(T* dst, size_t count);
template <bool IS_GENERATIONAL, class T>
void arraycopy_marking(T* dst, size_t count);
template <class T>
inline void arraycopy_evacuation(T* src, size_t count);
template <class T>

View File

@ -429,7 +429,11 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
// If marking old or young, we must evaluate the SATB barrier. This will be the only
// action if we are not marking old. If we are marking old, we must still evaluate the
// load reference barrier for a young collection.
arraycopy_marking(dst, count);
if (_heap->mode()->is_generational()) {
arraycopy_marking<true>(dst, count);
} else {
arraycopy_marking<false>(dst, count);
}
}
if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
@ -441,11 +445,12 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
}
}
template <class T>
template <bool IS_GENERATIONAL, class T>
void ShenandoahBarrierSet::arraycopy_marking(T* dst, size_t count) {
assert(_heap->is_concurrent_mark_in_progress(), "only during marking");
if (ShenandoahSATBBarrier) {
if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst))) {
if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst)) ||
(IS_GENERATIONAL && _heap->heap_region_containing(dst)->is_old() && _heap->is_concurrent_young_mark_in_progress())) {
arraycopy_work<T, false, false, true>(dst, count);
}
}
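
Replacing the runtime is_generational() test inside arraycopy_marking() with a template parameter is a common HotSpot pattern: the mode is checked once at the call site (as in arraycopy_barrier() above) and the per-element loop is compiled twice, once per mode, with no branch on the mode inside it. A generic, hypothetical sketch of the pattern, not the Shenandoah code itself:

// Hypothetical illustration of hoisting a mode check into a template
// parameter so the hot per-element loop contains no branch on the mode.
#include <cstddef>

struct Heap {
  bool generational;
  bool is_old(const void* /*p*/) const { return false; }  // stand-in predicate
};

template <bool IS_GENERATIONAL, class T>
void copy_with_barrier(Heap& heap, T* dst, size_t count) {
  for (size_t i = 0; i < count; i++) {
    // In the generational instantiation this extra check is compiled in;
    // in the single-generation instantiation it disappears entirely.
    if (IS_GENERATIONAL && heap.is_old(dst + i)) {
      /* take the old-gen specific path */
    }
    /* common barrier work for dst[i] */
  }
}

template <class T>
void copy_dispatch(Heap& heap, T* dst, size_t count) {
  // One runtime test, outside the loop, mirroring arraycopy_barrier().
  if (heap.generational) {
    copy_with_barrier<true>(heap, dst, count);
  } else {
    copy_with_barrier<false>(heap, dst, count);
  }
}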

View File

@ -144,13 +144,12 @@ public:
{
ShenandoahReentrantLocker locker(nm_data->lock());
// Heal oops and disarm
// Heal oops
if (_bs->is_armed(nm)) {
ShenandoahEvacOOMScope oom_evac_scope;
ShenandoahNMethod::heal_nmethod_metadata(nm_data);
// Code cache unloading needs to know about on-stack nmethods. Arm the nmethods to get
// mark_as_maybe_on_stack() callbacks when they are used again.
_bs->arm(nm);
// Must remain armed to complete remaining work in nmethod entry barrier
assert(_bs->is_armed(nm), "Should remain armed");
}
}

View File

@ -175,7 +175,6 @@ ShenandoahRegionPartitions::ShenandoahRegionPartitions(size_t max_regions, Shena
void ShenandoahFreeSet::account_for_pip_regions(size_t mutator_regions, size_t mutator_bytes,
size_t collector_regions, size_t collector_bytes) {
shenandoah_assert_heaplocked();
size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
// We have removed all of these regions from their respective partition. Each pip region is "in" the NotFree partition.
// We want to account for all pip pad memory as if it had been consumed from within the Mutator partition.
@ -1370,7 +1369,7 @@ template<typename Iter>
HeapWord* ShenandoahFreeSet::allocate_from_regions(Iter& iterator, ShenandoahAllocRequest &req, bool &in_new_region) {
for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) {
ShenandoahHeapRegion* r = _heap->get_region(idx);
size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab) ? req.min_size() : req.size();
size_t min_size = req.is_lab_alloc() ? req.min_size() : req.size();
if (alloc_capacity(r) >= min_size * HeapWordSize) {
HeapWord* result = try_allocate_in(r, req, in_new_region);
if (result != nullptr) {
@ -1502,7 +1501,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
if (in_new_region) {
log_debug(gc, free)("Using new region (%zu) for %s (" PTR_FORMAT ").",
r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req));
r->index(), req.type_string(), p2i(&req));
assert(!r->is_affiliated(), "New region %zu should be unaffiliated", r->index());
r->set_affiliation(req.affiliation());
if (r->is_old()) {
@ -1521,7 +1520,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
assert(ctx->is_bitmap_range_within_region_clear(ctx->top_bitmap(r), r->end()), "Bitmap above top_bitmap() must be clear");
#endif
log_debug(gc, free)("Using new region (%zu) for %s (" PTR_FORMAT ").",
r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req));
r->index(), req.type_string(), p2i(&req));
} else {
assert(r->is_affiliated(), "Region %zu that is not new should be affiliated", r->index());
if (r->affiliation() != req.affiliation()) {
@ -1535,8 +1534,8 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
if (req.is_lab_alloc()) {
size_t adjusted_size = req.size();
size_t free = r->free(); // free represents bytes available within region r
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
// This is a PLAB allocation
if (req.is_old()) {
// This is a PLAB allocation (lab alloc in old gen)
assert(_heap->mode()->is_generational(), "PLABs are only for generational mode");
assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::OldCollector, r->index()),
"PLABS must be allocated in old_collector_free regions");
@ -1597,26 +1596,19 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
r->set_update_watermark(r->top());
if (r->is_old()) {
_partitions.increase_used(ShenandoahFreeSetPartitionId::OldCollector, (req.actual_size() + req.waste()) * HeapWordSize);
assert(req.type() != ShenandoahAllocRequest::_alloc_gclab, "old-gen allocations use PLAB or shared allocation");
// for plabs, we'll sort the difference between evac and promotion usage when we retire the plab
} else {
_partitions.increase_used(ShenandoahFreeSetPartitionId::Collector, (req.actual_size() + req.waste()) * HeapWordSize);
}
}
}
size_t ac = alloc_capacity(r);
ShenandoahFreeSetPartitionId orig_partition;
ShenandoahGeneration* request_generation = nullptr;
if (req.is_mutator_alloc()) {
request_generation = _heap->mode()->is_generational()? _heap->young_generation(): _heap->global_generation();
orig_partition = ShenandoahFreeSetPartitionId::Mutator;
} else if (req.is_old()) {
request_generation = _heap->old_generation();
orig_partition = ShenandoahFreeSetPartitionId::OldCollector;
} else {
// Not old collector alloc, so this is a young collector gclab or shared allocation
request_generation = _heap->mode()->is_generational()? _heap->young_generation(): _heap->global_generation();
orig_partition = ShenandoahFreeSetPartitionId::Collector;
}
if (alloc_capacity(r) < PLAB::min_size() * HeapWordSize) {
@ -1688,7 +1680,6 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req, bo
idx_t num = ShenandoahHeapRegion::required_regions(words_size * HeapWordSize);
assert(req.is_young(), "Humongous regions always allocated in YOUNG");
ShenandoahGeneration* generation = _heap->generation_for(req.affiliation());
// Check if there are enough regions left to satisfy allocation.
if (num > (idx_t) _partitions.count(ShenandoahFreeSetPartitionId::Mutator)) {
@ -1833,107 +1824,7 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req, bo
}
class ShenandoahRecycleTrashedRegionClosure final : public ShenandoahHeapRegionClosure {
private:
static const ssize_t SentinelUsed = -1;
static const ssize_t SentinelIndex = -1;
static const size_t MaxSavedRegions = 128;
ShenandoahRegionPartitions* _partitions;
volatile size_t _recycled_region_count;
ssize_t _region_indices[MaxSavedRegions];
ssize_t _region_used[MaxSavedRegions];
void get_lock_and_flush_buffer(size_t region_count, size_t overflow_region_used, size_t overflow_region_index) {
ShenandoahHeap* heap = ShenandoahHeap::heap();
ShenandoahHeapLocker locker(heap->lock());
size_t recycled_regions = AtomicAccess::load(&_recycled_region_count);
size_t region_tallies[int(ShenandoahRegionPartitions::NumPartitions)];
size_t used_byte_tallies[int(ShenandoahRegionPartitions::NumPartitions)];
for (int p = 0; p < int(ShenandoahRegionPartitions::NumPartitions); p++) {
region_tallies[p] = 0;
used_byte_tallies[p] = 0;
}
ShenandoahFreeSetPartitionId p = _partitions->membership(overflow_region_index);
used_byte_tallies[int(p)] += overflow_region_used;
if (region_count <= recycled_regions) {
// _recycled_region_count has not been decremented after I incremented it to obtain region_count, so I will
// try to flush the buffer.
// Multiple worker threads may attempt to flush this buffer. The first thread to acquire the lock does the work.
// _recycled_region_count is only decreased while holding the heap lock.
if (region_count > recycled_regions) {
region_count = recycled_regions;
}
for (size_t i = 0; i < region_count; i++) {
ssize_t used;
// wait for other threads to finish updating their entries within the region buffer before processing entry
do {
used = _region_used[i];
} while (used == SentinelUsed);
ssize_t index;
do {
index = _region_indices[i];
} while (index == SentinelIndex);
ShenandoahFreeSetPartitionId p = _partitions->membership(index);
assert(p != ShenandoahFreeSetPartitionId::NotFree, "Trashed regions should be in a free partition");
used_byte_tallies[int(p)] += used;
region_tallies[int(p)]++;
}
if (region_count > 0) {
for (size_t i = 0; i < MaxSavedRegions; i++) {
_region_indices[i] = SentinelIndex;
_region_used[i] = SentinelUsed;
}
}
// The almost last thing we do before releasing the lock is to set the _recycled_region_count to 0. What happens next?
//
// 1. Any worker thread that attempted to buffer a new region while we were flushing the buffer will have seen
// that _recycled_region_count > MaxSavedRegions. All such worker threads will first wait for the lock, then
// discover that the _recycled_region_count is zero, then, while holding the lock, they will process the
// region so it doesn't have to be placed into the buffer. This handles the large majority of cases.
//
// 2. However, there's a race that can happen, which will result in somewhat different behavior. Suppose
// this thread resets _recycled_region_count to 0. Then some other worker thread increments _recycled_region_count
// in order to store its region into the buffer and suppose this happens before all of the other worker threads
// which are waiting to acquire the heap lock have finished their efforts to flush the buffer. If this happens,
// then the workers who are waiting to acquire the heap lock and flush the buffer will find that _recycled_region_count
// has decreased from the value it held when they last tried to increment its value. In this case, these worker
// threads will process their overflow region while holding the lock, but they will not attempt to process regions
// newly placed into the buffer. Otherwise, confusion could result.
//
// Assumption: all worker threads who are attempting to acquire lock and flush buffer will finish their efforts before
// the buffer once again overflows.
// How could we avoid depending on this assumption?
// 1. Let MaxSavedRegions be as large as number of regions, or at least as large as the collection set.
// 2. Keep a count of how many times the buffer has been flushed per instantiation of the
// ShenandoahRecycleTrashedRegionClosure object, and only consult/update this value while holding the heap lock.
// Need to think about how this helps resolve the race.
_recycled_region_count = 0;
} else {
// Some other thread has already processed the buffer, resetting _recycled_region_count to zero. Its current value
// may be greater than zero because other workers may have accumulated entries into the buffer. But it is "extremely"
// unlikely that it will overflow again before all waiting workers have had a chance to clear their state. While I've
// got the heap lock, I'll go ahead and update the global state for my overflow region. I'll let other heap regions
// accumulate in the buffer to be processed when the buffer is once again full.
region_count = 0;
}
for (size_t p = 0; p < int(ShenandoahRegionPartitions::NumPartitions); p++) {
_partitions->decrease_used(ShenandoahFreeSetPartitionId(p), used_byte_tallies[p]);
}
}
public:
ShenandoahRecycleTrashedRegionClosure(ShenandoahRegionPartitions* p): ShenandoahHeapRegionClosure() {
_partitions = p;
_recycled_region_count = 0;
for (size_t i = 0; i < MaxSavedRegions; i++) {
_region_indices[i] = SentinelIndex;
_region_used[i] = SentinelUsed;
}
}
void heap_region_do(ShenandoahHeapRegion* r) {
r->try_recycle();
}
@ -1950,14 +1841,12 @@ void ShenandoahFreeSet::recycle_trash() {
ShenandoahHeap* heap = ShenandoahHeap::heap();
heap->assert_gc_workers(heap->workers()->active_workers());
ShenandoahRecycleTrashedRegionClosure closure(&_partitions);
ShenandoahRecycleTrashedRegionClosure closure;
heap->parallel_heap_region_iterate(&closure);
}
bool ShenandoahFreeSet::transfer_one_region_from_mutator_to_old_collector(size_t idx, size_t alloc_capacity) {
ShenandoahGenerationalHeap* gen_heap = ShenandoahGenerationalHeap::heap();
ShenandoahYoungGeneration* young_gen = gen_heap->young_generation();
ShenandoahOldGeneration* old_gen = gen_heap->old_generation();
size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
assert(alloc_capacity == region_size_bytes, "Region must be empty");
if (young_unaffiliated_regions() > 0) {
@ -1985,7 +1874,6 @@ bool ShenandoahFreeSet::flip_to_old_gc(ShenandoahHeapRegion* r) {
assert(_partitions.partition_id_matches(idx, ShenandoahFreeSetPartitionId::Mutator), "Should be in mutator view");
assert(can_allocate_from(r), "Should not be allocated");
ShenandoahGenerationalHeap* gen_heap = ShenandoahGenerationalHeap::heap();
const size_t region_alloc_capacity = alloc_capacity(r);
if (transfer_one_region_from_mutator_to_old_collector(idx, region_alloc_capacity)) {
@ -2133,7 +2021,6 @@ void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_r
size_t total_mutator_regions = 0;
size_t total_old_collector_regions = 0;
bool is_generational = _heap->mode()->is_generational();
size_t num_regions = _heap->num_regions();
for (size_t idx = 0; idx < num_regions; idx++) {
ShenandoahHeapRegion* region = _heap->get_region(idx);
@ -2222,7 +2109,6 @@ void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_r
}
} else {
assert(_partitions.membership(idx) == ShenandoahFreeSetPartitionId::NotFree, "Region should have been retired");
size_t ac = alloc_capacity(region);
size_t humongous_waste_bytes = 0;
if (region->is_humongous_start()) {
oop obj = cast_to_oop(region->bottom());
@ -3120,7 +3006,6 @@ void ShenandoahFreeSet::log_status() {
size_t total_used = 0;
size_t total_free = 0;
size_t total_free_ext = 0;
size_t total_trashed_free = 0;
for (idx_t idx = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator);
idx <= _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); idx++) {

View File

@ -76,6 +76,9 @@ public:
}
}
// Bitmap reset task is heavy-weight and benefits from much smaller tasks than the default.
size_t parallel_region_stride() override { return 8; }
bool is_thread_safe() override { return true; }
};
@ -524,7 +527,6 @@ size_t ShenandoahGeneration::select_aged_regions(const size_t old_promotion_rese
assert_no_in_place_promotions();
auto const heap = ShenandoahGenerationalHeap::heap();
ShenandoahYoungGeneration* young_gen = heap->young_generation();
ShenandoahFreeSet* free_set = heap->free_set();
bool* const candidate_regions_for_promotion_by_copy = heap->collection_set()->preselected_regions();
ShenandoahMarkingContext* const ctx = heap->marking_context();
@ -562,7 +564,6 @@ size_t ShenandoahGeneration::select_aged_regions(const size_t old_promotion_rese
size_t pip_mutator_bytes = 0;
size_t pip_collector_bytes = 0;
size_t min_remnant_size = PLAB::min_size() * HeapWordSize;
for (idx_t i = 0; i < num_regions; i++) {
ShenandoahHeapRegion* const r = heap->get_region(i);
if (r->is_empty() || !r->has_live() || !r->is_young() || !r->is_regular()) {

View File

@ -688,19 +688,6 @@ void ShenandoahGenerationalHeap::reset_generation_reserves() {
old_generation()->set_promoted_reserve(0);
}
void ShenandoahGenerationalHeap::TransferResult::print_on(const char* when, outputStream* ss) const {
auto heap = ShenandoahGenerationalHeap::heap();
ShenandoahYoungGeneration* const young_gen = heap->young_generation();
ShenandoahOldGeneration* const old_gen = heap->old_generation();
const size_t young_available = young_gen->available();
const size_t old_available = old_gen->available();
ss->print_cr("After %s, %s %zu regions to %s to prepare for next gc, old available: "
PROPERFMT ", young_available: " PROPERFMT,
when,
success? "successfully transferred": "failed to transfer", region_count, region_destination,
PROPERFMTARGS(old_available), PROPERFMTARGS(young_available));
}
void ShenandoahGenerationalHeap::coalesce_and_fill_old_regions(bool concurrent) {
class ShenandoahGlobalCoalesceAndFill : public WorkerTask {
private:

View File

@ -132,24 +132,12 @@ public:
bool requires_barriers(stackChunkOop obj) const override;
// Used for logging the result of a region transfer outside the heap lock
struct TransferResult {
bool success;
size_t region_count;
const char* region_destination;
void print_on(const char* when, outputStream* ss) const;
};
// Zeros out the evacuation and promotion reserves
void reset_generation_reserves();
// Computes the optimal size for the old generation, represented as a surplus or deficit of old regions
void compute_old_generation_balance(size_t old_xfer_limit, size_t old_cset_regions);
// Transfers surplus old regions to young, or takes regions from young to satisfy old region deficit
TransferResult balance_generations();
// Balances generations, coalesces and fills old regions if necessary
void complete_degenerated_cycle();
void complete_concurrent_cycle();

View File

@ -985,7 +985,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
assert (req.is_lab_alloc() || (requested == actual),
"Only LAB allocations are elastic: %s, requested = %zu, actual = %zu",
ShenandoahAllocRequest::alloc_type_to_string(req.type()), requested, actual);
req.type_string(), requested, actual);
}
return result;
@ -1014,8 +1014,9 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
// Record the plab configuration for this result and register the object.
if (result != nullptr && req.is_old()) {
old_generation()->configure_plab_for_current_thread(req);
if (!req.is_lab_alloc()) {
if (req.is_lab_alloc()) {
old_generation()->configure_plab_for_current_thread(req);
} else {
// Register the newly allocated object while we're holding the global lock since there's no synchronization
// built in to the implementation of register_object(). There are potential races when multiple independent
// threads are allocating objects, some of which might span the same card region. For example, consider
@ -1035,6 +1036,13 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
// last-start representing object b while first-start represents object c. This is why we need to require all
// register_object() invocations to be "mutually exclusive" with respect to each card's memory range.
old_generation()->card_scan()->register_object(result);
if (req.is_promotion()) {
// Shared promotion.
const size_t actual_size = req.actual_size() * HeapWordSize;
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
old_generation()->expend_promoted(actual_size);
}
}
}
@ -1962,7 +1970,7 @@ void ShenandoahHeap::parallel_heap_region_iterate(ShenandoahHeapRegionClosure* b
assert(blk->is_thread_safe(), "Only thread-safe closures here");
const uint active_workers = workers()->active_workers();
const size_t n_regions = num_regions();
size_t stride = ShenandoahParallelRegionStride;
size_t stride = blk->parallel_region_stride();
if (stride == 0 && active_workers > 1) {
// Automatically derive the stride to balance the work between threads
// evenly. Do not try to split work if below the reasonable threshold.

View File

@ -113,6 +113,7 @@ public:
class ShenandoahHeapRegionClosure : public StackObj {
public:
virtual void heap_region_do(ShenandoahHeapRegion* r) = 0;
virtual size_t parallel_region_stride() { return ShenandoahParallelRegionStride; }
virtual bool is_thread_safe() { return false; }
};
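
The parallel_region_stride() hook added above lets a closure choose how many regions each worker claims at a time; the bitmap-reset closure earlier in this diff overrides it to return 8 because its per-region work is heavy. Below is a hedged, simplified model of how such a stride is typically consumed; the real claiming logic lives in parallel_heap_region_iterate() and is not reproduced here.

// Simplified, illustrative model of strided parallel iteration over heap
// regions: workers atomically claim blocks of `stride` consecutive regions.
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <functional>

void parallel_iterate(size_t num_regions, size_t stride, unsigned num_workers,
                      const std::function<void(size_t /*region index*/)>& do_region) {
  std::atomic<size_t> next_block{0};
  auto worker = [&]() {
    for (;;) {
      size_t begin = next_block.fetch_add(stride);  // claim one block of regions
      if (begin >= num_regions) break;
      size_t end = std::min(begin + stride, num_regions);
      for (size_t i = begin; i < end; i++) {
        do_region(i);                               // heap_region_do(region i)
      }
    }
  };
  // Run `worker` on num_workers threads (thread setup elided for brevity).
  (void)num_workers;
  worker();
}

A small stride gives finer load balancing at the cost of more atomic claims, which is the trade-off behind returning 8 for the heavy bitmap-reset task while leaving ShenandoahParallelRegionStride as the default elsewhere.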

View File

@ -447,7 +447,7 @@ public:
return (bottom() <= p) && (p < top());
}
inline void adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t);
inline void adjust_alloc_metadata(const ShenandoahAllocRequest &req, size_t);
void reset_alloc_metadata();
size_t get_shared_allocs() const;
size_t get_tlab_allocs() const;

View File

@ -71,7 +71,7 @@ HeapWord* ShenandoahHeapRegion::allocate_aligned(size_t size, ShenandoahAllocReq
}
make_regular_allocation(req.affiliation());
adjust_alloc_metadata(req.type(), size);
adjust_alloc_metadata(req, size);
HeapWord* new_top = aligned_obj + size;
assert(new_top <= end(), "PLAB cannot span end of heap region");
@ -111,7 +111,7 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
HeapWord* obj = top();
if (pointer_delta(end(), obj) >= size) {
make_regular_allocation(req.affiliation());
adjust_alloc_metadata(req.type(), size);
adjust_alloc_metadata(req, size);
HeapWord* new_top = obj + size;
set_top(new_top);
@ -125,26 +125,16 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
}
}
inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t size) {
switch (type) {
case ShenandoahAllocRequest::_alloc_shared:
case ShenandoahAllocRequest::_alloc_shared_gc:
case ShenandoahAllocRequest::_alloc_shared_gc_old:
case ShenandoahAllocRequest::_alloc_shared_gc_promotion:
case ShenandoahAllocRequest::_alloc_cds:
// Counted implicitly by tlab/gclab allocs
break;
case ShenandoahAllocRequest::_alloc_tlab:
inline void ShenandoahHeapRegion::adjust_alloc_metadata(const ShenandoahAllocRequest &req, size_t size) {
// Only need to update alloc metadata for lab alloc, shared alloc is counted implicitly by tlab/gclab allocs
if (req.is_lab_alloc()) {
if (req.is_mutator_alloc()) {
_tlab_allocs += size;
break;
case ShenandoahAllocRequest::_alloc_gclab:
_gclab_allocs += size;
break;
case ShenandoahAllocRequest::_alloc_plab:
} else if (req.is_old()) {
_plab_allocs += size;
break;
default:
ShouldNotReachHere();
} else {
_gclab_allocs += size;
}
}
}
@ -157,7 +147,7 @@ inline void ShenandoahHeapRegion::increase_live_data_gc_words(size_t s) {
}
inline void ShenandoahHeapRegion::internal_increase_live_data(size_t s) {
size_t new_live_data = AtomicAccess::add(&_live_data, s, memory_order_relaxed);
AtomicAccess::add(&_live_data, s, memory_order_relaxed);
}
inline void ShenandoahHeapRegion::clear_live_data() {

View File

@ -44,6 +44,10 @@ public:
}
}
size_t parallel_region_stride() override {
return _closure->parallel_region_stride();
}
bool is_thread_safe() override {
return _closure->is_thread_safe();
}
@ -64,6 +68,10 @@ public:
}
}
size_t parallel_region_stride() override {
return _closure->parallel_region_stride();
}
bool is_thread_safe() override {
return _closure->is_thread_safe();
}

View File

@ -168,7 +168,7 @@ size_t ShenandoahOldGeneration::get_promoted_expended() const {
}
bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) const {
assert(req.type() != ShenandoahAllocRequest::_alloc_gclab, "GCLAB pertains only to young-gen memory");
assert(req.is_old(), "Must be old allocation request");
const size_t requested_bytes = req.size() * HeapWordSize;
// The promotion reserve may also be used for evacuations. If we can promote this object,
@ -180,7 +180,7 @@ bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) co
return true;
}
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
if (req.is_lab_alloc()) {
// The promotion reserve cannot accommodate this plab request. Check if we still have room for
// evacuations. Note that we cannot really know how much of the plab will be used for evacuations,
// so here we only check that some evacuation reserve still exists.
@ -195,37 +195,29 @@ bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) co
void
ShenandoahOldGeneration::configure_plab_for_current_thread(const ShenandoahAllocRequest &req) {
// Note: Even when a mutator is performing a promotion outside a LAB, we use a 'shared_gc' request.
if (req.is_gc_alloc()) {
const size_t actual_size = req.actual_size() * HeapWordSize;
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
// We've created a new plab. Now we configure it whether it will be used for promotions
// and evacuations - or just evacuations.
Thread* thread = Thread::current();
ShenandoahThreadLocalData::reset_plab_promoted(thread);
assert(req.is_gc_alloc() && req.is_old() && req.is_lab_alloc(), "Must be a plab alloc request");
const size_t actual_size = req.actual_size() * HeapWordSize;
// We've created a new plab. Now we configure it whether it will be used for promotions
// and evacuations - or just evacuations.
Thread* thread = Thread::current();
ShenandoahThreadLocalData::reset_plab_promoted(thread);
// The actual size of the allocation may be larger than the requested bytes (due to alignment on card boundaries).
// If this puts us over our promotion budget, we need to disable future PLAB promotions for this thread.
if (can_promote(actual_size)) {
// Assume the entirety of this PLAB will be used for promotion. This prevents promotion from overreach.
// When we retire this plab, we'll unexpend what we don't really use.
log_debug(gc, plab)("Thread can promote using PLAB of %zu bytes. Expended: %zu, available: %zu",
actual_size, get_promoted_expended(), get_promoted_reserve());
expend_promoted(actual_size);
ShenandoahThreadLocalData::enable_plab_promotions(thread);
ShenandoahThreadLocalData::set_plab_actual_size(thread, actual_size);
} else {
// Disable promotions in this thread because entirety of this PLAB must be available to hold old-gen evacuations.
ShenandoahThreadLocalData::disable_plab_promotions(thread);
ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
log_debug(gc, plab)("Thread cannot promote using PLAB of %zu bytes. Expended: %zu, available: %zu, mixed evacuations? %s",
actual_size, get_promoted_expended(), get_promoted_reserve(), BOOL_TO_STR(ShenandoahHeap::heap()->collection_set()->has_old_regions()));
}
} else if (req.is_promotion()) {
// Shared promotion.
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
expend_promoted(actual_size);
}
// The actual size of the allocation may be larger than the requested bytes (due to alignment on card boundaries).
// If this puts us over our promotion budget, we need to disable future PLAB promotions for this thread.
if (can_promote(actual_size)) {
// Assume the entirety of this PLAB will be used for promotion. This prevents promotion from overreach.
// When we retire this plab, we'll unexpend what we don't really use.
log_debug(gc, plab)("Thread can promote using PLAB of %zu bytes. Expended: %zu, available: %zu",
actual_size, get_promoted_expended(), get_promoted_reserve());
expend_promoted(actual_size);
ShenandoahThreadLocalData::enable_plab_promotions(thread);
ShenandoahThreadLocalData::set_plab_actual_size(thread, actual_size);
} else {
// Disable promotions in this thread because entirety of this PLAB must be available to hold old-gen evacuations.
ShenandoahThreadLocalData::disable_plab_promotions(thread);
ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
log_debug(gc, plab)("Thread cannot promote using PLAB of %zu bytes. Expended: %zu, available: %zu, mixed evacuations? %s",
actual_size, get_promoted_expended(), get_promoted_reserve(), BOOL_TO_STR(ShenandoahHeap::heap()->collection_set()->has_old_regions()));
}
}
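
configure_plab_for_current_thread() above charges the whole PLAB against the promotion reserve up front and relies on the retirement path to "unexpend" whatever was not actually used for promotion. The sketch below is a minimal, hypothetical model of that expend/unexpend bookkeeping; the class and method names are illustrative, not the ShenandoahOldGeneration API.

// Hypothetical promotion budget: expend optimistically when a PLAB is
// configured for promotion, give back the unused remainder when it retires.
#include <atomic>
#include <cstddef>

class PromotionBudget {
  size_t _reserve;                   // bytes set aside for promotions this cycle
  std::atomic<size_t> _expended{0};  // bytes already charged against the reserve
public:
  explicit PromotionBudget(size_t reserve_bytes) : _reserve(reserve_bytes) {}

  bool can_promote(size_t bytes) const {
    return _expended.load(std::memory_order_relaxed) + bytes <= _reserve;
  }
  void expend(size_t bytes)   { _expended.fetch_add(bytes, std::memory_order_relaxed); }
  void unexpend(size_t bytes) { _expended.fetch_sub(bytes, std::memory_order_relaxed); }
};

// Usage mirroring the hunk above: charge the full PLAB if it fits, otherwise
// leave the PLAB to evacuations only and disable promotions on this thread.
inline bool configure_plab(PromotionBudget& budget, size_t plab_bytes) {
  if (budget.can_promote(plab_bytes)) {
    budget.expend(plab_bytes);  // assume the whole PLAB promotes; corrected at retire
    return true;                // promotions enabled for this thread
  }
  return false;                 // promotions disabled; PLAB holds evacuations only
}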

View File

@ -62,7 +62,6 @@ class ShenandoahRegulatorThread: public ConcurrentGCThread {
bool start_old_cycle() const;
bool start_young_cycle() const;
bool start_global_cycle() const;
bool resume_old_cycle();
// The generational mode can only unload classes in a global cycle. The regulator
// thread itself will trigger a global cycle if metaspace is out of memory.

View File

@ -335,7 +335,6 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
if (ctx->is_marked(p)) {
oop obj = cast_to_oop(p);
assert(oopDesc::is_oop(obj), "Should be an object");
assert(Klass::is_valid(obj->klass()), "Not a valid klass ptr");
assert(p + obj->size() > left, "This object should span start of card");
assert(p < right, "Result must precede right");
return p;
@ -362,15 +361,15 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
// Recall that we already dealt with the co-initial object case above
assert(p < left, "obj should start before left");
// While it is safe to ask an object its size in the loop that
// follows, the (ifdef'd out) loop should never be needed.
// While it is safe to ask an object its size in the block that
// follows, the (ifdef'd out) block should never be needed.
// 1. we ask this question only for regions in the old generation, and those
// that are not humongous regions
// 2. there is no direct allocation ever by mutators in old generation
// regions walked by this code. Only GC will ever allocate in old regions,
// and then too only during promotion/evacuation phases. Thus there is no danger
// of races between reading from and writing to the object start array,
// or of asking partially initialized objects their size (in the loop below).
// or of asking partially initialized objects their size (in the ifdef below).
// Furthermore, humongous regions (and their dirty cards) are never processed
// by this code.
// 3. only GC asks this question during phases when it is not concurrently
@ -382,15 +381,6 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
#ifdef ASSERT
oop obj = cast_to_oop(p);
assert(oopDesc::is_oop(obj), "Should be an object");
while (p + obj->size() < left) {
p += obj->size();
obj = cast_to_oop(p);
assert(oopDesc::is_oop(obj), "Should be an object");
assert(Klass::is_valid(obj->klass()), "Not a valid klass ptr");
// Check assumptions in previous block comment if this assert fires
fatal("Should never need forward walk in block start");
}
assert(p <= left, "p should start at or before left end of card");
assert(p + obj->size() > left, "obj should end after left end of card");
#endif // ASSERT
return p;
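
The invariant that the removed forward-walk loop was guarding is that the recorded object start already covers the card's left boundary, so a forward scan is never needed. The following is a toy, hypothetical model of that lookup over a sorted list of object starts; it is not the real card / object-start table.

// Toy model: given objects with known starts and sizes, find the object that
// spans `left`, the first word of a dirty card. With precise start metadata
// the answer never requires walking forward past `left`.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Obj { size_t start; size_t size; };  // word-addressed, sorted by start

size_t first_object_start(const std::vector<Obj>& objs, size_t left) {
  // Last object whose start is <= left.
  auto it = std::upper_bound(objs.begin(), objs.end(), left,
                             [](size_t l, const Obj& o) { return l < o.start; });
  assert(it != objs.begin() && "some object must start at or before the card");
  --it;
  // The invariant discussed above: this object must extend past `left`,
  // i.e. it spans the start of the card, so no forward walk is needed.
  assert(it->start + it->size > left);
  return it->start;
}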

View File

@ -233,8 +233,6 @@ public:
inline bool is_write_card_dirty(size_t card_index) const;
inline void mark_card_as_dirty(size_t card_index);
inline void mark_range_as_dirty(size_t card_index, size_t num_cards);
inline void mark_card_as_clean(size_t card_index);
inline void mark_range_as_clean(size_t card_index, size_t num_cards);
inline bool is_card_dirty(HeapWord* p) const;
inline bool is_write_card_dirty(HeapWord* p) const;
inline void mark_card_as_dirty(HeapWord* p);

Some files were not shown because too many files have changed in this diff.