diff --git a/.github/actions/get-msys2/action.yml b/.github/actions/get-msys2/action.yml
index 7351a120ac4..4082aad0c1a 100644
--- a/.github/actions/get-msys2/action.yml
+++ b/.github/actions/get-msys2/action.yml
@@ -25,20 +25,49 @@
name: 'Get MSYS2'
description: 'Download MSYS2 and prepare a Windows host'
+inputs:
+ architecture:
+ description: 'Architecture'
+ required: true
runs:
using: composite
steps:
- - name: 'Install MSYS2'
- id: msys2
+ - name: 'Install MSYS2 on x86.x64'
+ id: msys2-x64
uses: msys2/setup-msys2@v2.31.0
with:
install: 'autoconf tar unzip zip make'
path-type: minimal
release: false
+ if: ${{ inputs.architecture == 'x86.x64' }}
+
+ - name: 'Install MSYS2 on ARM64'
+ id: msys2-arm64
+ uses: msys2/setup-msys2@v2.31.0
+ with:
+ install: 'autoconf tar unzip zip make'
+ path-type: minimal
+ release: true
+ location: ${{ runner.tool_cache }}/msys2
+ if: ${{ inputs.architecture == 'ARM64' }}
# We can't run bash until this is completed, so stick with pwsh
- - name: 'Set MSYS2 path'
+ - name: 'Set MSYS2 path for x64'
run: |
- echo "${{ steps.msys2.outputs.msys2-location }}/usr/bin" >> $env:GITHUB_PATH
+ echo "${{ steps.msys2-x64.outputs.msys2-location }}/usr/bin" >> $env:GITHUB_PATH
shell: pwsh
+ if: ${{ inputs.architecture == 'x86.x64' }}
+
+ - name: 'Set MSYS2 path for ARM64'
+ run: |
+ echo "${{ steps.msys2-arm64.outputs.msys2-location }}/usr/bin" >> $env:GITHUB_PATH
+ shell: pwsh
+ if: ${{ inputs.architecture == 'ARM64' }}
+
+ # Remove the default config.site file provided by MSYS2 to ensure config.guess accurately detects the host system.
+ - name: 'Remove default config.site'
+ run: |
+ echo "Removing default config.site"
+ rm -f /etc/config.site
+ shell: env /usr/bin/bash --login -eo pipefail {0}
diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml
index 99b6c40606c..c80f676864e 100644
--- a/.github/workflows/build-cross-compile.yml
+++ b/.github/workflows/build-cross-compile.yml
@@ -54,18 +54,11 @@ jobs:
fail-fast: false
matrix:
target-cpu:
- - aarch64
- arm
- s390x
- ppc64le
- riscv64
include:
- - target-cpu: aarch64
- gnu-arch: aarch64
- debian-arch: arm64
- debian-repository: https://httpredir.debian.org/debian/
- debian-version: trixie
- tolerate-sysroot-errors: false
- target-cpu: arm
gnu-arch: arm
debian-arch: armhf
diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml
index c501670439e..a77ebece7e2 100644
--- a/.github/workflows/build-linux.yml
+++ b/.github/workflows/build-linux.yml
@@ -31,6 +31,14 @@ on:
platform:
required: true
type: string
+ runs-on:
+ required: false
+ type: string
+ default: 'ubuntu-24.04'
+ bootjdk-platform:
+ required: false
+ type: string
+ default: 'linux-x64'
extra-conf-options:
required: false
type: string
@@ -75,7 +83,7 @@ on:
jobs:
build-linux:
name: build
- runs-on: ubuntu-24.04
+ runs-on: ${{ inputs.runs-on }}
strategy:
fail-fast: false
@@ -90,7 +98,7 @@ jobs:
id: bootjdk
uses: ./.github/actions/get-bootjdk
with:
- platform: linux-x64
+ platform: ${{ inputs.bootjdk-platform }}
- name: 'Get JTReg'
id: jtreg
diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml
index 3bb50a137ec..002cbe7cd56 100644
--- a/.github/workflows/build-windows.yml
+++ b/.github/workflows/build-windows.yml
@@ -31,6 +31,9 @@ on:
platform:
required: true
type: string
+ runs-on:
+ required: true
+ type: string
extra-conf-options:
required: false
type: string
@@ -45,7 +48,7 @@ on:
msvc-toolset-version:
required: true
type: string
- msvc-toolset-architecture:
+ architecture:
required: true
type: string
configure-arguments:
@@ -67,7 +70,7 @@ env:
jobs:
build-windows:
name: build
- runs-on: windows-2025
+ runs-on: ${{ inputs.runs-on }}
defaults:
run:
shell: bash
@@ -87,12 +90,14 @@ jobs:
- name: 'Get MSYS2'
uses: ./.github/actions/get-msys2
+ with:
+ architecture: ${{ inputs.architecture }}
- name: 'Get the BootJDK'
id: bootjdk
uses: ./.github/actions/get-bootjdk
with:
- platform: windows-x64
+ platform: ${{ inputs.platform }}
- name: 'Get JTReg'
id: jtreg
@@ -102,26 +107,49 @@ jobs:
id: gtest
uses: ./.github/actions/get-gtest
- - name: 'Check toolchain installed'
- id: toolchain-check
+ - name: 'Check toolchain installed for x64'
+ id: toolchain-check-x64
run: |
set +e
'/c/Program Files/Microsoft Visual Studio/2022/Enterprise/vc/auxiliary/build/vcvars64.bat' -vcvars_ver=${{ inputs.msvc-toolset-version }}
if [ $? -eq 0 ]; then
- echo "Toolchain is already installed"
+ echo "Toolchain is already installed for x64"
echo "toolchain-installed=true" >> $GITHUB_OUTPUT
else
- echo "Toolchain is not yet installed"
+ echo "Toolchain is not yet installed for x64"
echo "toolchain-installed=false" >> $GITHUB_OUTPUT
fi
+ if: ${{ inputs.architecture == 'x86.x64' }}
- - name: 'Install toolchain and dependencies'
+ - name: 'Check toolchain installed for ARM64'
+ id: toolchain-check-arm64
+ run: |
+ set +e
+          '/c/Program Files/Microsoft Visual Studio/2022/Enterprise/vc/auxiliary/build/vcvarsarm64.bat' -vcvars_ver=${{ inputs.msvc-toolset-version }}
+ if [ $? -eq 0 ]; then
+ echo "Toolchain is already installed for ARM64"
+ echo "toolchain-installed=true" >> $GITHUB_OUTPUT
+ else
+ echo "Toolchain is not yet installed for ARM64"
+ echo "toolchain-installed=false" >> $GITHUB_OUTPUT
+ fi
+ if: ${{ inputs.architecture == 'ARM64' }}
+
+ - name: 'Install toolchain and dependencies for x64'
run: |
# Run Visual Studio Installer
'/c/Program Files (x86)/Microsoft Visual Studio/Installer/vs_installer.exe' \
modify --quiet --installPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' \
- --add Microsoft.VisualStudio.Component.VC.${{ inputs.msvc-toolset-version }}.${{ inputs.msvc-toolset-architecture }}
- if: steps.toolchain-check.outputs.toolchain-installed != 'true'
+ --add Microsoft.VisualStudio.Component.VC.${{ inputs.msvc-toolset-version }}.x86.x64
+ if: ${{ (inputs.architecture == 'x86.x64') && (steps.toolchain-check-x64.outputs.toolchain-installed != 'true') }}
+
+ - name: 'Install toolchain and dependencies for ARM64'
+ run: |
+ # Run Visual Studio Installer
+ '/c/Program Files (x86)/Microsoft Visual Studio/Installer/vs_installer.exe' \
+ modify --quiet --installPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' \
+ --add Microsoft.VisualStudio.Component.VC.${{ inputs.msvc-toolset-version }}.ARM64
+        if: ${{ (inputs.architecture == 'ARM64') && (steps.toolchain-check-arm64.outputs.toolchain-installed != 'true') }}
- name: 'Configure'
run: >
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 20be196b128..bcb9ea6e0b8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@ on:
platforms:
description: 'Platform(s) to execute on (comma separated, e.g. "linux-x64, macos, aarch64")'
required: true
- default: 'linux-x64, linux-x64-variants, linux-cross-compile, alpine-linux-x64, macos-x64, macos-aarch64, windows-x64, windows-aarch64, docs'
+ default: 'linux-x64, linux-x64-variants, linux-aarch64, linux-cross-compile, alpine-linux-x64, macos-x64, macos-aarch64, windows-x64, windows-aarch64, docs'
configure-arguments:
description: 'Additional configure arguments'
required: false
@@ -64,6 +64,7 @@ jobs:
outputs:
linux-x64: ${{ steps.include.outputs.linux-x64 }}
linux-x64-variants: ${{ steps.include.outputs.linux-x64-variants }}
+ linux-aarch64: ${{ steps.include.outputs.linux-aarch64 }}
linux-cross-compile: ${{ steps.include.outputs.linux-cross-compile }}
alpine-linux-x64: ${{ steps.include.outputs.alpine-linux-x64 }}
macos-x64: ${{ steps.include.outputs.macos-x64 }}
@@ -176,6 +177,7 @@ jobs:
echo "linux-x64=$(check_platform linux-x64 linux x64)" >> $GITHUB_OUTPUT
echo "linux-x64-variants=$(check_platform linux-x64-variants variants)" >> $GITHUB_OUTPUT
+ echo "linux-aarch64=$(check_platform linux-aarch64 linux aarch64)" >> $GITHUB_OUTPUT
echo "linux-cross-compile=$(check_platform linux-cross-compile cross-compile)" >> $GITHUB_OUTPUT
echo "alpine-linux-x64=$(check_platform alpine-linux-x64 alpine-linux x64)" >> $GITHUB_OUTPUT
echo "macos-x64=$(check_platform macos-x64 macos x64)" >> $GITHUB_OUTPUT
@@ -201,6 +203,20 @@ jobs:
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
if: needs.prepare.outputs.linux-x64 == 'true'
+ build-linux-aarch64:
+ name: linux-aarch64
+ needs: prepare
+ uses: ./.github/workflows/build-linux.yml
+ with:
+ platform: linux-aarch64
+ runs-on: 'ubuntu-24.04-arm'
+ bootjdk-platform: linux-aarch64
+ gcc-major-version: '14'
+ configure-arguments: ${{ github.event.inputs.configure-arguments }}
+ make-arguments: ${{ github.event.inputs.make-arguments }}
+ dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
+ if: needs.prepare.outputs.linux-aarch64 == 'true'
+
build-linux-x64-hs-nopch:
name: linux-x64-hs-nopch
needs: prepare
@@ -295,7 +311,7 @@ jobs:
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
# Upload static libs bundles separately to avoid interference with normal linux-x64 bundle.
# This bundle is not used by testing jobs, but downstreams use it to check that
- # dependent projects, e.g. libgraal, builds fine.
+ # dependent projects build fine.
bundle-suffix: "-static-libs"
if: needs.prepare.outputs.linux-x64-variants == 'true'
@@ -353,8 +369,9 @@ jobs:
uses: ./.github/workflows/build-windows.yml
with:
platform: windows-x64
+ runs-on: windows-2022
+ architecture: 'x86.x64'
msvc-toolset-version: '14.44'
- msvc-toolset-architecture: 'x86.x64'
configure-arguments: ${{ github.event.inputs.configure-arguments }}
make-arguments: ${{ github.event.inputs.make-arguments }}
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
@@ -366,10 +383,9 @@ jobs:
uses: ./.github/workflows/build-windows.yml
with:
platform: windows-aarch64
+ runs-on: windows-11-arm
+ architecture: 'ARM64'
msvc-toolset-version: '14.44'
- msvc-toolset-architecture: 'arm64'
- make-target: 'hotspot'
- extra-conf-options: '--openjdk-target=aarch64-unknown-cygwin'
configure-arguments: ${{ github.event.inputs.configure-arguments }}
make-arguments: ${{ github.event.inputs.make-arguments }}
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
@@ -423,6 +439,19 @@ jobs:
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
static-suffix: "-static"
+ test-linux-aarch64:
+ name: linux-aarch64
+ needs:
+ - prepare
+ - build-linux-aarch64
+ uses: ./.github/workflows/test.yml
+ with:
+ platform: linux-aarch64
+ bootjdk-platform: linux-aarch64
+ runs-on: ubuntu-24.04-arm
+ dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
+ debug-suffix: -debug
+
test-macos-aarch64:
name: macos-aarch64
needs:
@@ -446,6 +475,21 @@ jobs:
with:
platform: windows-x64
bootjdk-platform: windows-x64
- runs-on: windows-2025
+ architecture: 'x86.x64'
+ runs-on: windows-2022
+ dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
+ debug-suffix: -debug
+
+ test-windows-aarch64:
+ name: windows-aarch64
+ needs:
+ - prepare
+ - build-windows-aarch64
+ uses: ./.github/workflows/test.yml
+ with:
+ platform: windows-aarch64
+ bootjdk-platform: windows-aarch64
+ architecture: 'ARM64'
+ runs-on: windows-11-arm
dry-run: ${{ needs.prepare.outputs.dry-run == 'true' }}
debug-suffix: -debug
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b240b42fb97..6270e44d746 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -37,6 +37,9 @@ on:
runs-on:
required: true
type: string
+ architecture:
+ required: false
+ type: string
xcode-toolset-version:
required: false
type: string
@@ -132,6 +135,8 @@ jobs:
- name: 'Get MSYS2'
uses: ./.github/actions/get-msys2
+        with:  # 'architecture' is optional here; an empty value falls back to the runner's own architecture
+          architecture: ${{ inputs.architecture || (runner.arch == 'ARM64' && 'ARM64' || 'x86.x64') }}
if: runner.os == 'Windows'
- name: 'Get the BootJDK'
diff --git a/doc/building.html b/doc/building.html
index 534888ef667..86ee3390ead 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -545,7 +545,7 @@ earlier versions may also work.
no longer comes bundled with Xcode, so it needs to be installed
separately. This can either be done via the Xcode's Settings/Components
UI, or in the command line calling
-xcodebuild -downloadComponent metalToolchain.
+xcodebuild -downloadComponent MetalToolchain.
The standard macOS environment contains the basic tooling needed to
build, but for external libraries a package manager is recommended. The
JDK uses homebrew in the examples, but
diff --git a/doc/building.md b/doc/building.md
index d653d36eb55..93ab386ee8e 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -355,7 +355,7 @@ earlier versions may also work.
Starting with Xcode 26, introduced in macOS 26, the Metal toolchain no longer
comes bundled with Xcode, so it needs to be installed separately. This can
either be done via the Xcode's Settings/Components UI, or in the command line
-calling `xcodebuild -downloadComponent metalToolchain`.
+calling `xcodebuild -downloadComponent MetalToolchain`.
The standard macOS environment contains the basic tooling needed to build, but
for external libraries a package manager is recommended. The JDK uses
diff --git a/make/Bundles.gmk b/make/Bundles.gmk
index 0b324e7e3f3..925149d4cc7 100644
--- a/make/Bundles.gmk
+++ b/make/Bundles.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -410,27 +410,6 @@ endif
################################################################################
-ifneq ($(filter static-libs-graal-bundles, $(MAKECMDGOALS)), )
- STATIC_LIBS_GRAAL_BUNDLE_FILES := $(call FindFiles, $(STATIC_LIBS_GRAAL_IMAGE_DIR))
-
- ifeq ($(OPENJDK_TARGET_OS)-$(DEBUG_LEVEL), macosx-release)
- STATIC_LIBS_GRAAL_BUNDLE_SUBDIR := $(JDK_MACOSX_CONTENTS_SUBDIR)/Home
- else
- STATIC_LIBS_GRAAL_BUNDLE_SUBDIR := $(JDK_BUNDLE_SUBDIR)
- endif
-
- $(eval $(call SetupBundleFile, BUILD_STATIC_LIBS_GRAAL_BUNDLE, \
- BUNDLE_NAME := $(STATIC_LIBS_GRAAL_BUNDLE_NAME), \
- FILES := $(STATIC_LIBS_GRAAL_BUNDLE_FILES), \
- BASE_DIRS := $(STATIC_LIBS_GRAAL_IMAGE_DIR), \
- SUBDIR := $(STATIC_LIBS_GRAAL_BUNDLE_SUBDIR), \
- ))
-
- STATIC_LIBS_GRAAL_TARGETS += $(BUILD_STATIC_LIBS_GRAAL_BUNDLE)
-endif
-
-#################################################################################
-
ifneq ($(filter static-jdk-bundles, $(MAKECMDGOALS)), )
STATIC_JDK_BUNDLE_FILES := $(call FindFiles, $(STATIC_JDK_IMAGE_DIR))
@@ -453,13 +432,12 @@ docs-jdk-bundles: $(DOCS_JDK_TARGETS)
docs-javase-bundles: $(DOCS_JAVASE_TARGETS)
docs-reference-bundles: $(DOCS_REFERENCE_TARGETS)
static-libs-bundles: $(STATIC_LIBS_TARGETS)
-static-libs-graal-bundles: $(STATIC_LIBS_GRAAL_TARGETS)
static-jdk-bundles: $(STATIC_JDK_TARGETS)
jcov-bundles: $(JCOV_TARGETS)
.PHONY: product-bundles test-bundles \
docs-jdk-bundles docs-javase-bundles docs-reference-bundles \
- static-libs-bundles static-libs-graal-bundles static-jdk-bundles jcov-bundles
+ static-libs-bundles static-jdk-bundles jcov-bundles
################################################################################
diff --git a/make/GraalBuilderImage.gmk b/make/GraalBuilderImage.gmk
deleted file mode 100644
index d707e067a55..00000000000
--- a/make/GraalBuilderImage.gmk
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# Copyright (c) 2020, Red Hat Inc.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-include MakeFileStart.gmk
-
-################################################################################
-# This makefile creates a jdk image overlaid with statically linked core
-# libraries.
-################################################################################
-
-include CopyFiles.gmk
-
-$(eval $(call SetupCopyFiles, COPY_JDK_IMG, \
- SRC := $(JDK_IMAGE_DIR)/, \
- DEST := $(GRAAL_BUILDER_IMAGE_DIR)/, \
- FILES := $(call FindFiles, $(JDK_IMAGE_DIR)/), \
-))
-TARGETS += $(COPY_JDK_IMG)
-
-$(eval $(call SetupCopyFiles, COPY_STATIC_LIBS, \
- SRC := $(STATIC_LIBS_GRAAL_IMAGE_DIR)/lib, \
- DEST := $(GRAAL_BUILDER_IMAGE_DIR)/lib, \
- FILES := $(filter %$(STATIC_LIBRARY_SUFFIX), \
- $(call FindFiles, $(STATIC_LIBS_GRAAL_IMAGE_DIR)/lib)), \
-))
-TARGETS += $(COPY_STATIC_LIBS)
-
-################################################################################
-
-include MakeFileEnd.gmk
diff --git a/make/Images.gmk b/make/Images.gmk
index 89c0a834477..8008cfa6779 100644
--- a/make/Images.gmk
+++ b/make/Images.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -137,16 +137,16 @@ CDS_DUMP_FLAGS = -Xmx128M -Xms128M
# Helper function for creating the CDS archives for the JDK and JRE
#
# Param1 - VM variant (e.g., server, client, zero, ...)
-# Param2 - _nocoops, _coh, _nocoops_coh, or empty
+# Param2 - _nocoops, _nocoh, _nocoops_nocoh, or empty
define CreateCDSArchive
$1_$2_COOPS_OPTION := $(if $(findstring _nocoops, $2),-XX:-UseCompressedOops)
# enable and also explicitly disable coh as needed.
ifeq ($(call isTargetCpuBits, 64), true)
- $1_$2_COH_OPTION := -XX:+UnlockExperimentalVMOptions \
- $(if $(findstring _coh, $2),-XX:+UseCompactObjectHeaders,-XX:-UseCompactObjectHeaders)
+ $1_$2_NOCOH_OPTION := -XX:+UnlockExperimentalVMOptions \
+ $(if $(findstring _nocoh, $2),-XX:-UseCompactObjectHeaders,-XX:+UseCompactObjectHeaders)
endif
- $1_$2_DUMP_EXTRA_ARG := $$($1_$2_COOPS_OPTION) $$($1_$2_COH_OPTION)
- $1_$2_DUMP_TYPE := $(if $(findstring _nocoops, $2),-NOCOOPS,)$(if $(findstring _coh, $2),-COH,)
+ $1_$2_DUMP_EXTRA_ARG := $$($1_$2_COOPS_OPTION) $$($1_$2_NOCOH_OPTION)
+ $1_$2_DUMP_TYPE := $(if $(findstring _nocoops, $2),-NOCOOPS,)$(if $(findstring _nocoh, $2),-NOCOH,)
$1_$2_CDS_DUMP_FLAGS := $(CDS_DUMP_FLAGS) $(if $(filter g1gc, $(JVM_FEATURES_$1)), -XX:+UseG1GC)
@@ -200,12 +200,12 @@ ifeq ($(BUILD_CDS_ARCHIVE), true)
$(foreach v, $(JVM_VARIANTS), \
$(eval $(call CreateCDSArchive,$v,_nocoops)) \
)
- ifeq ($(BUILD_CDS_ARCHIVE_COH), true)
+ ifeq ($(BUILD_CDS_ARCHIVE_NOCOH), true)
$(foreach v, $(JVM_VARIANTS), \
- $(eval $(call CreateCDSArchive,$v,_coh)) \
+ $(eval $(call CreateCDSArchive,$v,_nocoh)) \
)
$(foreach v, $(JVM_VARIANTS), \
- $(eval $(call CreateCDSArchive,$v,_nocoops_coh)) \
+ $(eval $(call CreateCDSArchive,$v,_nocoops_nocoh)) \
)
endif
endif
diff --git a/make/Main.gmk b/make/Main.gmk
index 22302cdea46..172cee697ad 100644
--- a/make/Main.gmk
+++ b/make/Main.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -478,11 +478,6 @@ $(eval $(call SetupTarget, static-libs-image, \
TARGET := static-libs-image, \
))
-$(eval $(call SetupTarget, static-libs-graal-image, \
- MAKEFILE := StaticLibsImage, \
- TARGET := static-libs-graal-image, \
-))
-
$(eval $(call SetupTarget, mac-jdk-bundle, \
MAKEFILE := MacBundles, \
TARGET := jdk-bundle, \
@@ -506,11 +501,6 @@ $(eval $(call SetupTarget, exploded-image-optimize, \
buildtools-modules, \
))
-$(eval $(call SetupTarget, graal-builder-image, \
- MAKEFILE := GraalBuilderImage, \
- DEPS := jdk-image static-libs-graal-image, \
-))
-
ifeq ($(JCOV_ENABLED), true)
$(eval $(call SetupTarget, jcov-image, \
MAKEFILE := Coverage, \
@@ -877,12 +867,6 @@ $(eval $(call SetupTarget, static-libs-bundles, \
DEPS := static-libs-image, \
))
-$(eval $(call SetupTarget, static-libs-graal-bundles, \
- MAKEFILE := Bundles, \
- TARGET := static-libs-graal-bundles, \
- DEPS := static-libs-graal-image, \
-))
-
$(eval $(call SetupTarget, static-jdk-bundles, \
MAKEFILE := Bundles, \
TARGET := static-jdk-bundles, \
@@ -1124,8 +1108,6 @@ else
static-libs-image: hotspot-static-libs static-libs
- static-libs-graal-image: static-libs
-
bootcycle-images: jdk-image
docs-jdk-api-javadoc: $(GENSRC_TARGETS)
diff --git a/make/RunTests.gmk b/make/RunTests.gmk
index d4be5936c41..1eb81499505 100644
--- a/make/RunTests.gmk
+++ b/make/RunTests.gmk
@@ -972,11 +972,6 @@ define SetupRunJtregTestBody
JTREG_AUTO_PROBLEM_LISTS += ProblemList-enable-preview.txt
endif
- ifneq ($$(findstring -XX:+UseCompactObjectHeaders, $$(JTREG_ALL_OPTIONS)), )
- JTREG_AUTO_PROBLEM_LISTS += ProblemList-coh.txt
- endif
-
-
ifneq ($$(JTREG_EXTRA_PROBLEM_LISTS), )
# Accept both absolute paths as well as relative to the current test root.
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
diff --git a/make/RunTestsPrebuiltSpec.gmk b/make/RunTestsPrebuiltSpec.gmk
index 5fe559eafad..568f69da5a5 100644
--- a/make/RunTestsPrebuiltSpec.gmk
+++ b/make/RunTestsPrebuiltSpec.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -64,7 +64,7 @@ TEST_JOBS ?= 0
# Use hard-coded values for java flags (one size, fits all!)
JAVA_FLAGS := -Duser.language=en -Duser.country=US
JAVA_FLAGS_BIG := -Xms64M -Xmx2048M
-JAVA_FLAGS_SMALL := -XX:+UseSerialGC -Xms32M -Xmx512M -XX:TieredStopAtLevel=1
+JAVA_FLAGS_SMALL := -Xms32M -Xmx512M -XX:TieredStopAtLevel=1
BUILDJDK_JAVA_FLAGS_SMALL := -Xms32M -Xmx512M -XX:TieredStopAtLevel=1
BUILD_JAVA_FLAGS := $(JAVA_FLAGS_BIG)
diff --git a/make/StaticLibsImage.gmk b/make/StaticLibsImage.gmk
index 1f50c5b7f1b..6e00a39977b 100644
--- a/make/StaticLibsImage.gmk
+++ b/make/StaticLibsImage.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -39,8 +39,6 @@ ALL_MODULES = $(call FindAllModules)
ifneq ($(filter static-libs-image, $(MAKECMDGOALS)), )
IMAGE_DEST_DIR = $(STATIC_LIBS_IMAGE_DIR)/lib
-else ifneq ($(filter static-libs-graal-image, $(MAKECMDGOALS)), )
- IMAGE_DEST_DIR = $(STATIC_LIBS_GRAAL_IMAGE_DIR)/lib
endif
# Copy JDK static libs to the image.
@@ -70,7 +68,6 @@ ifneq ($(filter static-libs-image, $(MAKECMDGOALS)), )
endif
static-libs-image: $(HOTSPOT_VARIANT_STATIC_LIBS_TARGETS) $(STATIC_LIBS_TARGETS)
-static-libs-graal-image: $(STATIC_LIBS_TARGETS)
################################################################################
diff --git a/make/UpdateSleefSource.gmk b/make/UpdateSleefSource.gmk
index d7b8f8e141b..38483f0ec68 100644
--- a/make/UpdateSleefSource.gmk
+++ b/make/UpdateSleefSource.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,7 @@ ifneq ($(OPENJDK_BUILD_OS), linux)
endif
SLEEF_SUPPORT_DIR := $(MAKESUPPORT_OUTPUTDIR)/sleef
-SLEEF_SOURCE_BASE_DIR := $(TOPDIR)/src/jdk.incubator.vector/linux/native/libsleef
+SLEEF_SOURCE_BASE_DIR := $(TOPDIR)/src/jdk.incubator.vector/unix/native/libsleef
SLEEF_SOURCE_DIR := $(SLEEF_SOURCE_BASE_DIR)/upstream
SLEEF_TARGET_DIR := $(SLEEF_SOURCE_BASE_DIR)/generated
SLEEF_NATIVE_BUILD_DIR := $(SLEEF_SUPPORT_DIR)/native
@@ -82,7 +82,12 @@ $(eval $(call SetupExecute, sleef_native_config, \
INFO := Configuring native sleef build, \
OUTPUT_DIR := $(SLEEF_NATIVE_BUILD_DIR), \
WORKING_DIR := $(SLEEF_SOURCE_DIR), \
- COMMAND := $(CMAKE) -S . -B $(SLEEF_NATIVE_BUILD_DIR), \
+ COMMAND := $(CMAKE) -S . -B $(SLEEF_NATIVE_BUILD_DIR) \
+ -DCMAKE_INSTALL_PREFIX=$(SLEEF_NATIVE_BUILD_DIR) \
+ -DSLEEF_BUILD_TESTS=OFF \
+ -DSLEEF_DISABLE_SSL=ON \
+ -DSLEEF_ENABLE_TLFLOAT=OFF \
+ -DSLEEF_ENABLE_TESTER4=OFF, \
))
TARGETS := $(sleef_native_config)
@@ -106,6 +111,11 @@ $(eval $(call SetupExecute, sleef_cross_config, \
-DCMAKE_C_COMPILER=$(CC) \
-DCMAKE_TOOLCHAIN_FILE=$(SLEEF_CMAKE_FILE) \
-DNATIVE_BUILD_DIR=$(SLEEF_NATIVE_BUILD_DIR) \
+ -DCMAKE_INSTALL_PREFIX=$(SLEEF_CROSS_BUILD_DIR) \
+ -DSLEEF_BUILD_TESTS=OFF \
+ -DSLEEF_DISABLE_SSL=ON \
+ -DSLEEF_ENABLE_TLFLOAT=OFF \
+ -DSLEEF_ENABLE_TESTER4=OFF \
-DSLEEF_BUILD_INLINE_HEADERS=TRUE \
$(EXTRA_CROSS_OPTIONS), \
))
@@ -139,7 +149,7 @@ $(eval $(call SetupCopyFiles, copy_generated_sleef_source, \
DEST := $(SLEEF_TARGET_DIR), \
))
-TARGETS := $(copy_generated_sleef_source)
+TARGETS := $(copy_static_sleef_source) $(copy_generated_sleef_source)
################################################################################
diff --git a/make/autoconf/boot-jdk.m4 b/make/autoconf/boot-jdk.m4
index b3dbc292919..4468a9acf27 100644
--- a/make/autoconf/boot-jdk.m4
+++ b/make/autoconf/boot-jdk.m4
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -481,8 +481,6 @@ AC_DEFUN_ONCE([BOOTJDK_SETUP_BOOT_JDK_ARGUMENTS],
AC_MSG_CHECKING([flags for boot jdk java command for small workloads])
- # Use serial gc for small short lived tools if possible
- UTIL_ADD_JVM_ARG_IF_OK([-XX:+UseSerialGC],boot_jdk_jvmargs_small,[$JAVA])
UTIL_ADD_JVM_ARG_IF_OK([-Xms32M],boot_jdk_jvmargs_small,[$JAVA])
UTIL_ADD_JVM_ARG_IF_OK([-Xmx512M],boot_jdk_jvmargs_small,[$JAVA])
UTIL_ADD_JVM_ARG_IF_OK([-XX:TieredStopAtLevel=1],boot_jdk_jvmargs_small,[$JAVA])
@@ -492,8 +490,6 @@ AC_DEFUN_ONCE([BOOTJDK_SETUP_BOOT_JDK_ARGUMENTS],
JAVA_FLAGS_SMALL=$boot_jdk_jvmargs_small
AC_SUBST(JAVA_FLAGS_SMALL)
- # Don't presuppose SerialGC is present in the buildjdk. Also, we cannot test
- # the buildjdk, but on the other hand we know what it will support.
BUILD_JAVA_FLAGS_SMALL="-Xms32M -Xmx512M -XX:TieredStopAtLevel=1"
AC_SUBST(BUILD_JAVA_FLAGS_SMALL)
diff --git a/make/autoconf/configure.ac b/make/autoconf/configure.ac
index 2e608f893d6..6d65ad93c40 100644
--- a/make/autoconf/configure.ac
+++ b/make/autoconf/configure.ac
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -260,7 +260,7 @@ LIB_TESTS_ENABLE_DISABLE_JTREG_TEST_THREAD_FACTORY
JDKOPT_ENABLE_DISABLE_GENERATE_CLASSLIST
JDKOPT_EXCLUDE_TRANSLATIONS
JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE
-JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE_COH
+JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE_NOCOH
JDKOPT_ENABLE_DISABLE_COMPATIBLE_CDS_ALIGNMENT
JDKOPT_SETUP_MACOSX_SIGNING
JDKOPT_SETUP_SIGNING_HOOK
diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4
index 5b1e4c3d7f7..465e06ab39d 100644
--- a/make/autoconf/jdk-options.m4
+++ b/make/autoconf/jdk-options.m4
@@ -786,14 +786,14 @@ AC_DEFUN([JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE],
#
-# Enable or disable the default CDS archive generation for Compact Object Headers
+# Enable or disable the default CDS archive generation without Compact Object Headers
#
-AC_DEFUN([JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE_COH],
+AC_DEFUN([JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE_NOCOH],
[
- UTIL_ARG_ENABLE(NAME: cds-archive-coh, DEFAULT: auto, RESULT: BUILD_CDS_ARCHIVE_COH,
- DESC: [enable generation of default CDS archives for compact object headers (requires --enable-cds-archive)],
+ UTIL_ARG_ENABLE(NAME: cds-archive-nocoh, DEFAULT: auto, RESULT: BUILD_CDS_ARCHIVE_NOCOH,
+ DESC: [enable generation of default CDS archives for no compact object headers (requires --enable-cds-archive)],
DEFAULT_DESC: [auto],
- CHECKING_MSG: [if default CDS archives for compact object headers should be generated],
+ CHECKING_MSG: [if default CDS archives for no compact object headers should be generated],
CHECK_AVAILABLE: [
- AC_MSG_CHECKING([if CDS archive with compact object headers is available])
+ AC_MSG_CHECKING([if CDS archive with no compact object headers is available])
if test "x$BUILD_CDS_ARCHIVE" = "xfalse"; then
AC_MSG_RESULT([no (CDS default archive generation is disabled)])
AVAILABLE=false
@@ -810,7 +810,7 @@ AC_DEFUN([JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE_COH],
AVAILABLE=true
fi
])
- AC_SUBST(BUILD_CDS_ARCHIVE_COH)
+ AC_SUBST(BUILD_CDS_ARCHIVE_NOCOH)
])
################################################################################
diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4
index 234d7b74268..0696c4f1199 100644
--- a/make/autoconf/jvm-features.m4
+++ b/make/autoconf/jvm-features.m4
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -45,7 +45,7 @@ m4_define(jvm_features_valid, m4_normalize( \
ifdef([custom_jvm_features_valid], custom_jvm_features_valid) \
\
cds compiler1 compiler2 dtrace epsilongc g1gc jfr jni-check \
- jvmci jvmti link-time-opt management minimal opt-size parallelgc \
+ jvmti link-time-opt management minimal opt-size parallelgc \
serialgc services shenandoahgc vm-structs zero zgc \
))
@@ -62,7 +62,6 @@ m4_define(jvm_feature_desc_epsilongc, [include the epsilon (no-op) garbage colle
m4_define(jvm_feature_desc_g1gc, [include the G1 garbage collector])
m4_define(jvm_feature_desc_jfr, [enable JDK Flight Recorder (JFR)])
m4_define(jvm_feature_desc_jni_check, [enable -Xcheck:jni support])
-m4_define(jvm_feature_desc_jvmci, [enable JVM Compiler Interface (JVMCI)])
m4_define(jvm_feature_desc_jvmti, [enable Java Virtual Machine Tool Interface (JVM TI)])
m4_define(jvm_feature_desc_link_time_opt, [enable link time optimization])
m4_define(jvm_feature_desc_management, [enable java.lang.management API support])
@@ -269,26 +268,6 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_DTRACE],
])
])
-################################################################################
-# Check if the feature 'jvmci' is available on this platform.
-#
-AC_DEFUN_ONCE([JVM_FEATURES_CHECK_JVMCI],
-[
- JVM_FEATURES_CHECK_AVAILABILITY(jvmci, [
- AC_MSG_CHECKING([if platform is supported by JVMCI])
- if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then
- AC_MSG_RESULT([yes])
- elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
- AC_MSG_RESULT([yes])
- elif test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU])
- AVAILABLE=false
- fi
- ])
-])
-
################################################################################
# Check if the feature 'shenandoahgc' is available on this platform.
#
@@ -376,7 +355,6 @@ AC_DEFUN_ONCE([JVM_FEATURES_PREPARE_PLATFORM],
JVM_FEATURES_CHECK_CDS
JVM_FEATURES_CHECK_DTRACE
- JVM_FEATURES_CHECK_JVMCI
JVM_FEATURES_CHECK_SHENANDOAHGC
JVM_FEATURES_CHECK_ZGC
@@ -401,17 +379,17 @@ AC_DEFUN([JVM_FEATURES_PREPARE_VARIANT],
JVM_FEATURES_VARIANT_UNAVAILABLE="cds minimal zero"
elif test "x$variant" = "xzero"; then
JVM_FEATURES_VARIANT_UNAVAILABLE="compiler1 compiler2 \
- jvmci minimal zgc"
+ minimal zgc"
else
JVM_FEATURES_VARIANT_UNAVAILABLE="minimal zero"
fi
# Check which features should be off by default for this JVM variant.
if test "x$variant" = "xclient"; then
- JVM_FEATURES_VARIANT_FILTER="compiler2 jvmci link-time-opt opt-size"
+ JVM_FEATURES_VARIANT_FILTER="compiler2 link-time-opt opt-size"
elif test "x$variant" = "xminimal"; then
JVM_FEATURES_VARIANT_FILTER="cds compiler2 dtrace epsilongc g1gc \
- jfr jni-check jvmci jvmti management parallelgc services \
+ jfr jni-check jvmti management parallelgc services \
shenandoahgc vm-structs zgc"
if test "x$OPENJDK_TARGET_CPU" = xarm ; then
JVM_FEATURES_VARIANT_FILTER="$JVM_FEATURES_VARIANT_FILTER opt-size"
@@ -421,7 +399,7 @@ AC_DEFUN([JVM_FEATURES_PREPARE_VARIANT],
link-time-opt"
fi
elif test "x$variant" = "xcore"; then
- JVM_FEATURES_VARIANT_FILTER="compiler1 compiler2 jvmci \
+ JVM_FEATURES_VARIANT_FILTER="compiler1 compiler2 \
link-time-opt opt-size"
elif test "x$variant" = "xzero"; then
JVM_FEATURES_VARIANT_FILTER="jfr link-time-opt opt-size"
@@ -517,11 +495,6 @@ AC_DEFUN([JVM_FEATURES_VERIFY],
AC_MSG_ERROR([Specified JVM feature 'jfr' requires feature 'services' for variant '$variant'])
fi
- if JVM_FEATURES_IS_ACTIVE(jvmci) && ! (JVM_FEATURES_IS_ACTIVE(compiler1) || \
- JVM_FEATURES_IS_ACTIVE(compiler2)); then
- AC_MSG_ERROR([Specified JVM feature 'jvmci' requires feature 'compiler2' or 'compiler1' for variant '$variant'])
- fi
-
if JVM_FEATURES_IS_ACTIVE(jvmti) && ! JVM_FEATURES_IS_ACTIVE(services); then
AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services' for variant '$variant'])
fi
@@ -531,9 +504,6 @@ AC_DEFUN([JVM_FEATURES_VERIFY],
if ! JVM_FEATURES_IS_ACTIVE(cds); then
ENABLE_CDS="false"
fi
- if ! JVM_FEATURES_IS_ACTIVE(jvmci); then
- INCLUDE_JVMCI="false"
- fi
if JVM_FEATURES_IS_ACTIVE(compiler2); then
INCLUDE_COMPILER2="true"
fi
@@ -559,7 +529,6 @@ AC_DEFUN_ONCE([JVM_FEATURES_SETUP],
# and disable them in JVM_FEATURES_VERIFY if a variant is found that are
# missing any of them.
ENABLE_CDS="true"
- INCLUDE_JVMCI="true"
INCLUDE_COMPILER2="false"
for variant in $JVM_VARIANTS; do
@@ -599,7 +568,6 @@ AC_DEFUN_ONCE([JVM_FEATURES_SETUP],
AC_SUBST(JVM_FEATURES_zero)
AC_SUBST(JVM_FEATURES_custom)
- AC_SUBST(INCLUDE_JVMCI)
AC_SUBST(INCLUDE_COMPILER2)
])
diff --git a/make/autoconf/spec.gmk.template b/make/autoconf/spec.gmk.template
index 7de5056cb83..ecfd5dd0a92 100644
--- a/make/autoconf/spec.gmk.template
+++ b/make/autoconf/spec.gmk.template
@@ -369,7 +369,7 @@ ENABLE_GENERATE_CLASSLIST := @ENABLE_GENERATE_CLASSLIST@
EXCLUDE_TRANSLATIONS := @EXCLUDE_TRANSLATIONS@
BUILD_CDS_ARCHIVE := @BUILD_CDS_ARCHIVE@
-BUILD_CDS_ARCHIVE_COH := @BUILD_CDS_ARCHIVE_COH@
+BUILD_CDS_ARCHIVE_NOCOH := @BUILD_CDS_ARCHIVE_NOCOH@
ENABLE_COMPATIBLE_CDS_ALIGNMENT := @ENABLE_COMPATIBLE_CDS_ALIGNMENT@
@@ -844,7 +844,6 @@ PNG_CFLAGS := @PNG_CFLAGS@
# Misc
INCLUDE_SA := @INCLUDE_SA@
-INCLUDE_JVMCI := @INCLUDE_JVMCI@
INCLUDE_COMPILER2 := @INCLUDE_COMPILER2@
OS_VERSION_MAJOR := @OS_VERSION_MAJOR@
@@ -892,14 +891,6 @@ DOCS_OUTPUTDIR := $(DOCS_JDK_IMAGE_DIR)
STATIC_LIBS_IMAGE_SUBDIR := static-libs
STATIC_LIBS_IMAGE_DIR := $(IMAGES_OUTPUTDIR)/$(STATIC_LIBS_IMAGE_SUBDIR)
-# Graal static libs image
-STATIC_LIBS_GRAAL_IMAGE_SUBDIR := static-libs-graal
-STATIC_LIBS_GRAAL_IMAGE_DIR := $(IMAGES_OUTPUTDIR)/$(STATIC_LIBS_GRAAL_IMAGE_SUBDIR)
-
-# Graal builder image
-GRAAL_BUILDER_IMAGE_SUBDIR := graal-builder-jdk
-GRAAL_BUILDER_IMAGE_DIR := $(IMAGES_OUTPUTDIR)/$(GRAAL_BUILDER_IMAGE_SUBDIR)
-
# Macosx bundles directory definitions
JDK_MACOSX_BUNDLE_SUBDIR := jdk-bundle
JRE_MACOSX_BUNDLE_SUBDIR := jre-bundle
@@ -944,7 +935,6 @@ DOCS_JDK_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
DOCS_JAVASE_BUNDLE_NAME := javase-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
DOCS_REFERENCE_BUNDLE_NAME := jdk-reference-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
STATIC_LIBS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs$(DEBUG_PART).tar.gz
-STATIC_LIBS_GRAAL_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs-graal$(DEBUG_PART).tar.gz
STATIC_JDK_BUNDLE_NAME := static-jdk-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
JCOV_BUNDLE_NAME := jdk-jcov-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
diff --git a/make/autoconf/toolchain.m4 b/make/autoconf/toolchain.m4
index c882deb10a7..8a08e86d53f 100644
--- a/make/autoconf/toolchain.m4
+++ b/make/autoconf/toolchain.m4
@@ -692,6 +692,8 @@ AC_DEFUN_ONCE([TOOLCHAIN_DETECT_TOOLCHAIN_EXTRA],
AC_MSG_NOTICE([A full XCode is required to build the JDK (not only command line tools)])
AC_MSG_NOTICE([If you have XCode installed, you might need to reset the Xcode active developer directory])
AC_MSG_NOTICE([using 'sudo xcode-select -r'])
+ AC_MSG_NOTICE([Starting with Xcode 26, the Metal toolchain is no longer bundled with Xcode.])
+ AC_MSG_NOTICE([Try installing it with 'xcodebuild -downloadComponent MetalToolchain'])
AC_MSG_ERROR([XCode tool 'metal' neither found in path nor with xcrun])
else
AC_MSG_RESULT([yes, will be using '$METAL'])
diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk
index 2880504676a..1ec7bef6f98 100644
--- a/make/common/Modules.gmk
+++ b/make/common/Modules.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -54,13 +54,6 @@ ifeq ($(INCLUDE_SA), false)
MODULES_FILTER += jdk.hotspot.agent
endif
-# Filter out jvmci specific modules if jvmci is disabled
-ifeq ($(INCLUDE_JVMCI), false)
- MODULES_FILTER += jdk.internal.vm.ci
- MODULES_FILTER += jdk.graal.compiler
- MODULES_FILTER += jdk.graal.compiler.management
-endif
-
# jpackage is only on windows, macosx, and linux
ifeq ($(call isTargetOs, windows macosx linux), false)
MODULES_FILTER += jdk.jpackage
diff --git a/make/conf/build-module-sets.conf b/make/conf/build-module-sets.conf
index 06c879659d1..b7608279553 100644
--- a/make/conf/build-module-sets.conf
+++ b/make/conf/build-module-sets.conf
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -45,5 +45,4 @@ LANGTOOLS_MODULES= \
# These models require buildtools-hotspot to process for gensrc
HOTSPOT_MODULES= \
jdk.hotspot.agent \
- jdk.internal.vm.ci \
#
diff --git a/make/conf/github-actions.conf b/make/conf/github-actions.conf
index 6771e8923dc..9aee8e87e3c 100644
--- a/make/conf/github-actions.conf
+++ b/make/conf/github-actions.conf
@@ -32,6 +32,10 @@ LINUX_X64_BOOT_JDK_EXT=tar.gz
LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk26/c3cc523845074aa0af4f5e1e1ed4151d/35/GPL/openjdk-26_linux-x64_bin.tar.gz
LINUX_X64_BOOT_JDK_SHA256=83c78367f8c81257beef72aca4bbbf8e6dac8ca2b3a4546a85879a09e6e4e128
+LINUX_AARCH64_BOOT_JDK_EXT=tar.gz
+LINUX_AARCH64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk26/c3cc523845074aa0af4f5e1e1ed4151d/35/GPL/openjdk-26_linux-aarch64_bin.tar.gz
+LINUX_AARCH64_BOOT_JDK_SHA256=403ccf451e88d0be9e1dec129fcb9318de9752121e0eb92dfa9a8cf06f249007
+
ALPINE_LINUX_X64_BOOT_JDK_EXT=tar.gz
ALPINE_LINUX_X64_BOOT_JDK_URL=https://github.com/adoptium/temurin26-binaries/releases/download/jdk-26%2B35/OpenJDK26U-jdk_x64_alpine-linux_hotspot_26_35.tar.gz
ALPINE_LINUX_X64_BOOT_JDK_SHA256=c105e581fdccb4e7120d889235d1ad8d5b2bed0af4972bc881e0a8ba687c94a4
@@ -47,3 +51,7 @@ MACOS_X64_BOOT_JDK_SHA256=8642b89d889c14ede2c446fd5bbe3621c8a3082e3df02013fd1658
WINDOWS_X64_BOOT_JDK_EXT=zip
WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk26/c3cc523845074aa0af4f5e1e1ed4151d/35/GPL/openjdk-26_windows-x64_bin.zip
WINDOWS_X64_BOOT_JDK_SHA256=2dd2d92c9374cd49a120fe9d916732840bf6bb9f0e0cc29794917a3c08b99c5f
+
+WINDOWS_AARCH64_BOOT_JDK_EXT=zip
+WINDOWS_AARCH64_BOOT_JDK_URL=https://github.com/adoptium/temurin26-binaries/releases/download/jdk-26%2B35-ea-beta/OpenJDK26U-jdk_aarch64_windows_hotspot_26_35-ea.zip
+WINDOWS_AARCH64_BOOT_JDK_SHA256=4892cd9714e222947de1a7415a696f43adf691560522dc5858c0a8af070912c0
diff --git a/make/conf/module-loader-map.conf b/make/conf/module-loader-map.conf
index 65101af2b8a..35b9345ed8f 100644
--- a/make/conf/module-loader-map.conf
+++ b/make/conf/module-loader-map.conf
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -44,7 +44,6 @@ BOOT_MODULES= \
java.security.sasl \
java.xml \
jdk.incubator.vector \
- jdk.internal.vm.ci \
jdk.jfr \
jdk.management \
jdk.management.jfr \
@@ -60,8 +59,6 @@ BOOT_MODULES= \
# should carefully be considered if it should be upgradeable or not.
UPGRADEABLE_PLATFORM_MODULES= \
java.compiler \
- jdk.graal.compiler \
- jdk.graal.compiler.management \
#
PLATFORM_MODULES= \
@@ -107,7 +104,6 @@ NATIVE_ACCESS_MODULES= \
jdk.crypto.mscapi \
jdk.hotspot.agent \
jdk.internal.le \
- jdk.internal.vm.ci \
jdk.jdi \
jdk.jfr \
jdk.jpackage \
diff --git a/make/data/cldr/common/dtd/ldml.dtd b/make/data/cldr/common/dtd/ldml.dtd
index aebedd33a43..b4247f2d924 100644
--- a/make/data/cldr/common/dtd/ldml.dtd
+++ b/make/data/cldr/common/dtd/ldml.dtd
@@ -1,5 +1,5 @@
-
+
@@ -493,6 +493,16 @@ CLDR data files are interpreted according to the LDML specification (http://unic
+
+
+
+
+
+
+
+
+
+
diff --git a/make/data/cldr/common/main/aa.xml b/make/data/cldr/common/main/aa.xml
index 3ff6fb6dd06..791c3009658 100644
--- a/make/data/cldr/common/main/aa.xml
+++ b/make/data/cldr/common/main/aa.xml
@@ -1,6 +1,6 @@
-
@@ -1027,6 +1027,7 @@ For terms of use, see http://www.unicode.org/copyright.html
+
diff --git a/make/data/cldr/common/supplemental/likelySubtags.xml b/make/data/cldr/common/supplemental/likelySubtags.xml
index 76e215255fd..a73b8a8c95b 100644
--- a/make/data/cldr/common/supplemental/likelySubtags.xml
+++ b/make/data/cldr/common/supplemental/likelySubtags.xml
@@ -1,7 +1,7 @@
-
+
@@ -1343,7 +1343,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
-
+
diff --git a/make/data/cldr/common/supplemental/metaZones.xml b/make/data/cldr/common/supplemental/metaZones.xml
index 710934fef81..610921a8f6d 100644
--- a/make/data/cldr/common/supplemental/metaZones.xml
+++ b/make/data/cldr/common/supplemental/metaZones.xml
@@ -735,7 +735,7 @@ For terms of use, see http://www.unicode.org/copyright.html
-
+
diff --git a/make/data/cldr/common/supplemental/supplementalData.xml b/make/data/cldr/common/supplemental/supplementalData.xml
index 25684d36c6e..cbfe2c5e875 100644
--- a/make/data/cldr/common/supplemental/supplementalData.xml
+++ b/make/data/cldr/common/supplemental/supplementalData.xml
@@ -1,7 +1,7 @@
@@ -57,7 +57,7 @@ For terms of use, see https://www.unicode.org/copyright.html
-
+
@@ -3147,7 +3147,7 @@ XXX Code for transations where no currency is involved
-
+
diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk
index e8db4888d3a..66ba18f2775 100644
--- a/make/hotspot/lib/CompileJvm.gmk
+++ b/make/hotspot/lib/CompileJvm.gmk
@@ -204,7 +204,6 @@ $(eval $(call SetupJdkLibrary, BUILD_LIBJVM, \
DISABLED_WARNINGS_gcc_jfrChunkWriter.cpp := unused-const-variable, \
DISABLED_WARNINGS_gcc_jfrMemorySizer.cpp := unused-const-variable, \
DISABLED_WARNINGS_gcc_jfrTraceIdKlassQueue.cpp := unused-const-variable, \
- DISABLED_WARNINGS_gcc_jvmciCodeInstaller.cpp := stringop-overflow, \
DISABLED_WARNINGS_gcc_jvmFlag.cpp := unused-const-variable, \
DISABLED_WARNINGS_gcc_jvmtiTagMap.cpp := stringop-overflow, \
DISABLED_WARNINGS_gcc_macroAssembler_ppc_sha.cpp := unused-const-variable, \
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 90ea8a985e3..9477b0925d2 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2013, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -96,12 +96,6 @@ ifneq ($(call check-jvm-feature, jvmti), true)
jvmtiAgentList.cpp jfrJvmtiAgent.cpp
endif
-ifneq ($(call check-jvm-feature, jvmci), true)
- JVM_CFLAGS_FEATURES += -DINCLUDE_JVMCI=0
- JVM_EXCLUDES += jvmci
- JVM_EXCLUDE_FILES += jvmciCodeInstaller_$(HOTSPOT_TARGET_CPU_ARCH).cpp
-endif
-
ifneq ($(call check-jvm-feature, vm-structs), true)
JVM_CFLAGS_FEATURES += -DINCLUDE_VM_STRUCTS=0
JVM_EXCLUDE_FILES += vmStructs.cpp
diff --git a/make/hotspot/lib/JvmOverrideFiles.gmk b/make/hotspot/lib/JvmOverrideFiles.gmk
index 3eedb64a597..80f3582043c 100644
--- a/make/hotspot/lib/JvmOverrideFiles.gmk
+++ b/make/hotspot/lib/JvmOverrideFiles.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2013, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -33,8 +33,6 @@ ifeq ($(INCLUDE), true)
ifeq ($(TOOLCHAIN_TYPE), gcc)
BUILD_LIBJVM_vmStructs.cpp_CXXFLAGS := -fno-var-tracking-assignments
- BUILD_LIBJVM_jvmciCompilerToVM.cpp_CXXFLAGS := -fno-var-tracking-assignments
- BUILD_LIBJVM_jvmciCompilerToVMInit.cpp_CXXFLAGS := -fno-var-tracking-assignments
ifeq ($(DEBUG_LEVEL), release)
# Need extra inlining to collapse shared marking code into the hot marking loop
BUILD_LIBJVM_shenandoahMark.cpp_CXXFLAGS := --param inline-unit-growth=1000
@@ -104,7 +102,6 @@ else ifeq ($(call isTargetOs, macosx), true)
sharedRuntimeTrig.cpp \
sharedRuntimeTrans.cpp \
loopTransform.cpp \
- jvmciCompilerToVM.cpp \
$(OPT_SPEED_SRC) \
#
endif
@@ -140,13 +137,9 @@ else ifeq ($(call isTargetOs, windows), true)
os_windows.cpp \
os_windows_x86.cpp \
osThread_windows.cpp \
- jvmciCompilerToVMInit.cpp \
$(OPT_SPEED_SRC) \
#
- # Workaround for jvmciCompilerToVM.cpp long compilation time
- BUILD_LIBJVM_jvmciCompilerToVMInit.cpp_OPTIMIZATION := NONE
-
endif
################################################################################
diff --git a/make/ide/visualstudio/hotspot/CreateVSProject.gmk b/make/ide/visualstudio/hotspot/CreateVSProject.gmk
index b2afbe2b9bc..8cf26d9cab8 100644
--- a/make/ide/visualstudio/hotspot/CreateVSProject.gmk
+++ b/make/ide/visualstudio/hotspot/CreateVSProject.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -111,7 +111,6 @@ ifeq ($(call isTargetOs, windows), true)
-hidePath .hg \
-hidePath .jcheck \
-hidePath jdk.hotspot.agent \
- -hidePath jdk.internal.vm.ci \
-hidePath jdk.jfr \
-compiler VC10 \
-jdkTargetRoot $(call FixPath, $(JDK_OUTPUTDIR)) \
diff --git a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
index 18ce0c334fb..b7754326c97 100644
--- a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
+++ b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
@@ -815,6 +815,13 @@ public class CLDRConverter {
data = map.get(TIMEZONE_ID_PREFIX + tzLink);
}
+ String meta = handlerMetaZones.get(tzKey);
+ if (meta == null && tzLink != null) {
+ // Check for tzLink
+ meta = handlerMetaZones.get(tzLink);
+ }
+ String metaKey = meta != null ? METAZONE_ID_PREFIX + meta : null;
+
if (data instanceof String[] tznames) {
// Hack for UTC. UTC is an alias to Etc/UTC in CLDR
if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) {
@@ -826,24 +833,14 @@ public class CLDRConverter {
tznames = Arrays.copyOf(tznames, tznames.length);
fillTZDBShortNames(tzKey, tznames);
names.put(tzid, tznames);
+ if (meta != null && map.get(metaKey) instanceof String[] metaNames) {
+ recordMetazone(names, meta, tzKey, metaNames);
+ }
}
} else {
- String meta = handlerMetaZones.get(tzKey);
- if (meta == null && tzLink != null) {
- // Check for tzLink
- meta = handlerMetaZones.get(tzLink);
- }
if (meta != null) {
- String metaKey = METAZONE_ID_PREFIX + meta;
- data = map.get(metaKey);
- if (data instanceof String[] tznames) {
- if (isDefaultZone(meta, tzKey)) {
- // Record the metazone names only from the default
- // (001) zone, with short names filled from TZDB
- tznames = Arrays.copyOf(tznames, tznames.length);
- fillTZDBShortNames(tzKey, tznames);
- names.put(metaKey, tznames);
- }
+ if (map.get(metaKey) instanceof String[] metaNames) {
+ recordMetazone(names, meta, tzKey, metaNames);
names.put(tzid, meta);
if (tzLink != null && availableIds.contains(tzLink)) {
names.put(tzLink, meta);
@@ -1231,7 +1228,7 @@ public class CLDRConverter {
String zone001 = handlerMetaZones.zidMap().get(meta);
return zone001 == null ? "" :
String.format(" \"%s\", \"%s\", \"%s\",",
- id, meta, zone001);
+ escape(id), escape(meta), escape(zone001));
})
.filter(s -> !s.isEmpty())
.sorted();
@@ -1508,11 +1505,18 @@ public class CLDRConverter {
}
}
- private static boolean isDefaultZone(String meta, String tzid) {
+ private static void recordMetazone(Map names, String meta, String tzid, String[] tznames) {
String zone001 = handlerMetaZones.zidMap().get(meta);
var tzLink = getTZDBLink(tzid);
- return canonicalTZMap.getOrDefault(tzid, tzid).equals(zone001) ||
- tzLink != null && canonicalTZMap.getOrDefault(tzLink, tzLink).equals(zone001);
+
+ // Record the metazone names only from the default
+ // (001) zone, with short names filled from TZDB
+ if (canonicalTZMap.getOrDefault(tzid, tzid).equals(zone001) ||
+ tzLink != null && canonicalTZMap.getOrDefault(tzLink, tzLink).equals(zone001)) {
+ tznames = Arrays.copyOf(tznames, tznames.length);
+ fillTZDBShortNames(tzid, tznames);
+ names.put(METAZONE_ID_PREFIX + meta, tznames);
+ }
}
private static String getTZDBLink(String tzid) {
diff --git a/make/jdk/src/classes/build/tools/cldrconverter/MetaZonesParseHandler.java b/make/jdk/src/classes/build/tools/cldrconverter/MetaZonesParseHandler.java
index 45de46d2476..9d699cc8a74 100644
--- a/make/jdk/src/classes/build/tools/cldrconverter/MetaZonesParseHandler.java
+++ b/make/jdk/src/classes/build/tools/cldrconverter/MetaZonesParseHandler.java
@@ -102,9 +102,9 @@ class MetaZonesParseHandler extends AbstractLDMLHandler {
zones.put(attributes.getValue("other"), attributes.getValue("type"));
} else {
mzoneMapEntryList.add(String.format(" \"%s\", \"%s\", \"%s\",",
- attributes.getValue("other"),
- territory,
- attributes.getValue("type")));
+ CLDRConverter.escape(attributes.getValue("other")),
+ CLDRConverter.escape(territory),
+ CLDRConverter.escape(attributes.getValue("type"))));
}
pushIgnoredContainer(qName);
break;
diff --git a/make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java b/make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java
index 0bc5a2bdb0d..84657ae94f0 100644
--- a/make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java
+++ b/make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java
@@ -103,14 +103,14 @@ class ResourceBundleGenerator implements BundleGenerator {
for (String key : map.keySet()) {
if (key.startsWith(CLDRConverter.METAZONE_ID_PREFIX)) {
String meta = key.substring(CLDRConverter.METAZONE_ID_PREFIX.length());
- String[] value;
- value = (String[]) map.get(key);
- fmt.format(" final String[] %s = new String[] {\n", meta);
- for (String s : value) {
- fmt.format(" \"%s\",\n", CLDRConverter.escape(s));
+ if (map.get(key) instanceof String[] value) {
+ fmt.format(" final String[] %s = new String[] {\n", CLDRConverter.escape(meta));
+ for (String s : value) {
+ fmt.format(" \"%s\",\n", CLDRConverter.escape(s));
+ }
+ fmt.format(" };\n");
+ metaKeys.add(key);
}
- fmt.format(" };\n");
- metaKeys.add(key);
}
}
for (String key : metaKeys) {
@@ -143,15 +143,15 @@ class ResourceBundleGenerator implements BundleGenerator {
if (fmt == null) {
fmt = new Formatter();
}
- String metaVal = oldEntry.metaKey();
+ String metaVal = CLDRConverter.escape(oldEntry.metaKey());
if (val instanceof String[] values) {
fmt.format(" final String[] %s = new String[] {\n", metaVal);
for (String s : values) {
fmt.format(" \"%s\",\n", CLDRConverter.escape(s));
}
fmt.format(" };\n");
- } else {
- fmt.format(" final String %s = \"%s\";\n", metaVal, CLDRConverter.escape((String)val));
+ } else if (val instanceof String str) {
+ fmt.format(" final String %s = \"%s\";\n", metaVal, CLDRConverter.escape(str));
}
newMap.put(oldEntry.key, oldEntry.metaKey());
}
@@ -178,21 +178,21 @@ class ResourceBundleGenerator implements BundleGenerator {
out.println(" final Object[][] data = new Object[][] {");
for (String key : map.keySet()) {
Object value = map.get(key);
+ var keyStr = CLDRConverter.escape(key);
if (value == null) {
CLDRConverter.warning("null value for " + key);
- } else if (value instanceof String) {
- String valStr = (String)value;
+ } else if (value instanceof String valStr) {
+ var escapedVal = CLDRConverter.escape(valStr);
if (type == BundleType.TIMEZONE &&
!(key.startsWith(CLDRConverter.EXEMPLAR_CITY_PREFIX) ||
key.startsWith(CLDRConverter.METAZONE_DSTOFFSET_PREFIX)) ||
valStr.startsWith(META_VALUE_PREFIX)) {
- out.printf(" { \"%s\", %s },\n", key, CLDRConverter.escape(valStr));
+ out.printf(" { \"%s\", %s },\n", keyStr, escapedVal);
} else {
- out.printf(" { \"%s\", \"%s\" },\n", key, CLDRConverter.escape(valStr));
+ out.printf(" { \"%s\", \"%s\" },\n", keyStr, escapedVal);
}
- } else if (value instanceof String[]) {
- String[] values = (String[]) value;
- out.println(" { \"" + key + "\",\n new String[] {");
+ } else if (value instanceof String[] values) {
+ out.println(" { \"" + keyStr + "\",\n new String[] {");
for (String s : values) {
out.println(" \"" + CLDRConverter.escape(s) + "\",");
}
@@ -311,7 +311,7 @@ class ResourceBundleGenerator implements BundleGenerator {
out.printf(" parentLocalesMap.put(Locale.ROOT,\n");
} else {
out.printf(" parentLocalesMap.put(Locale.forLanguageTag(\"%s\"),\n",
- parentTag);
+ CLDRConverter.escape(parentTag));
}
generateStringArray(metaInfo.get(key), out);
}
@@ -320,7 +320,7 @@ class ResourceBundleGenerator implements BundleGenerator {
// for languageAliasMap
CLDRConverter.handlerSupplMeta.getLanguageAliasData().forEach((key, value) -> {
- out.printf(" languageAliasMap.put(\"%s\", \"%s\");\n", key, value);
+ out.printf(" languageAliasMap.put(\"%s\", \"%s\");\n", CLDRConverter.escape(key), CLDRConverter.escape(value));
});
out.printf(" }\n\n");
@@ -338,11 +338,11 @@ class ResourceBundleGenerator implements BundleGenerator {
CLDRConverter.handlerTimeZone.getData().entrySet().stream()
.forEach(e -> {
String[] ids = ((String)e.getValue()).split("\\s");
- out.printf(" tzCanonicalIDMap.put(\"%s\", \"%s\");\n", e.getKey(),
- ids[0]);
+ out.printf(" tzCanonicalIDMap.put(\"%s\", \"%s\");\n", CLDRConverter.escape(e.getKey()),
+ CLDRConverter.escape(ids[0]));
for (int i = 1; i < ids.length; i++) {
- out.printf(" tzCanonicalIDMap.put(\"%s\", \"%s\");\n", ids[i],
- ids[0]);
+ out.printf(" tzCanonicalIDMap.put(\"%s\", \"%s\");\n", CLDRConverter.escape(ids[i]),
+ CLDRConverter.escape(ids[0]));
}
});
out.println();
@@ -352,8 +352,9 @@ class ResourceBundleGenerator implements BundleGenerator {
if (key.startsWith(CLDRConverter.LIKELY_SCRIPT_PREFIX)) {
// ensure spaces at the begin/end for delimiting purposes
out.printf(" likelyScriptMap.put(\"%s\", \"%s\");\n",
- key.substring(CLDRConverter.LIKELY_SCRIPT_PREFIX.length()),
- " " + metaInfo.get(key).stream().collect(Collectors.joining(" ")) + " ");
+ CLDRConverter.escape(key.substring(CLDRConverter.LIKELY_SCRIPT_PREFIX.length())),
+ " " + metaInfo.get(key).stream()
+ .map(l -> CLDRConverter.escape(l)).collect(Collectors.joining(" ")) + " ");
}
}
out.printf(" }\n }\n");
@@ -371,7 +372,7 @@ class ResourceBundleGenerator implements BundleGenerator {
return " %s";
}
""",
- toLocaleList(applyLanguageAliases(metaInfo.get("AvailableLocales")), false));
+ CLDRConverter.escape(toLocaleList(applyLanguageAliases(metaInfo.get("AvailableLocales")), false)));
if(CLDRConverter.isBaseModule) {
out.printf("""
@@ -408,7 +409,7 @@ class ResourceBundleGenerator implements BundleGenerator {
int count = 0;
for (int i = 0; i < children.length; i++) {
String child = children[i];
- out.printf("\"%s\", ", child);
+ out.printf("\"%s\", ", CLDRConverter.escape(child));
count += child.length() + 4;
if (i != children.length - 1 && count > 64) {
out.printf("\n ");
diff --git a/make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java b/make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java
index 101ee81b256..35e1cef1e87 100644
--- a/make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java
+++ b/make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,7 +85,9 @@ class SupplementalMetadataParseHandler extends AbstractLDMLHandler {
public Stream deprecatedMap() {
return keySet().stream()
- .map(k -> String.format(" \"%s\", \"%s\",", k, get(k)))
+ .map(k -> " \"%s\", \"%s\",".formatted(
+ CLDRConverter.escape(k),
+ CLDRConverter.escape((String)get(k))))
.sorted();
}
Map getLanguageAliasData() {
diff --git a/make/modules/java.naming/Java.gmk b/make/modules/java.naming/Java.gmk
index 1c7a2a1668a..b08a5ea515b 100644
--- a/make/modules/java.naming/Java.gmk
+++ b/make/modules/java.naming/Java.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,5 @@ DISABLED_WARNINGS_java += dangling-doc-comments this-escape
DOCLINT += -Xdoclint:all/protected \
'-Xdoclint/package:java.*,javax.*'
-CLEAN += jndiprovider.properties
################################################################################
diff --git a/make/modules/jdk.internal.vm.ci/Java.gmk b/make/modules/jdk.jcmd/Copy.gmk
similarity index 73%
rename from make/modules/jdk.internal.vm.ci/Java.gmk
rename to make/modules/jdk.jcmd/Copy.gmk
index 75a52a3128d..f3240f9f61d 100644
--- a/make/modules/jdk.internal.vm.ci/Java.gmk
+++ b/make/modules/jdk.jcmd/Copy.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,18 @@
################################################################################
-DISABLED_WARNINGS_java += dangling-doc-comments this-escape
+include CopyCommon.gmk
-# -parameters provides method's parameters information in class file,
-# JVMCI compilers make use of that information for various sanity checks.
-# Don't use Indy strings concatenation to have good JVMCI startup performance.
+# The autocompletion script is Bash-only, so it is not copied for Windows targets
+ifneq ($(call isTargetOsType, windows), true)
+ JCMD_CONF_DIR := $(TOPDIR)/src/jdk.jcmd/share/conf
+ $(eval $(call SetupCopyFiles, COPY_JCMD_BASH_COMPLETION, \
+ DEST := $(CONF_DST_DIR)/bash-completion, \
+ SRC := $(JCMD_CONF_DIR)/bash-completion, \
+ FILES := jcmd, \
+ ))
+endif
-JAVAC_FLAGS += -parameters -XDstringConcat=inline
-
-TARGET_RELEASE := $(TARGET_RELEASE_BOOTJDK)
+TARGETS += $(COPY_JCMD_BASH_COMPLETION)
################################################################################
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 4fbbfc9d1dc..f31514e666c 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1508,8 +1508,6 @@ source %{
case Op_CompareAndSwapL:
case Op_CompareAndSwapP:
case Op_CompareAndSwapN:
- case Op_ShenandoahCompareAndSwapP:
- case Op_ShenandoahCompareAndSwapN:
case Op_CompareAndSwapB:
case Op_CompareAndSwapS:
case Op_GetAndSetI:
@@ -1531,10 +1529,6 @@ source %{
case Op_WeakCompareAndSwapL:
case Op_WeakCompareAndSwapP:
case Op_WeakCompareAndSwapN:
- case Op_ShenandoahWeakCompareAndSwapP:
- case Op_ShenandoahWeakCompareAndSwapN:
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN:
return maybe_volatile;
default:
return false;
@@ -2647,19 +2641,45 @@ static bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
}
// (XorV src (Replicate m1))
-// (XorVMask src (MaskAll m1))
static bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
if (n != nullptr && m != nullptr) {
- return (n->Opcode() == Op_XorV || n->Opcode() == Op_XorVMask) &&
+ return n->Opcode() == Op_XorV &&
VectorNode::is_all_ones_vector(m);
}
return false;
}
+// Returns true if (n, m) matches "(XorVMask vm2 (MaskAll m1))" and that XorVMask
+// is used only by an AndVMask. In that case, cloning m (the MaskAll) lets the
+// matcher avoid sharing the MaskAll node and subsume the pattern into rule:
+// "(AndVMask vm1 (XorVMask vm2 (MaskAll m1)))".
+//
+// Limitation: the "andNot" rule still cannot be matched if "m" has other
+// uses outside this pattern.
+static bool is_vector_mask_not_operand_in_andnot_pattern(Node* n, Node* m) {
+ if (n == nullptr || m == nullptr) {
+ return false;
+ }
+
+ if (VectorNode::is_all_ones_vector(m) &&
+ n->Opcode() == Op_XorVMask &&
+ n->outcnt() == 1 &&
+ n->unique_out()->Opcode() == Op_AndVMask) {
+ // If another input of the AndVMask is also a mask-not pattern that would
+ // qualify for the `maskAll` cloning, do not clone the "maskAll" here,
+ // because the match rule can only consume one such pattern.
+ Node* use = n->unique_out();
+ Node* other_input = use->in(1) == n ? use->in(2) : use->in(1);
+ return !VectorNode::is_vectormask_bitwise_not_pattern(other_input);
+ }
+ return false;
+}
+
// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
if (is_vshift_con_pattern(n, m) ||
is_vector_bitwise_not_pattern(n, m) ||
+ is_vector_mask_not_operand_in_andnot_pattern(n, m) ||
is_valid_sve_arith_imm_pattern(n, m) ||
is_encode_and_store_pattern(n, m)) {
mstack.push(m, Visit);
@@ -3389,12 +3409,13 @@ encode %{
assert(rtype == relocInfo::none || rtype == relocInfo::external_word_type, "unexpected reloc type");
// load fake address constants using a normal move
if (! __ is_valid_AArch64_address(con) ||
- con < (address)(uintptr_t)os::vm_page_size()) {
+ con < (address)(uintptr_t)os::vm_page_size() ||
+ rtype == relocInfo::none) {
__ mov(dst_reg, con);
} else {
- // no reloc so just use adrp and add
+ // use shorter adrp/add sequence for external_word relocation
uint64_t offset;
- __ adrp(dst_reg, con, offset);
+ __ adrp(dst_reg, Address(con, rtype), offset);
__ add(dst_reg, dst_reg, offset);
}
}
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 2ff93c9e288..b9899995531 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -1671,24 +1671,42 @@ instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
// vector not - predicated
-instruct vnotI_masked(vReg dst_src, immI_M1 m1, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnotI_masked(vReg dst, vReg src, immI_M1 m1, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
- format %{ "vnotI_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (XorV (Binary src (Replicate m1)) pg));
+ format %{ "vnotI_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_not($dst$$FloatRegister, get_reg_variant(this),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vnotL_masked(vReg dst_src, immL_M1 m1, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnotL_masked(vReg dst, vReg src, immL_M1 m1, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
- format %{ "vnotL_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (XorV (Binary src (Replicate m1)) pg));
+ format %{ "vnotL_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_not($dst$$FloatRegister, get_reg_variant(this),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -1985,62 +2003,116 @@ instruct vabsD(vReg dst, vReg src) %{
// vector abs - predicated
-instruct vabsB_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsB_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (AbsVB dst_src pg));
- format %{ "vabsB_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (AbsVB src pg));
+ format %{ "vabsB_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_abs($dst_src$$FloatRegister, __ B, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_abs($dst$$FloatRegister, __ B, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vabsS_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsS_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (AbsVS dst_src pg));
- format %{ "vabsS_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (AbsVS src pg));
+ format %{ "vabsS_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_abs($dst_src$$FloatRegister, __ H, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_abs($dst$$FloatRegister, __ H, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vabsI_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsI_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (AbsVI dst_src pg));
- format %{ "vabsI_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (AbsVI src pg));
+ format %{ "vabsI_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_abs($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_abs($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vabsL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsL_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (AbsVL dst_src pg));
- format %{ "vabsL_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (AbsVL src pg));
+ format %{ "vabsL_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_abs($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_abs($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vabsF_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsF_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (AbsVF dst_src pg));
- format %{ "vabsF_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (AbsVF src pg));
+ format %{ "vabsF_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_fabs($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_fabs($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vabsD_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsD_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (AbsVD dst_src pg));
- format %{ "vabsD_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (AbsVD src pg));
+ format %{ "vabsD_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_fabs($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_fabs($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -2158,44 +2230,80 @@ instruct vnegD(vReg dst, vReg src) %{
// vector neg - predicated
-instruct vnegI_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegI_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (NegVI dst_src pg));
- format %{ "vnegI_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (NegVI src pg));
+ format %{ "vnegI_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
- __ sve_neg($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_neg($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vnegL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegL_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (NegVL dst_src pg));
- format %{ "vnegL_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (NegVL src pg));
+ format %{ "vnegL_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_neg($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_neg($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vnegF_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegF_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (NegVF dst_src pg));
- format %{ "vnegF_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (NegVF src pg));
+ format %{ "vnegF_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_fneg($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_fneg($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vnegD_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegD_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (NegVD dst_src pg));
- format %{ "vnegD_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (NegVD src pg));
+ format %{ "vnegD_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_fneg($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_fneg($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -2251,22 +2359,40 @@ instruct vsqrtD(vReg dst, vReg src) %{
// vector sqrt - predicated
-instruct vsqrtF_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vsqrtF_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (SqrtVF dst_src pg));
- format %{ "vsqrtF_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (SqrtVF src pg));
+ format %{ "vsqrtF_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_fsqrt($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_fsqrt($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vsqrtD_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vsqrtD_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (SqrtVD dst_src pg));
- format %{ "vsqrtD_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (SqrtVD src pg));
+ format %{ "vsqrtD_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_fsqrt($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_fsqrt($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -5331,9 +5457,7 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx,
__ sve_index($tmp$$FloatRegister, size, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
- if ($dst$$FloatRegister != $src$$FloatRegister) {
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- }
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
%}
ins_pipe(pipe_slow);
@@ -5356,9 +5480,7 @@ instruct insertI_index_ge32(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg t
__ sve_dup($tmp2$$FloatRegister, size, (int)($idx$$constant));
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
- if ($dst$$FloatRegister != $src$$FloatRegister) {
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- }
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
%}
ins_pipe(pipe_slow);
@@ -5392,9 +5514,7 @@ instruct insertL_gt128b(vReg dst, vReg src, iRegL val, immI idx,
__ sve_index($tmp$$FloatRegister, __ D, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
- if ($dst$$FloatRegister != $src$$FloatRegister) {
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- }
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$Register);
%}
ins_pipe(pipe_slow);
@@ -5432,7 +5552,7 @@ instruct insertF_index_lt32(vReg dst, vReg src, vRegF val, immI idx,
__ sve_index($dst$$FloatRegister, __ S, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
$dst$$FloatRegister, (int)($idx$$constant) - 16);
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -5451,7 +5571,7 @@ instruct insertF_index_ge32(vReg dst, vReg src, vRegF val, immI idx, vReg tmp,
__ sve_dup($dst$$FloatRegister, __ S, (int)($idx$$constant));
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
$tmp$$FloatRegister, $dst$$FloatRegister);
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -5486,7 +5606,7 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
__ sve_index($dst$$FloatRegister, __ D, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
$dst$$FloatRegister, (int)($idx$$constant) - 16);
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -5656,8 +5776,12 @@ instruct extractF(vRegF dst, vReg src, immI idx) %{
__ ins($dst$$FloatRegister, __ S, $src$$FloatRegister, 0, index);
} else {
assert(UseSVE > 0, "must be sve");
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- __ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << 2);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the second source of ext. The movprfx destination register
+ // must not appear in any source operand of the following instruction
+ // except as the destructive operand.
+ __ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << 2);
}
%}
ins_pipe(pipe_slow);
@@ -5677,8 +5801,12 @@ instruct extractD(vRegD dst, vReg src, immI idx) %{
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, index);
} else {
assert(UseSVE > 0, "must be sve");
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- __ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << 3);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the second source of ext. The movprfx destination register
+ // must not appear in any source operand of the following instruction
+ // except as the destructive operand.
+ __ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << 3);
}
%}
ins_pipe(pipe_slow);
@@ -6855,25 +6983,43 @@ instruct vpopcountL(vReg dst, vReg src) %{
// vector popcount - predicated
-instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vpopcountI_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (PopCountVI dst_src pg));
- format %{ "vpopcountI_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (PopCountVI src pg));
+ format %{ "vpopcountI_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
- __ sve_cnt($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
-instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vpopcountL_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (PopCountVL dst_src pg));
- format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (PopCountVL src pg));
+ format %{ "vpopcountL_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_cnt($dst_src$$FloatRegister, __ D,
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_cnt($dst$$FloatRegister, __ D,
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -7240,14 +7386,21 @@ instruct vcountLeadingZeros(vReg dst, vReg src) %{
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vcountLeadingZeros_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vcountLeadingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (CountLeadingZerosV dst_src pg));
- format %{ "vcountLeadingZeros_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (CountLeadingZerosV src pg));
+ format %{ "vcountLeadingZeros_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
- __ sve_clz($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_clz($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -7296,19 +7451,26 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
ins_pipe(pipe_slow);
%}
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vcountTrailingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (CountTrailingZerosV dst_src pg));
- format %{ "vcountTrailingZeros_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (CountTrailingZerosV src pg));
+ format %{ "vcountTrailingZeros_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
- __ sve_rbit($dst_src$$FloatRegister, size,
- $pg$$PRegister, $dst_src$$FloatRegister);
- __ sve_clz($dst_src$$FloatRegister, size,
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_rbit($dst$$FloatRegister, size,
+ $pg$$PRegister, $src$$FloatRegister);
+ __ sve_clz($dst$$FloatRegister, size,
+ $pg$$PRegister, $dst$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -7347,14 +7509,21 @@ instruct vreverse(vReg dst, vReg src) %{
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vreverse_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vreverse_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (ReverseV dst_src pg));
- format %{ "vreverse_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (ReverseV src pg));
+ format %{ "vreverse_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
- __ sve_rbit($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_rbit($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -7393,19 +7564,28 @@ instruct vreverseBytes(vReg dst, vReg src) %{
ins_pipe(pipe_slow);
%}
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vreverseBytes_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (ReverseBytesV dst_src pg));
- format %{ "vreverseBytes_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (ReverseBytesV src pg));
+ format %{ "vreverseBytes_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (bt == T_BYTE) {
- // do nothing
+ if ($dst$$FloatRegister != $src$$FloatRegister) {
+ __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ }
} else {
- __ sve_revb($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_revb($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
}
%}
ins_pipe(pipe_slow);
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index c5df949dfb6..a53efd43d5d 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -899,13 +899,22 @@ dnl
dnl VECTOR_NOT_PREDICATE($1 )
dnl VECTOR_NOT_PREDICATE(type)
define(`VECTOR_NOT_PREDICATE', `
-instruct vnot$1_masked`'(vReg dst_src, imm$1_M1 m1, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnot$1_masked`'(vReg dst, vReg src, imm$1_M1 m1, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
- format %{ "vnot$1_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (XorV (Binary src (Replicate m1)) pg));
+ format %{ "vnot$1_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_not($dst$$FloatRegister, get_reg_variant(this),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}')dnl
@@ -1042,14 +1051,23 @@ dnl
dnl UNARY_OP_PREDICATE($1, $2, $3 )
dnl UNARY_OP_PREDICATE(rule_name, op_name, insn)
define(`UNARY_OP_PREDICATE', `
-instruct $1_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct $1_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src ($2 dst_src pg));
- format %{ "$1_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst ($2 src pg));
+ format %{ "$1_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
- __ $3($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ $3($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}')dnl
@@ -1057,12 +1075,21 @@ dnl
dnl UNARY_OP_PREDICATE_WITH_SIZE($1, $2, $3, $4 )
dnl UNARY_OP_PREDICATE_WITH_SIZE(rule_name, op_name, insn, size)
define(`UNARY_OP_PREDICATE_WITH_SIZE', `
-instruct $1_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct $1_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src ($2 dst_src pg));
- format %{ "$1_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst ($2 src pg));
+ format %{ "$1_masked $dst, $pg, $src" %}
ins_encode %{
- __ $3($dst_src$$FloatRegister, __ $4, $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ $3($dst$$FloatRegister, __ $4, $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}')dnl
@@ -3368,9 +3395,7 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx,
__ sve_index($tmp$$FloatRegister, size, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
- if ($dst$$FloatRegister != $src$$FloatRegister) {
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- }
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
%}
ins_pipe(pipe_slow);
@@ -3393,9 +3418,7 @@ instruct insertI_index_ge32(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg t
__ sve_dup($tmp2$$FloatRegister, size, (int)($idx$$constant));
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
- if ($dst$$FloatRegister != $src$$FloatRegister) {
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- }
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
%}
ins_pipe(pipe_slow);
@@ -3429,9 +3452,7 @@ instruct insertL_gt128b(vReg dst, vReg src, iRegL val, immI idx,
__ sve_index($tmp$$FloatRegister, __ D, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
$tmp$$FloatRegister, (int)($idx$$constant) - 16);
- if ($dst$$FloatRegister != $src$$FloatRegister) {
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- }
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$Register);
%}
ins_pipe(pipe_slow);
@@ -3469,7 +3490,7 @@ instruct insertF_index_lt32(vReg dst, vReg src, vRegF val, immI idx,
__ sve_index($dst$$FloatRegister, __ S, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
$dst$$FloatRegister, (int)($idx$$constant) - 16);
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -3488,7 +3509,7 @@ instruct insertF_index_ge32(vReg dst, vReg src, vRegF val, immI idx, vReg tmp,
__ sve_dup($dst$$FloatRegister, __ S, (int)($idx$$constant));
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
$tmp$$FloatRegister, $dst$$FloatRegister);
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -3523,7 +3544,7 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
__ sve_index($dst$$FloatRegister, __ D, -16, 1);
__ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
$dst$$FloatRegister, (int)($idx$$constant) - 16);
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
__ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -3621,8 +3642,12 @@ instruct extract$1(vReg$1 dst, vReg src, immI idx) %{
__ ins($dst$$FloatRegister, __ $4, $src$$FloatRegister, 0, index);
} else {
assert(UseSVE > 0, "must be sve");
- __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
- __ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << $5);
+ __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the second source of ext. The movprfx destination register
+ // must not appear in any source operand of the following instruction
+ // except as the destructive operand.
+ __ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << $5);
}
%}
ins_pipe(pipe_slow);
@@ -4682,13 +4707,22 @@ instruct vpopcountL(vReg dst, vReg src) %{
// vector popcount - predicated
UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
-instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vpopcountL_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (PopCountVL dst_src pg));
- format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (PopCountVL src pg));
+ format %{ "vpopcountL_masked $dst, $pg, $src" %}
ins_encode %{
- __ sve_cnt($dst_src$$FloatRegister, __ D,
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_cnt($dst$$FloatRegister, __ D,
+ $pg$$PRegister, $src$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -5100,19 +5134,26 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
ins_pipe(pipe_slow);
%}
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vcountTrailingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (CountTrailingZerosV dst_src pg));
- format %{ "vcountTrailingZeros_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (CountTrailingZerosV src pg));
+ format %{ "vcountTrailingZeros_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
- __ sve_rbit($dst_src$$FloatRegister, size,
- $pg$$PRegister, $dst_src$$FloatRegister);
- __ sve_clz($dst_src$$FloatRegister, size,
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_rbit($dst$$FloatRegister, size,
+ $pg$$PRegister, $src$$FloatRegister);
+ __ sve_clz($dst$$FloatRegister, size,
+ $pg$$PRegister, $dst$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -5186,19 +5227,28 @@ instruct vreverseBytes(vReg dst, vReg src) %{
ins_pipe(pipe_slow);
%}
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vreverseBytes_masked(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src (ReverseBytesV dst_src pg));
- format %{ "vreverseBytes_masked $dst_src, $pg, $dst_src" %}
+ match(Set dst (ReverseBytesV src pg));
+ format %{ "vreverseBytes_masked $dst, $pg, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (bt == T_BYTE) {
- // do nothing
+ if ($dst$$FloatRegister != $src$$FloatRegister) {
+ __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+ }
} else {
- __ sve_revb($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
- $pg$$PRegister, $dst_src$$FloatRegister);
+ __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+ // Although dst and src hold the same value after movprfx, we must use src
+ // (not dst) as the source of the following instruction. The movprfx
+ // destination register must not appear in any source operand of the
+ // following instruction except as the destructive operand.
+ __ sve_revb($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src$$FloatRegister);
}
%}
ins_pipe(pipe_slow);
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index 4eb4e3d5ac7..87451b5a07a 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -1494,13 +1494,11 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
__ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1);
__ cset(rscratch1, Assembler::NE);
- __ membar(__ AnyAny);
}
void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
__ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1);
__ cset(rscratch1, Assembler::NE);
- __ membar(__ AnyAny);
}
@@ -3107,9 +3105,6 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
default:
ShouldNotReachHere();
}
- if(!UseLSE) {
- __ membar(__ AnyAny);
- }
}
#undef __
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 67dc4966d64..cb9e308197e 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -2494,8 +2494,12 @@ void C2_MacroAssembler::sve_extract_integral(Register dst, BasicType bt, FloatRe
smov(dst, src, size, idx);
}
} else {
- sve_orr(vtmp, src, src);
- sve_ext(vtmp, vtmp, idx << size);
+ sve_movprfx(vtmp, src);
+ // Although vtmp and src hold the same value after movprfx, we must use src
+ // (not vtmp) as the second source of ext. The movprfx destination register
+ // must not appear in any source operand of the following instruction except
+ // as the destructive operand.
+ sve_ext(vtmp, src, idx << size);
if (bt == T_INT || bt == T_LONG) {
umov(dst, vtmp, size, 0);
} else {
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
index bdbef53bfdb..c3bbc540ed4 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -464,11 +464,11 @@ frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
intptr_t* unextended_sp = interpreter_frame_sender_sp();
intptr_t* sender_fp = link();
-#if defined(COMPILER1) || COMPILER2_OR_JVMCI
+#if COMPILER1_OR_COMPILER2
if (map->update_map()) {
update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
}
-#endif // defined(COMPILER1) || COMPILER1_OR_COMPILER2
+#endif // COMPILER1_OR_COMPILER2
// For ROP protection, Interpreter will have signed the sender_pc,
// but there is no requirement to authenticate it here.
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
index 3d5261c31d1..bb93cd9a9d6 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,9 +36,6 @@
#include "utilities/align.hpp"
#include "utilities/debug.hpp"
#include "utilities/formatBuffer.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciRuntime.hpp"
-#endif
static int slow_path_size(nmethod* nm) {
// The slow path code is out of line with C2
@@ -81,34 +78,21 @@ class NativeNMethodBarrier {
public:
NativeNMethodBarrier(nmethod* nm): _nm(nm) {
-#if INCLUDE_JVMCI
- if (nm->is_compiled_by_jvmci()) {
- address pc = nm->code_begin() + nm->jvmci_nmethod_data()->nmethod_entry_patch_offset();
- RelocIterator iter(nm, pc, pc + 4);
- guarantee(iter.next(), "missing relocs");
- guarantee(iter.type() == relocInfo::section_word_type, "unexpected reloc");
-
- _guard_addr = (int*) iter.section_word_reloc()->target();
- _instruction_address = pc;
- } else
-#endif
- {
- _instruction_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
- if (nm->is_compiled_by_c2()) {
- // With c2 compiled code, the guard is out-of-line in a stub
- // We find it using the RelocIterator.
- RelocIterator iter(nm);
- while (iter.next()) {
- if (iter.type() == relocInfo::entry_guard_type) {
- entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
- _guard_addr = reinterpret_cast(reloc->addr());
- return;
- }
+ _instruction_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
+ if (nm->is_compiled_by_c2()) {
+ // With c2 compiled code, the guard is out-of-line in a stub
+ // We find it using the RelocIterator.
+ RelocIterator iter(nm);
+ while (iter.next()) {
+ if (iter.type() == relocInfo::entry_guard_type) {
+ entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
+ _guard_addr = reinterpret_cast(reloc->addr());
+ return;
}
- ShouldNotReachHere();
}
- _guard_addr = reinterpret_cast(instruction_address() + local_guard_offset(nm));
+ ShouldNotReachHere();
}
+ _guard_addr = reinterpret_cast(instruction_address() + local_guard_offset(nm));
}
int get_value() {
@@ -225,10 +209,3 @@ int BarrierSetNMethod::guard_value(nmethod* nm) {
NativeNMethodBarrier barrier(nm);
return barrier.get_value();
}
-
-#if INCLUDE_JVMCI
-bool BarrierSetNMethod::verify_barrier(nmethod* nm, err_msg& msg) {
- NativeNMethodBarrier barrier(nm);
- return barrier.check_barrier(msg);
-}
-#endif
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
deleted file mode 100644
index e31a58243b5..00000000000
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
- * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "c1/c1_LIRAssembler.hpp"
-#include "c1/c1_MacroAssembler.hpp"
-#include "compiler/compilerDefinitions.inline.hpp"
-#include "gc/shared/gc_globals.hpp"
-#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-
-#define __ masm->masm()->
-
-void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {
- Register addr = _addr->as_register_lo();
- Register newval = _new_value->as_register();
- Register cmpval = _cmp_value->as_register();
- Register tmp1 = _tmp1->as_register();
- Register tmp2 = _tmp2->as_register();
- Register result = result_opr()->as_register();
-
- if (UseCompressedOops) {
- __ encode_heap_oop(tmp1, cmpval);
- cmpval = tmp1;
- __ encode_heap_oop(tmp2, newval);
- newval = tmp2;
- }
-
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ true, /*release*/ true, /*is_cae*/ false, result);
-
- // The membar here is necessary to prevent reordering between the
- // release store in the CAS above and a subsequent volatile load.
- // See also: LIR_Assembler::casw, LIR_Assembler::casl.
- __ membar(__ AnyAny);
-}
-
-#undef __
-
-#ifdef ASSERT
-#define __ gen->lir(__FILE__, __LINE__)->
-#else
-#define __ gen->lir()->
-#endif
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
- BasicType bt = access.type();
- if (access.is_oop()) {
- LIRGenerator *gen = access.gen();
- if (ShenandoahSATBBarrier) {
- pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
- LIR_OprFact::illegalOpr /* pre_val */);
- }
- if (ShenandoahCASBarrier) {
- cmp_value.load_item();
- new_value.load_item();
-
- LIR_Opr t1 = gen->new_register(T_OBJECT);
- LIR_Opr t2 = gen->new_register(T_OBJECT);
- LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
- LIR_Opr result = gen->new_register(T_INT);
-
- __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result));
-
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), new_value.result());
- }
- return result;
- }
- }
- return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
-}
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
- LIRGenerator* gen = access.gen();
- BasicType type = access.type();
-
- LIR_Opr result = gen->new_register(type);
- value.load_item();
- LIR_Opr value_opr = value.result();
-
- assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
- LIR_Opr tmp = gen->new_register(T_INT);
- __ xchg(access.resolved_addr(), value_opr, result, tmp);
-
- if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators());
- LIR_Opr tmp = gen->new_register(type);
- __ move(result, tmp);
- result = tmp;
- if (ShenandoahSATBBarrier) {
- pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr,
- result /* pre_val */);
- }
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), result);
- }
- }
-
- return result;
-}
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index 56835e799f0..56ac2eec0a9 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -42,6 +42,10 @@
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif
+#ifdef COMPILER2
+#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
+#include "opto/output.hpp"
+#endif
#define __ masm->
@@ -831,3 +835,433 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
+
+#ifdef COMPILER2
+
+#undef __
+#define __ masm->
+
+
+void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
+ // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
+ if (is_narrow) {
+ if (is_acquire) {
+ assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
+ "is_acquire path requires address to be base-only");
+ __ ldarw(dst, src.base());
+ } else {
+ __ ldrw(dst, src);
+ }
+ } else {
+ if (is_acquire) {
+ assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
+ "is_acquire path requires address to be base-only");
+ __ ldar(dst, src.base());
+ } else {
+ __ ldr(dst, src);
+ }
+ }
+
+ ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
+}
+
+void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
+ Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile) {
+
+ ShenandoahBarrierStubC2::store_pre(masm, node, tmp1, dst, tmp2, tmp3, dst_narrow);
+
+ // Do the actual store
+ if (dst_narrow) {
+ if (!src_narrow) {
+ // Need to encode into tmp2, because we cannot clobber src.
+ if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
+ __ encode_heap_oop(tmp2, src);
+ } else {
+ __ encode_heap_oop_not_null(tmp2, src);
+ }
+ src = tmp2;
+ }
+
+ if (is_volatile) {
+ assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
+ "is_acquire path requires address to be base-only");
+ __ stlrw(src, dst.base());
+ } else {
+ __ strw(src, dst);
+ }
+ } else {
+ if (is_volatile) {
+ assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
+ "is_acquire path requires address to be base-only");
+ __ stlr(src, dst.base());
+ } else {
+ __ str(src, dst);
+ }
+ }
+
+ ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
+}
+
+void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
+ Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire) {
+ Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, addr, tmp2, tmp3, narrow);
+
+ // CAS!
+ __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
+
+ // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
+ if (!exchange) {
+ assert(res != noreg, "need result register");
+ __ cset(res, Assembler::EQ);
+ }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
+}
+
+void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
+ Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
+ bool is_narrow = node->bottom_type()->isa_narrowoop();
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, addr, tmp2, tmp3, is_narrow);
+
+ if (is_narrow) {
+ if (is_acquire) {
+ __ atomic_xchgalw(preval, newval, addr);
+ } else {
+ __ atomic_xchgw(preval, newval, addr);
+ }
+ } else {
+ if (is_acquire) {
+ __ atomic_xchgal(preval, newval, addr);
+ } else {
+ __ atomic_xchg(preval, newval, addr);
+ }
+ }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
+}
+
+#undef __
+#define __ masm.
+
+void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
+ assert(CardTable::dirty_card_val() == 0, "must be");
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+
+ // tmp1 = card table base (holder)
+ Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
+ __ ldr(tmp1, curr_ct_holder_addr);
+
+ // tmp2 = effective address
+ __ lea(tmp2, address);
+
+ // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
+ __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
+
+ if (UseCondCardMark) {
+ Label L_already_dirty;
+ __ ldrb(tmp1, Address(tmp2));
+ __ cbz(tmp1, L_already_dirty);
+ __ strb(zr, Address(tmp2));
+ __ bind(L_already_dirty);
+ } else {
+ __ strb(zr, Address(tmp2));
+ }
+}
+
+void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ PhaseOutput* const output = Compile::current()->output();
+ Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
+
+ // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
+ // We'll use that information to decide whether we need a far jump to the
+ // stub entry point or not. In scratch_emit_size mode we don't bind entry()
+ // because otherwise it will be rebound when we later emit the instructions
+ // for real.
+ if (_needs_far_jump) {
+ __ ldrb(tmp, gc_state_fast);
+ __ cbz(tmp, *continuation());
+ __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
+ } else {
+ __ ldrb(tmp, gc_state_fast);
+ __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());
+ }
+
+ // This is where the slowpath stub will return to or the code above will
+ // jump to if the checks are false
+ __ bind(*continuation());
+}
+
+void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
+ PhaseOutput* const output = Compile::current()->output();
+
+ // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
+ // We'll use that information to decide whether we need a far jump to the
+ // stub entry point or not. In scratch_emit_size mode we don't bind entry()
+ // because otherwise it will be rebound when we later emit the instructions
+ // for real.
+ if (!output->in_scratch_emit_size()) {
+ __ bind(*entry());
+ }
+
+ // If we need to load ourselves, do it here.
+ if (_do_load) {
+ if (_narrow) {
+ __ ldrw(_obj, _addr);
+ } else {
+ __ ldr(_obj, _addr);
+ }
+ }
+
+ // If the object is null, there is no point in applying barriers.
+ maybe_far_jump_if_zero(masm, _obj);
+
+ // We need to make sure that loads done by callers survive across slow-path calls.
+ // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
+ bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
+ if (!_do_load || needs_both_barriers) {
+ preserve(_obj);
+ }
+
+ // Go for barriers. Barriers can return straight to continuation, as long
+ // as another barrier is not needed and we can reach the fastpath.
+ if (needs_both_barriers) {
+ // The Load match rule in the .ad file may have legitimized the load
+ // address using a TEMP register and in that case we need to explicitly
+ // preserve them here, because the RA does not consider TEMP as live-in,
+ // and the KA runtime call may clobber them and cause a crash on the
+ // subsequent LRB stub.
+ if (_addr.base() != noreg) {
+ preserve(_addr.base());
+ }
+ if (_addr.index() != noreg) {
+ preserve(_addr.index());
+ }
+ keepalive(masm, nullptr);
+ lrb(masm);
+ } else if (_needs_keep_alive_barrier) {
+ keepalive(masm, continuation());
+ } else if (_needs_load_ref_barrier) {
+ lrb(masm);
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) { // Emits "if (reg == 0) goto continuation()".
+ if (_needs_far_jump) {
+ Label L_short_jump;
+ __ cbnz(reg, L_short_jump); // Non-zero: hop over the unconditional branch below.
+ __ b(*continuation()); // Zero: reach the continuation with an unconditional b instead of cbz.
+ __ bind(L_short_jump);
+ } else {
+ __ cbz(reg, *continuation()); // One compare-and-branch straight to the continuation.
+ }
+}
+
+void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) { // L_done: branch target once done; nullptr means fall through at the end.
+ Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING))); // rthread-relative fast gc-state slot for MARKING.
+ Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); // rthread-relative SATB queue index.
+ Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); // rthread-relative SATB queue buffer pointer.
+ Label L_through, L_slowpath;
+
+ // When the load-reference barrier is emitted after this one, keep-alive is
+ // guarded by its own runtime check of the MARKING slot.
+ if (_needs_load_ref_barrier) {
+ assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
+ __ ldrb(_tmp1, gcstate);
+ __ cbz(_tmp1, L_through); // MARKING slot is zero: skip the keep-alive work entirely.
+ }
+
+ // Fast-path: put object into buffer.
+ // If buffer is already full, go slow.
+ __ ldr(_tmp1, index);
+ __ cbz(_tmp1, L_slowpath); // An index of zero is the "buffer full" case.
+ __ sub(_tmp1, _tmp1, wordSize); // Step the index down by one word...
+ __ str(_tmp1, index); // ...and write the new index back.
+ __ ldr(_tmp2, buffer);
+
+ // Store the object in queue.
+ // If object is narrow, we need to decode it before inserting.
+ if (_narrow) {
+ __ add(_tmp2, _tmp2, _tmp1); // _tmp2 = buffer + index, which frees _tmp1 for the decoded oop.
+ __ decode_heap_oop_not_null(_tmp1, _obj);
+ __ str(_tmp1, Address(_tmp2));
+ } else {
+ // Buffer is 64-bit address, must be in base register.
+ __ str(_obj, Address(_tmp2, _tmp1));
+ }
+
+ // Fast-path exits here.
+ if (L_done != nullptr) {
+ __ b(*L_done);
+ } else {
+ __ b(L_through); // Skip over the slow path emitted next.
+ }
+
+ // Slow-path: call runtime to handle.
+ __ bind(L_slowpath);
+
+ {
+ SaveLiveRegisters slr(&masm, this); // Scoped to this block. NOTE(review): presumably saves/restores the preserved registers around the call - confirm.
+
+ // Go to runtime and handle the rest there.
+ __ mov(c_rarg0, _obj); // The object is the only argument set up here.
+ __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
+ __ blr(lr); // Indirect call through lr.
+ }
+ if (L_done != nullptr) {
+ __ b(*L_done);
+ } else {
+ __ bind(L_through); // The skipped-barrier check and the fast path rejoin here.
+ }
+}
+
+void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) { // Emits the load-reference-barrier part of the stub; every exit goes to continuation().
+ Label L_slow;
+
+ // If another barrier is enabled as well, do a runtime check for a specific barrier.
+ if (_needs_keep_alive_barrier) {
+ char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0); // HAS_FORWARDED, plus WEAK_ROOTS for weak loads.
+ Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
+ __ ldrb(_tmp1, gc_state_fast);
+ maybe_far_jump_if_zero(masm, _tmp1); // Slot is zero: nothing to do, leave the stub.
+ }
+
+ // If weak references are being processed, weak/phantom loads need to go slow,
+ // regardless of their cset status.
+ if (_needs_load_ref_weak_barrier) {
+ Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
+ __ ldrb(_tmp1, gc_state_fast);
+ __ cbnz(_tmp1, L_slow); // WEAK_ROOTS slot is non-zero: bypass the cset check.
+ }
+
+ // Cset-check. Fall-through to slow if in collection set.
+ bool is_aot = AOTCodeCache::is_on_for_dump();
+ if (!is_aot) {
+ __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr()); // Table address is materialized as a constant.
+ if (_narrow) {
+ __ decode_heap_oop_not_null(_tmp2, _obj);
+ __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint()); // _tmp1 = table + (decoded oop >> region shift).
+ } else {
+ __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint()); // _tmp1 = table + (oop >> region shift).
+ }
+ } else {
+ // Generating AOT code, pull the cset bitmap and region shift from AOT table.
+ if (_narrow) {
+ __ decode_heap_oop_not_null(_tmp1, _obj);
+ } else {
+ __ mov(_tmp1, _obj);
+ }
+ __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
+ __ ldrw(_tmp2, Address(_tmp2)); // Shift amount is loaded from memory rather than embedded.
+ __ lsrv(_tmp2, _tmp1, _tmp2); // _tmp2 = oop >> shift.
+ __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
+ __ ldr(_tmp1, Address(_tmp1));
+ __ add(_tmp1, _tmp1, _tmp2); // _tmp1 = loaded cset base + shifted oop.
+ }
+ __ ldrb(_tmp1, Address(_tmp1, 0)); // Read this object's byte from the table.
+ maybe_far_jump_if_zero(masm, _tmp1); // Zero byte: leave via continuation(); otherwise fall into the slow path.
+
+ // Slow path
+ __ bind(L_slow);
+
+ // Obj is the result, need to temporarily stop preserving it.
+ bool is_obj_preserved = is_preserved(_obj);
+ if (is_obj_preserved) {
+ dont_preserve(_obj);
+ }
+ {
+ SaveLiveRegisters slr(&masm, this); // Scoped to this block. NOTE(review): presumably saves/restores the preserved registers around the call - confirm.
+
+ // Shuffle in the arguments. The end result should be:
+ // c_rarg0 <-- obj
+ // c_rarg1 <-- lea(addr)
+ if (c_rarg0 == _obj) {
+ __ lea(c_rarg1, _addr); // obj is already in place; only the address is needed.
+ } else if (c_rarg1 == _obj) {
+ // Set up arguments in reverse, and then flip them
+ __ lea(c_rarg0, _addr);
+ // Three-move swap of c_rarg0 and c_rarg1 through _tmp1.
+ __ mov(_tmp1, c_rarg0);
+ __ mov(c_rarg0, c_rarg1);
+ __ mov(c_rarg1, _tmp1);
+ } else {
+ assert_different_registers(c_rarg1, _obj);
+ __ lea(c_rarg1, _addr); // Address first, then the object.
+ __ mov(c_rarg0, _obj);
+ }
+
+ // Go to runtime and handle the rest there.
+ __ lea(lr, RuntimeAddress(lrb_runtime_entry_addr()));
+ __ blr(lr); // Indirect call through lr.
+
+ // Save the result where needed. Narrow entries return narrowOop (32 bits)
+ // and AAPCS does not guarantee the upper 32 bits of x0 are zero.
+ if (_narrow) {
+ __ movw(_obj, r0); // 32-bit move, issued even when _obj is r0.
+ } else if (_obj != r0) {
+ __ mov(_obj, r0);
+ }
+ }
+ if (is_obj_preserved) {
+ preserve(_obj); // Undo the dont_preserve() above.
+ }
+
+ __ b(*continuation());
+}
+
+int ShenandoahBarrierStubC2::available_gp_registers() {
+ Unimplemented(); // Not used here; calling it reports Unimplemented.
+ return 0; // Placeholder return value following Unimplemented().
+}
+
+bool ShenandoahBarrierStubC2::is_special_register(Register r) {
+ Unimplemented(); // Not used here; calling it reports Unimplemented.
+ return true; // Placeholder return value following Unimplemented(); r is ignored.
+}
+
+static ShenandoahBarrierSetC2State* barrier_set_state() { // The current compilation's barrier-set state, viewed as the Shenandoah C2 state.
+ return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
+}
+
+static int get_stub_size(ShenandoahBarrierStubC2* stub) { // Emits the stub into scratch space and returns its instruction size.
+ PhaseOutput* const output = Compile::current()->output();
+ assert(output->in_scratch_emit_size(), "only used when in scratch_emit_size.");
+ BufferBlob* const blob = output->scratch_buffer_blob();
+ CodeBuffer cb(blob->content_begin(), (address)output->scratch_locs_memory() - blob->content_begin()); // Spans from the blob's content start up to scratch_locs_memory().
+ MacroAssembler masm(&cb);
+ stub->emit_code(masm); // Emission here is only for measuring.
+ return cb.insts_size();
+}
+
+void ShenandoahBarrierStubC2::post_init() {
+ // If we are in scratch emit mode we assume worst case, and force the use of
+ // far branches.
+ PhaseOutput* const output = Compile::current()->output();
+ ShenandoahBarrierSetC2State* state = barrier_set_state();
+ if (output->in_scratch_emit_size()) {
+ _needs_far_jump = true; // Set before measuring so the recorded stub size is the far-jump (largest) form.
+ state->inc_stubs_current_total_size(get_stub_size(this));
+ return;
+ }
+
+ // The logic implemented in this stub only uses short jumps (cbz, cbnz) if
+ // the aggregation of all relevant code sections of a method is less than 1MB
+ // - 2KB. We could be more aggressive and try and compute the distance
+ // between the fastpath branch and the stub entry but in practice not many
+ // methods reach the 1MB size.
+ const BufferSizingData* sizing = output->buffer_sizing_data();
+ const int code_size = sizing->_code + state->stubs_current_total_size(); // Method code plus all stub sizes accumulated during the sizing pass.
+
+ // Maximum backward range is 1M. Maximum forward reach is 1M - 4bytes.
+ // Subtract 2K to be ultra conservative.
+ const int cond_branch_max_reach = (int)(1*M - 2*K);
+ _needs_far_jump = code_size >= cond_branch_max_reach;
+}
+
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
index 3f96177d009..e4c7007eb17 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -36,6 +36,9 @@ class ShenandoahPreBarrierStub;
class ShenandoahLoadReferenceBarrierStub;
class StubAssembler;
#endif
+#ifdef COMPILER2
+class MachNode;
+#endif // COMPILER2
class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
@@ -61,13 +64,6 @@ private:
public:
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_instruction_and_data_patch; }
-#ifdef COMPILER1
- void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
- void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
- void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
-#endif
-
virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
Register src, Register dst, Register count, RegSet saved_regs);
virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
@@ -82,6 +78,22 @@ public:
Register tmp, Label& slow_path);
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
bool acquire, bool release, bool is_cae, Register result);
+
+#ifdef COMPILER1
+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
+#endif
+
+#ifdef COMPILER2
+ // Entry points from Matcher
+ void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address addr, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire);
+ void store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow, Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile);
+ void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval,
+ Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire);
+ void get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval, Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool acquire);
+#endif
};
#endif // CPU_AARCH64_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad
index d5dcf7f9534..fa18bc46bda 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad
@@ -22,238 +22,646 @@
//
//
-source_hpp %{
+source %{
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp"
%}
-encode %{
- enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
- %}
-
- enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
- %}
-%}
-
-instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
-
- match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
- ins_cost(2 * VOLATILE_REF_COST);
-
- effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
- %}
-
- ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, res));
-
- ins_pipe(pipe_slow);
-%}
-
-instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
-
- match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
- ins_cost(2 * VOLATILE_REF_COST);
-
- effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
- %}
+// ---------------------------------- LOADS ---------------------------------------
+//
+instruct load_P_Normal_shenandoah(iRegPNoSp dst, memory8 mem, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set dst (LoadP mem)); // Pointer load.
+ predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); // Shenandoah only, non-acquiring, and the node carries barrier data.
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ // The main load may implement an implicit null check, but only for the INDOFFL8 operand form.
+ ins_is_late_expanded_null_check_candidate(opnd_array(1)->opcode() == INDOFFL8);
+ format %{ "ldr $dst, $mem\t# ptr" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
+ Address addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ if (addr.getMode() == Address::base_plus_offset) {
+ addr = __ legitimize_address(addr, /* size_in_memory */ 8, $tmp$$Register); // $tmp is handed to legitimize_address as its scratch register.
+ }
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+ $dst$$Register,
+ addr,
+ rscratch1, // tmp1 for load_c2
+ rscratch2, // tmp2 for load_c2
+ /* narrow = */ false,
+ /* acquire = */ false);
%}
-
- ins_pipe(pipe_slow);
+ ins_cost(3*INSN_COST);
+ ins_pipe(pipe_class_memory);
%}
-instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
-
- predicate(needs_acquiring_load_exclusive(n));
- match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
- ins_cost(VOLATILE_REF_COST);
-
- effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
- %}
-
- ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res));
-
- ins_pipe(pipe_slow);
-%}
-
-instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
-
- predicate(needs_acquiring_load_exclusive(n));
- match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
- ins_cost(VOLATILE_REF_COST);
-
- effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
- %}
-
+instruct load_P_Volatile_shenandoah(iRegPNoSp dst, indirect mem, rFlagsReg cr)
+%{
+ match(Set dst (LoadP mem)); // Pointer load.
+ predicate(UseShenandoahGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); // Shenandoah only, acquiring, and the node carries barrier data.
+ effect(TEMP_DEF dst, KILL cr);
+ // The main load is a candidate to implement implicit null checks.
+ ins_is_late_expanded_null_check_candidate(true);
+ format %{ "ldar $dst, $mem\t# ptr" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+ $dst$$Register,
+ Address($mem$$Register), // 'indirect' operand: base register only.
+ rscratch1, // tmp1 for load_c2
+ rscratch2, // tmp2 for load_c2
+ /* narrow = */ false,
+ /* acquire = */ true);
%}
-
- ins_pipe(pipe_slow);
+ ins_cost(VOLATILE_REF_COST); // Same cost as load_N_Volatile_shenandoah and the volatile stores.
+ ins_pipe(pipe_class_memory);
%}
-instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
- ins_cost(2 * VOLATILE_REF_COST);
+instruct load_N_Normal_shenandoah(iRegNNoSp dst, memory4 mem, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set dst (LoadN mem)); // Narrow-oop load.
+ predicate(UseShenandoahGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); // Shenandoah only, non-acquiring, and the node carries barrier data.
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ // The main load may implement an implicit null check, but only for the INDOFFL4 operand form.
+ ins_is_late_expanded_null_check_candidate(opnd_array(1)->opcode() == INDOFFL4);
+ format %{ "ldrw $dst, $mem\t# ptr" %}
+ ins_encode %{
+ Address addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ if (addr.getMode() == Address::base_plus_offset) {
+ addr = __ legitimize_address(addr, /* size_in_memory */ 4, $tmp$$Register); // $tmp is handed to legitimize_address as its scratch register.
+ }
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+ $dst$$Register,
+ addr,
+ rscratch1, // tmp1 for load_c2
+ rscratch2, // tmp2 for load_c2
+ /* narrow = */ true,
+ /* acquire = */ false);
+ %}
+ ins_cost(3*INSN_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct load_N_Volatile_shenandoah(iRegNNoSp dst, indirect mem, rFlagsReg cr)
+%{
+ match(Set dst (LoadN mem)); // Narrow-oop load.
+ predicate(UseShenandoahGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); // Shenandoah only, acquiring, and the node carries barrier data.
+ effect(TEMP_DEF dst, KILL cr);
+ // The main load is a candidate to implement implicit null checks.
+ ins_is_late_expanded_null_check_candidate(true);
+ format %{ "ldarw $dst, $mem\t# ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+ $dst$$Register,
+ Address($mem$$Register), // 'indirect' operand: base register only.
+ rscratch1, // tmp1 for load_c2
+ rscratch2, // tmp2 for load_c2
+ /* narrow = */ true,
+ /* acquire = */ true);
+ %}
+ ins_cost(VOLATILE_REF_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+
+// ---------------------------------- STORES ---------------------------------------
+//
+
+instruct store_P_Normal_shenandoah(memory8 mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ match(Set mem (StoreP mem src)); // Pointer store.
+ predicate(UseShenandoahGC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); // Shenandoah only, non-releasing, and the node carries barrier data.
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ format %{ "str $src, $mem" %}
+ ins_encode %{
+ Address addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ if (addr.getMode() == Address::base_plus_offset) {
+ addr = __ legitimize_address(addr, /* size_in_memory */ 8, $tmp2$$Register); // $tmp2 is the scratch register for legitimize_address.
+ }
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ addr, /* dst_narrow = */ false,
+ $src$$Register, /* src_narrow = */ false,
+ $tmp1$$Register, rscratch1, rscratch2, // tmp1, tmp2, tmp3 for store_c2
+ /* is_volatile = */ false);
+ %}
+ ins_cost(3*INSN_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct store_P_Volatile_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreP mem src)); // Pointer store.
+ predicate(UseShenandoahGC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); // Shenandoah only, releasing, and the node carries barrier data.
+ effect(TEMP tmp, KILL cr);
+ format %{ "stlr $src, $mem" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ Address($mem$$Register), /* dst_narrow = */ false, // 'indirect' operand: base register only.
+ $src$$Register, /* src_narrow = */ false,
+ $tmp$$Register, rscratch1, rscratch2, // tmp1, tmp2, tmp3 for store_c2
+ /* is_volatile = */ true);
+ %}
+ ins_cost(VOLATILE_REF_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct store_N_Normal_shenandoah(memory4 mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem src)); // Narrow-oop store of an already-narrow value.
+ predicate(UseShenandoahGC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); // Shenandoah only, non-releasing, and the node carries barrier data.
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ format %{ "strw $src, $mem" %}
+ ins_encode %{
+ Address addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ if (addr.getMode() == Address::base_plus_offset) {
+ addr = __ legitimize_address(addr, /* size_in_memory */ 4, $tmp2$$Register); // $tmp2 is the scratch register for legitimize_address.
+ }
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ addr, /* dst_narrow = */ true,
+ $src$$Register, /* src_narrow = */ true,
+ $tmp1$$Register, rscratch1, rscratch2, // tmp1, tmp2, tmp3 for store_c2
+ /* is_volatile = */ false);
+ %}
+ ins_cost(3*INSN_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct store_N_Volatile_shenandoah(indirect mem, iRegN src, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem src)); // Narrow-oop store of an already-narrow value.
+ predicate(UseShenandoahGC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); // Shenandoah only, releasing, and the node carries barrier data.
+ effect(TEMP tmp, KILL cr);
+ format %{ "stlrw $src, $mem" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ Address($mem$$Register), /* dst_narrow = */ true, // 'indirect' operand: base register only.
+ $src$$Register, /* src_narrow = */ true,
+ $tmp$$Register, rscratch1, rscratch2, // tmp1, tmp2, tmp3 for store_c2
+ /* is_volatile = */ true);
+ %}
+ ins_cost(VOLATILE_REF_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct encodePAndStoreN_Normal_shenandoah(memory4 mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem (EncodeP src))); // Fuses the EncodeP with the narrow store; $src is a full pointer.
+ predicate(UseShenandoahGC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); // Shenandoah only, non-releasing, and the node carries barrier data.
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ format %{ "encode_heap_oop tmp, $src\n\t"
+ "str tmp, $mem\t# compressed ptr" %}
+ ins_encode %{
+ Address addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ if (addr.getMode() == Address::base_plus_offset) {
+ addr = __ legitimize_address(addr, /* size_in_memory */ 4, $tmp2$$Register); // $tmp2 is the scratch register for legitimize_address.
+ }
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ addr, /* dst_narrow = */ true,
+ $src$$Register, /* src_narrow = */ false, // Wide source going to a narrow destination.
+ $tmp1$$Register, rscratch1, rscratch2, // tmp1, tmp2, tmp3 for store_c2
+ /* is_volatile = */ false);
+ %}
+ ins_cost(4*INSN_COST);
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct encodePAndStoreN_Volatile_shenandoah(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem (EncodeP src))); // Fuses the EncodeP with the narrow store; $src is a full pointer.
+ predicate(UseShenandoahGC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); // Shenandoah only, releasing, and the node carries barrier data.
+ effect(TEMP tmp, KILL cr);
+ format %{ "encode_heap_oop tmp, $src\n\t"
+ "stlrw tmp, $mem\t# compressed ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ Address($mem$$Register), /* dst_narrow = */ true, // 'indirect' operand: base register only.
+ $src$$Register, /* src_narrow = */ false, // Wide source going to a narrow destination.
+ $tmp$$Register, rscratch1, rscratch2, // tmp1, tmp2, tmp3 for store_c2
+ /* is_volatile = */ true);
+ %}
+ ins_cost(4*INSN_COST); // NOTE(review): the other volatile stores here use VOLATILE_REF_COST - confirm this is intended.
+ ins_pipe(pipe_class_memory);
+%}
+
+
+// ---------------------- LOAD-STORES -----------------------------------
+//
+
+instruct compareAndSwap_P_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); // Pointer compare-and-swap; $res is an int register.
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, non-acquiring, and the node carries barrier data.
effect(TEMP_DEF res, TEMP tmp, KILL cr);
format %{
- "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
+ "cmpxchg_P_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register);
- %}
- ins_pipe(pipe_slow);
-%}
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
-instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
- ins_cost(2 * VOLATILE_REF_COST);
- effect(TEMP_DEF res, TEMP tmp, KILL cr);
- format %{
- "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ false,
+ /* narrow = */ false,
+ /* weak = */ false,
+ /* acquire = */ false);
%}
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register);
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
- effect(TEMP_DEF res, TEMP tmp, KILL cr);
- format %{
- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
- %}
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register);
- %}
ins_pipe(pipe_slow);
%}
-instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
+instruct compareAndSwap_P_A_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); // Pointer compare-and-swap; $res is an int register.
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ false,
+ /* narrow = */ false,
+ /* weak = */ false,
+ /* acquire = */ true);
+ %}
ins_cost(VOLATILE_REF_COST);
- effect(TEMP_DEF res, TEMP tmp, KILL cr);
- format %{
- "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
- %}
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register);
- %}
ins_pipe(pipe_slow);
%}
-instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
- ins_cost(2 * VOLATILE_REF_COST);
- effect(TEMP tmp, KILL cr);
+instruct compareAndSwap_N_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); // Narrow-oop compare-and-swap; $res is an int register.
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, non-acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
format %{
- "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
+ "cmpxchg_N_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ false,
+ /* narrow = */ true,
+ /* weak = */ false,
+ /* acquire = */ false);
+ %}
+ ins_cost(VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwap_N_A_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); // Narrow-oop compare-and-swap; $res is an int register.
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ false,
+ /* narrow = */ true,
+ /* weak = */ false,
+ /* acquire = */ true);
+ %}
+ ins_cost(VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchange_N_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); // Narrow-oop compare-and-exchange; $res is a narrow-oop register.
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, non-acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ true,
+ /* narrow = */ true,
+ /* weak = */ false,
+ /* acquire = */ false);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchange_N_A_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); // Narrow-oop compare-and-exchange; $res is a narrow-oop register.
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ true,
+ /* narrow = */ true,
+ /* weak = */ false,
+ /* acquire = */ true);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchange_P_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); // Pointer compare-and-exchange; $res is a pointer register.
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, non-acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ true,
+ /* narrow = */ false,
+ /* weak = */ false,
+ /* acquire = */ false);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchange_P_A_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); // Pointer compare-and-exchange; $res is a pointer register.
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, acquiring, and the node carries barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // 'indirect' operand: no index, no displacement.
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ $res$$Register,
+ $mem$$base$$Register, // addr
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register, // tmp1
+ rscratch1, // tmp2
+ rscratch2, // tmp3
+ /* exchange = */ true,
+ /* narrow = */ false,
+ /* weak = */ false,
+ /* acquire = */ true);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwap_N_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for WeakCompareAndSwapN (narrow form); this variant passes weak = true and acquire = false.
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, no acquiring load-exclusive needed
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (N, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
- %}
- ins_pipe(pipe_slow);
-%}
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // the indirect operand must be a bare base register: no index, zero displacement
-instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
- ins_cost(2 * VOLATILE_REF_COST);
- effect(TEMP tmp, KILL cr);
- format %{
- "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $res$$Register,
+ $mem$$base$$Register,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* exchange = */ false,
+ /* narrow = */ true,
+ /* weak = */ true,
+ /* acquire = */ false);
%}
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
- effect(TEMP tmp, KILL cr);
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwap_N_A_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for WeakCompareAndSwapN (narrow form); this variant passes weak = true and acquire = true.
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, acquiring load-exclusive needed
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
format %{
- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (N, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // the indirect operand must be a bare base register: no index, zero displacement
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $res$$Register,
+ $mem$$base$$Register,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* exchange = */ false,
+ /* narrow = */ true,
+ /* weak = */ true,
+ /* acquire = */ true);
%}
+ ins_cost(VOLATILE_REF_COST);
ins_pipe(pipe_slow);
%}
-instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
- ins_cost(VOLATILE_REF_COST);
- effect(TEMP tmp, KILL cr);
+instruct weakCompareAndSwap_P_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for WeakCompareAndSwapP (pointer form); this variant passes weak = true and acquire = false.
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, no acquiring load-exclusive needed
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
format %{
- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (P, weak) if $mem == $oldval then $mem <-- $newval\n\t"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // the indirect operand must be a bare base register: no index, zero displacement
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $res$$Register,
+ $mem$$base$$Register,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* exchange = */ false,
+ /* narrow = */ false,
+ /* weak = */ true,
+ /* acquire = */ false);
%}
+ ins_cost(VOLATILE_REF_COST);
ins_pipe(pipe_slow);
%}
+
+instruct weakCompareAndSwap_P_A_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for WeakCompareAndSwapP (pointer form); this variant passes weak = true and acquire = true.
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, acquiring load-exclusive needed
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{
+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (P, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+ "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+ ins_encode %{
+ assert($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); // the indirect operand must be a bare base register: no index, zero displacement
+
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $res$$Register,
+ $mem$$base$$Register,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* exchange = */ false,
+ /* narrow = */ false,
+ /* weak = */ true,
+ /* acquire = */ true);
+ %}
+ ins_cost(VOLATILE_REF_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct getAndSet_P_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for GetAndSetP (pointer form); this variant passes acquire = false.
+ match(Set preval (GetAndSetP mem newval));
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, no acquiring load-exclusive needed
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "atomic_xchg $preval, $newval, [$mem]" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $preval$$Register,
+ $newval$$Register,
+ $mem$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* acquire = */ false);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_serial);
+%}
+
+instruct getAndSet_P_A_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for GetAndSetP (pointer form); this variant passes acquire = true.
+ match(Set preval (GetAndSetP mem newval));
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, acquiring load-exclusive needed
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $preval$$Register,
+ $newval$$Register,
+ $mem$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* acquire = */ true);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_serial);
+%}
+
+instruct getAndSet_N_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for GetAndSetN (narrow form); this variant passes acquire = false.
+ match(Set preval (GetAndSetN mem newval));
+ predicate(UseShenandoahGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, no acquiring load-exclusive needed
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "atomic_xchgw $preval, $newval, [$mem]" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $preval$$Register,
+ $newval$$Register,
+ $mem$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* acquire = */ false);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_serial);
+%}
+
+instruct getAndSet_N_A_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{ // Shenandoah C2 rule for GetAndSetN (narrow form); this variant passes acquire = true.
+ match(Set preval (GetAndSetN mem newval));
+ predicate(UseShenandoahGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, node carries barrier data, acquiring load-exclusive needed
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "atomic_xchgw_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm, // code emission is delegated to the Shenandoah barrier-set assembler
+ $preval$$Register,
+ $newval$$Register,
+ $mem$$Register,
+ $tmp$$Register,
+ rscratch1,
+ rscratch2,
+ /* acquire = */ true);
+ %}
+ ins_cost(2*VOLATILE_REF_COST);
+ ins_pipe(pipe_serial);
+%}
diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
index dfeba73bede..59c7e44b0e5 100644
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im
define_pd_global(bool, TrapBasedNullChecks, false);
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls past to check cast
-define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
+define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_PRESENT(true) NOT_COMPILER2(false));
define_pd_global(size_t, CodeCacheSegmentSize, 64);
define_pd_global(uint, CodeEntryAlignment, 64);
diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
index 980fedb406d..22c2383816c 100644
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
@@ -1174,7 +1174,7 @@ void InterpreterMacroAssembler::notify_method_exit(
// Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
// track stack depth. If it is possible to enter interp_only_mode we add
// the code to check if the event should be sent.
- if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+ if (mode == NotifyJVMTI && (JvmtiExport::can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
Label L;
// Note: frame::interpreter_frame_result has a dependency on how the
// method result is saved across the call to post_method_exit. If this
@@ -1183,8 +1183,15 @@ void InterpreterMacroAssembler::notify_method_exit(
// template interpreter will leave the result on the top of the stack.
push(state);
- ldrw(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
- cbz(r3, L);
+
+ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+ cbz(rscratch1, L); // if (thread->jvmti_thread_state() == nullptr) exit;
+
+ ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::frame_pop_cnt_offset()));
+ ldrw(rscratch2, Address(rthread, JavaThread::interp_only_mode_offset()));
+ orrw(rscratch1, rscratch1, rscratch2);
+ cbzw(rscratch1, L);
+
call_VM(noreg,
CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
bind(L);
diff --git a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
deleted file mode 100644
index 071dd2c4179..00000000000
--- a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "asm/macroAssembler.hpp"
-#include "jvmci/jvmci.hpp"
-#include "jvmci/jvmciCodeInstaller.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#include "jvmci/jvmciCompilerToVM.hpp"
-#include "jvmci/jvmciJavaClasses.hpp"
-#include "oops/compressedKlass.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/jniHandles.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "vmreg_aarch64.inline.hpp"
-#if INCLUDE_ZGC
-#include "gc/z/zBarrierSetAssembler.hpp"
-#endif
-
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMCI_TRAPS) {
- if (inst->is_call() || inst->is_jump() || inst->is_blr()) {
- return pc_offset + NativeCall::instruction_size;
- } else if (inst->is_general_jump()) {
- return pc_offset + NativeGeneralJump::instruction_size;
- } else if (NativeInstruction::is_adrp_at((address)inst)) {
- // adrp; add; blr
- return pc_offset + 3 * NativeInstruction::instruction_size;
- } else {
- JVMCI_ERROR_0("unsupported type of instruction for call site");
- }
-}
-
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& obj, bool compressed, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
-#ifdef ASSERT
- {
- NativeInstruction *insn = nativeInstruction_at(pc);
- if (compressed) {
- // Mov narrow constant: movz n << 16, movk
- assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
- nativeInstruction_at(pc+4)->is_movk(), "wrong insn in patch");
- } else {
- // Move wide constant: movz n, movk, movk.
- assert(nativeInstruction_at(pc+4)->is_movk()
- && nativeInstruction_at(pc+8)->is_movk(), "wrong insn in patch");
- }
- }
-#endif // ASSERT
- jobject value = JNIHandles::make_local(obj());
- MacroAssembler::patch_oop(pc, cast_from_oop(obj()));
- int oop_index = _oop_recorder->find_index(value);
- RelocationHolder rspec = oop_Relocation::spec(oop_index);
- _instructions->relocate(pc, rspec);
-}
-
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, HotSpotCompiledCodeStream* stream, u1 tag, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- if (tag == PATCH_NARROW_KLASS) {
- narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, stream, tag, JVMCI_CHECK);
- MacroAssembler::patch_narrow_klass(pc, narrowOop);
- JVMCI_event_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop);
- } else {
- NativeMovConstReg* move = nativeMovConstReg_at(pc);
- void* reference = record_metadata_reference(_instructions, pc, stream, tag, JVMCI_CHECK);
- move->set_data((intptr_t) reference);
- JVMCI_event_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference));
- }
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- NativeInstruction* inst = nativeInstruction_at(pc);
- if (inst->is_adr_aligned() || inst->is_ldr_literal()
- || (NativeInstruction::maybe_cpool_ref(pc))) {
- address dest = _constants->start() + data_offset;
- _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS));
- JVMCI_event_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
- } else {
- JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc));
- }
-}
-
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, JVMCI_TRAPS) {
- address pc = (address) inst;
- if (inst->is_call()) {
- NativeCall* call = nativeCall_at(pc);
- call->set_destination((address) foreign_call_destination);
- _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec());
- } else if (inst->is_jump()) {
- NativeJump* jump = nativeJump_at(pc);
- jump->set_jump_destination((address) foreign_call_destination);
- _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
- } else if (inst->is_general_jump()) {
- NativeGeneralJump* jump = nativeGeneralJump_at(pc);
- jump->set_jump_destination((address) foreign_call_destination);
- _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
- } else if (NativeInstruction::is_adrp_at((address)inst)) {
- // adrp; add; blr
- MacroAssembler::pd_patch_instruction_size((address)inst,
- (address)foreign_call_destination);
- } else {
- JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
- }
- JVMCI_event_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
-}
-
-void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, methodHandle& method, jint pc_offset, JVMCI_TRAPS) {
- NativeCall* call = nullptr;
- switch (_next_call_type) {
- case INLINE_INVOKE:
- return;
- case INVOKEVIRTUAL:
- case INVOKEINTERFACE: {
- assert(!method->is_static(), "cannot call static method with invokeinterface");
- call = nativeCall_at(_instructions->start() + pc_offset);
- _instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc));
- call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub(), JVMCI_CHECK);
- break;
- }
- case INVOKESTATIC: {
- assert(method->is_static(), "cannot call non-static method with invokestatic");
- call = nativeCall_at(_instructions->start() + pc_offset);
- _instructions->relocate(call->instruction_address(), relocInfo::static_call_type);
- call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub(), JVMCI_CHECK);
- break;
- }
- case INVOKESPECIAL: {
- assert(!method->is_static(), "cannot call static method with invokespecial");
- call = nativeCall_at(_instructions->start() + pc_offset);
- _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type);
- call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub(), JVMCI_CHECK);
- break;
- }
- default:
- JVMCI_ERROR("invalid _next_call_type value");
- break;
- }
- if (Continuations::enabled()) {
- // Check for proper post_call_nop
- NativePostCallNop* nop = nativePostCallNop_at(call->next_instruction_address());
- if (nop == nullptr) {
- JVMCI_ERROR("missing post call nop at offset %d", pc_offset);
- } else {
- _instructions->relocate(call->next_instruction_address(), relocInfo::post_call_nop_type);
- }
- }
-}
-
-bool CodeInstaller::pd_relocate(address pc, jint mark) {
- switch (mark) {
- case POLL_NEAR:
- // This is unhandled and will be reported by the caller
- return false;
- case POLL_FAR:
- _instructions->relocate(pc, relocInfo::poll_type);
- return true;
- case POLL_RETURN_NEAR:
- // This is unhandled and will be reported by the caller
- return false;
- case POLL_RETURN_FAR:
- _instructions->relocate(pc, relocInfo::poll_return_type);
- return true;
-#if INCLUDE_ZGC
- case Z_BARRIER_RELOCATION_FORMAT_LOAD_GOOD_BEFORE_TB_X:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeTbX);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_MARK_BAD_BEFORE_MOV:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadBeforeMov);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_STORE_GOOD_BEFORE_MOV:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodBeforeMov);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_STORE_BAD_BEFORE_MOV:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreBadBeforeMov);
- return true;
-#endif
-
- }
- return false;
-}
-
-// convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, JVMCI_TRAPS) {
- if (jvmci_reg < Register::number_of_registers) {
- return as_Register(jvmci_reg)->as_VMReg();
- } else {
- jint floatRegisterNumber = jvmci_reg - Register::number_of_declared_registers;
- if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegister::number_of_registers) {
- return as_FloatRegister(floatRegisterNumber)->as_VMReg();
- }
- JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
- }
-}
-
-bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
- return !hotspotRegister->is_FloatRegister();
-}
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index fdb016acf31..eb658ba4e30 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -505,21 +505,6 @@ int MacroAssembler::patch_oop(address insn_addr, address o) {
return instructions * NativeInstruction::instruction_size;
}
-int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
- // Metadata pointers are either narrow (32 bits) or wide (48 bits).
- // We encode narrow ones by setting the upper 16 bits in the first
- // instruction.
- NativeInstruction *insn = nativeInstruction_at(insn_addr);
- assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
- nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
-
- MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
-
- Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
- Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
- return 2 * NativeInstruction::instruction_size;
-}
-
void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp) {
ldr(tmp, Address(rthread, JavaThread::polling_word_offset()));
if (at_return) {
@@ -2162,7 +2147,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
Register offset = rscratch2;
Label L_loop_search_receiver, L_loop_search_empty;
- Label L_restart, L_found_recv, L_found_empty, L_polymorphic, L_count_update;
+ Label L_restart, L_found_recv, L_found_empty, L_count_update;
// The code here recognizes three major cases:
// A. Fastest: receiver found in the table
@@ -2192,21 +2177,20 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
// if (receiver(i) == recv) goto found_recv(i);
// }
//
- // // Fast: no receiver, but profile is full
+ // // Fast: no receiver, but profile is not full
// for (i = 0; i < receiver_count(); i++) {
// if (receiver(i) == null) goto found_null(i);
// }
- // goto polymorphic
+ //
+ // // Slow: profile is full, polymorphic case
+ // count++;
+ // return
//
// // Slow: try to install receiver
// found_null(i):
// CAS(&receiver(i), null, recv);
// goto restart
//
- // polymorphic:
- // count++;
- // return
- //
// found_recv(i):
// *receiver_count(i)++
//
@@ -2223,7 +2207,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
sub(rscratch1, offset, end_receiver_offset);
cbnz(rscratch1, L_loop_search_receiver);
- // Fast: no receiver, but profile is full
+ // Fast: no receiver, but profile is not full
mov(offset, base_receiver_offset);
bind(L_loop_search_empty);
ldr(rscratch1, Address(mdp, offset));
@@ -2231,9 +2215,13 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
add(offset, offset, receiver_step);
sub(rscratch1, offset, end_receiver_offset);
cbnz(rscratch1, L_loop_search_empty);
- b(L_polymorphic);
- // Slow: try to install receiver
+ // Slow: Receiver is not found and table is full.
+ // Increment polymorphic counter instead of receiver slot.
+ mov(offset, poly_count_offset);
+ b(L_count_update);
+
+ // Slowest: try to install receiver
bind(L_found_empty);
// Atomically swing receiver slot: null -> recv.
@@ -2252,17 +2240,11 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
// and just restart the search from the beginning.
b(L_restart);
- // Counter updates:
-
- // Increment polymorphic counter instead of receiver slot.
- bind(L_polymorphic);
- mov(offset, poly_count_offset);
- b(L_count_update);
-
// Found a receiver, convert its slot offset to corresponding count offset.
bind(L_found_recv);
add(offset, offset, receiver_to_count_step);
+ // Finally, update the counter
bind(L_count_update);
increment(Address(mdp, offset), DataLayout::counter_increment);
}
@@ -2667,7 +2649,7 @@ int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
void MacroAssembler::membar(Membar_mask_bits order_constraint) {
address prev = pc() - NativeMembar::instruction_size;
- address last = code()->last_insn();
+ address last = code()->last_merge_candidate();
if (last != nullptr && nativeInstruction_at(last)->is_Membar() && prev == last) {
NativeMembar *bar = NativeMembar_at(prev);
if (AlwaysMergeDMB) {
@@ -2687,10 +2669,11 @@ void MacroAssembler::membar(Membar_mask_bits order_constraint) {
BLOCK_COMMENT("merged membar");
return;
} else {
- // A special case like "DMB ST;DMB LD;DMB ST", the last DMB can be skipped
- // We need check the last 2 instructions
+ // A special case like "DMB ST;DMB LD;DMB ST", the last DMB can be skipped.
+ // We need to check the second-to-last instruction, only if it is inside
+ // the current code section.
address prev2 = prev - NativeMembar::instruction_size;
- if (last != code()->last_label() && nativeInstruction_at(prev2)->is_Membar()) {
+ if (prev2 >= begin() && last != code()->last_label() && nativeInstruction_at(prev2)->is_Membar()) {
NativeMembar *bar2 = NativeMembar_at(prev2);
assert(bar2->get_kind() == order_constraint, "it should be merged before");
BLOCK_COMMENT("merged membar(elided)");
@@ -2698,21 +2681,21 @@ void MacroAssembler::membar(Membar_mask_bits order_constraint) {
}
}
}
- code()->set_last_insn(pc());
+ code()->set_last_merge_candidate(pc());
dmb(Assembler::barrier(order_constraint));
}
bool MacroAssembler::try_merge_ldst(Register rt, const Address &adr, size_t size_in_bytes, bool is_store) {
if (ldst_can_merge(rt, adr, size_in_bytes, is_store)) {
merge_ldst(rt, adr, size_in_bytes, is_store);
- code()->clear_last_insn();
+ code()->clear_last_merge_candidate();
return true;
} else {
assert(size_in_bytes == 8 || size_in_bytes == 4, "only 8 bytes or 4 bytes load/store is supported.");
const uint64_t mask = size_in_bytes - 1;
if (adr.getMode() == Address::base_plus_offset &&
(adr.offset() & mask) == 0) { // only supports base_plus_offset.
- code()->set_last_insn(pc());
+ code()->set_last_merge_candidate(pc());
}
return false;
}
@@ -3875,7 +3858,7 @@ bool MacroAssembler::ldst_can_merge(Register rt,
size_t cur_size_in_bytes,
bool is_store) const {
address prev = pc() - NativeInstruction::instruction_size;
- address last = code()->last_insn();
+ address last = code()->last_merge_candidate();
if (last == nullptr || !nativeInstruction_at(last)->is_Imm_LdSt()) {
return false;
@@ -6735,13 +6718,14 @@ void MacroAssembler::java_round_float(Register dst, FloatRegister src,
// by the call to JavaThread::aarch64_get_thread_helper() or, indeed,
// the call setup code.
//
-// On Linux, aarch64_get_thread_helper() clobbers only r0, r1, and flags.
+// On Linux and Windows, aarch64_get_thread_helper() is implemented in
+// assembly and clobbers only r0, r1, and flags.
// On other systems, the helper is a usual C function.
//
void MacroAssembler::get_thread(Register dst) {
RegSet saved_regs =
- LINUX_ONLY(RegSet::range(r0, r1) + lr - dst)
- NOT_LINUX (RegSet::range(r0, r17) + lr - dst);
+ BSD_ONLY(RegSet::range(r0, r17) + lr - dst)
+ NOT_BSD (RegSet::range(r0, r1) + lr - dst);
protect_return_address();
push(saved_regs, sp);
@@ -7291,3 +7275,26 @@ void MacroAssembler::neon_vector_rotate(FloatRegister dst, SIMD_Arrangement T,
sli(dst, T, src, lshift);
}
}
+
+void MacroAssembler::try_to_replace_prev_vector_copy_with_movprfx(FloatRegister dst) { // Peephole: if the instruction just before pc() is a NEON/SVE vector mov alias writing dst, rewrite it in place as an SVE movprfx.
+ if (code_section()->is_empty()) { // nothing emitted in this section yet, so there is no previous instruction to inspect
+ return;
+ }
+
+ address prev = pc() - NativeInstruction::instruction_size; // address of the instruction immediately preceding the current pc
+ uint32_t insn = nativeInstruction_at(prev)->encoding();
+ if (!NativeInstruction::is_neon_vector_mov_alias(insn) &&
+ !NativeInstruction::is_sve_vector_mov_alias(insn)) { // only vector register copies are candidates
+ return;
+ }
+
+ // The destructive instruction must reuse the mov alias destination.
+ uint32_t rd = Instruction_aarch64::extract(insn, 4, 0); // bits 4:0 of the copy, compared against dst below
+ if (rd != (uint32_t)dst->encoding()) {
+ return;
+ }
+
+ uint32_t rn = Instruction_aarch64::extract(insn, 9, 5); // bits 9:5 of the copy, passed on as the movprfx source
+ Instruction_aarch64::patch(prev, 31, 0, // overwrite all 32 bits of the previous instruction
+ NativeInstruction::encode_sve_movprfx(rd, rn));
+}
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index c02df666a87..b1050b45731 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -168,7 +168,7 @@ class MacroAssembler: public Assembler {
void bind(Label& L) {
Assembler::bind(L);
- code()->clear_last_insn();
+ code()->clear_last_merge_candidate();
code()->set_last_label(pc());
}
@@ -693,7 +693,6 @@ public:
#endif
static int patch_oop(address insn_addr, address o);
- static int patch_narrow_klass(address insn_addr, narrowKlass n);
// Return whether code is emitted to a scratch blob.
virtual bool in_scratch_emit_size() {
@@ -1735,7 +1734,103 @@ public:
private:
// Check the current thread doesn't need a cross modify fence.
void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
+ void try_to_replace_prev_vector_copy_with_movprfx(FloatRegister dst);
+public:
+  // Calls sve_movprfx(dst, src) unless dst and src are the same register,
+  // in which case nothing is emitted.
+  void maybe_movprfx(FloatRegister dst, FloatRegister src) {
+    const bool distinct_regs = (dst != src);
+    if (distinct_regs) {
+      sve_movprfx(dst, src);
+    }
+  }
+
+// Wrappers for SVE explicit destructive instructions, overriding the
+// same-signature Assembler entry points to enable movprfx fusion optimization.
+//
+// Implicit destructive instructions (e.g. predicated unary ops like sve_abs/
+// sve_neg/sve_not, whose ISA encoding allows Zd != Zn but whose use as a Java
+// Vector API masked operation requires pass-through of the first source) are
+// not covered here. For those, the .ad file is responsible for emitting
+// movprfx explicitly via maybe_movprfx() before the destructive op.
+//
+// SVE_DESTRUCTIVE_BINARY_INS(INSN) re-exports the Assembler overloads of INSN
+// and adds a (Zd, T, Pg, Zm) overload. That overload first tries to turn a
+// preceding vector copy into a movprfx (skipped when Zd and Zm are the same
+// register) and then emits the Assembler instruction.
+#define SVE_DESTRUCTIVE_BINARY_INS(INSN)                                \
+  using Assembler::INSN;                                                \
+  void INSN(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,          \
+            FloatRegister Zm) {                                         \
+    const bool zd_differs_from_zm = (Zd != Zm);                         \
+    if (zd_differs_from_zm) {                                           \
+      try_to_replace_prev_vector_copy_with_movprfx(Zd);                 \
+    }                                                                   \
+    Assembler::INSN(Zd, T, Pg, Zm);                                     \
+  }
+
+  SVE_DESTRUCTIVE_BINARY_INS(sve_add);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_and);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_asr);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_bic);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_eor);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fabd);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fadd);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fdiv);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fmax);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fmin);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fmul);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_fsub);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_lsl);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_lsr);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_mul);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_orr);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_smax);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_smin);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_sqadd);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_sqsub);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_sub);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_uqadd);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_uqsub);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_umax);
+  SVE_DESTRUCTIVE_BINARY_INS(sve_umin);
+
+#undef SVE_DESTRUCTIVE_BINARY_INS
+
+// Wrappers for the (Zd, T, Pg, shift) shift-by-immediate overloads. Zd is the
+// only vector register operand, so the movprfx fusion attempt is made
+// unconditionally before the Assembler instruction is emitted.
+// NOTE(review): no using-declaration is emitted here; presumably the one
+// generated for the same names by the register-operand wrappers keeps the
+// remaining Assembler overloads visible -- confirm.
+#define SVE_DESTRUCTIVE_SHIFT_IMM_INS(INSN)                                  \
+  void INSN(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int shift) {  \
+    try_to_replace_prev_vector_copy_with_movprfx(Zd);                        \
+    Assembler::INSN(Zd, T, Pg, shift);                                       \
+  }
+
+  SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_asr);
+  SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_lsl);
+  SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_lsr);
+
+#undef SVE_DESTRUCTIVE_SHIFT_IMM_INS
+
+// Wrappers for the (Zd, T, imm) immediate overloads, which take no predicate
+// register. IMM_T is the C++ type of the immediate for the given
+// instruction. The movprfx fusion attempt is unconditional because Zd is the
+// only vector register operand.
+// NOTE(review): no using-declaration is emitted here; presumably the one
+// generated for the same names by the register-operand wrappers keeps the
+// remaining Assembler overloads visible -- confirm.
+#define SVE_DESTRUCTIVE_UNPRED_IMM_INS(INSN, IMM_T)            \
+  void INSN(FloatRegister Zd, SIMD_RegVariant T, IMM_T imm) {  \
+    try_to_replace_prev_vector_copy_with_movprfx(Zd);          \
+    Assembler::INSN(Zd, T, imm);                               \
+  }
+
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_add, unsigned);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_sub, unsigned);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_and, uint64_t);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_eor, uint64_t);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_orr, uint64_t);
+
+#undef SVE_DESTRUCTIVE_UNPRED_IMM_INS
+
+// Wrappers for the (Zd, T, Pg, Zn, Zm) overloads. The Assembler overloads of
+// INSN are re-exported, and the added overload attempts movprfx fusion only
+// when Zd is a different register from both Zn and Zm; the Assembler
+// instruction is emitted either way.
+#define SVE_DESTRUCTIVE_TERNARY_INS(INSN)                               \
+  using Assembler::INSN;                                                \
+  void INSN(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,          \
+            FloatRegister Zn, FloatRegister Zm) {                       \
+    const bool zd_distinct_from_sources = (Zd != Zn) && (Zd != Zm);     \
+    if (zd_distinct_from_sources) {                                     \
+      try_to_replace_prev_vector_copy_with_movprfx(Zd);                 \
+    }                                                                   \
+    Assembler::INSN(Zd, T, Pg, Zn, Zm);                                 \
+  }
+
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmad);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmla);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmls);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmsb);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmad);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmla);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmls);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmsb);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_mla);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_mls);
+
+#undef SVE_DESTRUCTIVE_TERNARY_INS
+
+  // sve_eor3 wrapper: keeps the Assembler overloads visible and, when Zd is a
+  // different register from both Zm and Zk, first attempts movprfx fusion
+  // with the preceding vector copy. The Assembler instruction is emitted in
+  // every case.
+  using Assembler::sve_eor3;
+  void sve_eor3(FloatRegister Zd, FloatRegister Zm, FloatRegister Zk) {
+    const bool zd_distinct_from_sources = (Zd != Zm) && (Zd != Zk);
+    if (zd_distinct_from_sources) {
+      try_to_replace_prev_vector_copy_with_movprfx(Zd);
+    }
+    Assembler::sve_eor3(Zd, Zm, Zk);
+  }
};
#ifdef ASSERT
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
index 8b76b96d345..2ddea2fdcb5 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
@@ -37,9 +37,6 @@
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciEnv.hpp"
-#endif
void NativeCall::verify() {
assert(NativeCall::is_call_at((address)this), "unexpected code at call site");
@@ -363,30 +360,6 @@ void NativeCallTrampolineStub::set_destination(address new_destination) {
OrderAccess::release();
}
-#if INCLUDE_JVMCI
-// Generate a trampoline for a branch to dest. If there's no need for a
-// trampoline, simply patch the call directly to dest.
-void NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest, JVMCI_TRAPS) {
- MacroAssembler a(&cbuf);
-
- if (!a.far_branches()) {
- // If not using far branches, patch this call directly to dest.
- set_destination(dest);
- } else if (!is_NativeCallTrampolineStub_at(instruction_address() + displacement())) {
- // If we want far branches and there isn't a trampoline stub, emit one.
- address stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest);
- if (stub == nullptr) {
- JVMCI_ERROR("could not emit trampoline stub - code cache is full");
- }
- // The relocation created while emitting the stub will ensure this
- // call instruction is subsequently patched to call the stub.
- } else {
- // Not sure how this can be happen but be defensive
- JVMCI_ERROR("single-use stub should not exist");
- }
-}
-#endif
-
void NativePostCallNop::make_deopt() {
NativeDeoptInstruction::insert(addr_at(0));
}
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index ab9896fa426..57bb9a91533 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -30,9 +30,6 @@
#include "runtime/icache.hpp"
#include "runtime/os.hpp"
#include "runtime/os.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciExceptions.hpp"
-#endif
// We have interfaces for the following instructions:
@@ -143,6 +140,29 @@ public:
Instruction_aarch64::extract(insn, 23, 23) == 0b0 &&
Instruction_aarch64::extract(insn, 26, 25) == 0b00;
}
+
+  // Returns true when insn has bit 31 clear, bits 29:21 equal to
+  // 0b001110101, bits 15:10 equal to 0b000111, and the same register number
+  // in bits 9:5 and bits 20:16. Bit 30 is not examined.
+  // NOTE(review): this appears to be the NEON ORR (vector, register) encoding
+  // with Rn == Rm, i.e. the MOV (vector) alias -- confirm against the Arm ARM.
+  static bool is_neon_vector_mov_alias(uint32_t insn) {
+    const bool opcode_matches =
+        Instruction_aarch64::extract(insn, 31, 31) == 0 &&
+        Instruction_aarch64::extract(insn, 29, 21) == 0b001110101 &&
+        Instruction_aarch64::extract(insn, 15, 10) == 0b000111;
+    if (!opcode_matches) {
+      return false;
+    }
+    const uint32_t rn = Instruction_aarch64::extract(insn, 9, 5);
+    const uint32_t rm = Instruction_aarch64::extract(insn, 20, 16);
+    return rn == rm;
+  }
+
+  // Returns true when insn has bits 31:21 equal to 0b00000100011, bits 15:10
+  // equal to 0b001100, and the same register number in bits 9:5 and
+  // bits 20:16.
+  // NOTE(review): this appears to be the SVE unpredicated ORR (vectors)
+  // encoding with Zn == Zm, i.e. the MOV alias -- confirm against the Arm ARM.
+  static bool is_sve_vector_mov_alias(uint32_t insn) {
+    const bool opcode_matches =
+        Instruction_aarch64::extract(insn, 31, 21) == 0b00000100011 &&
+        Instruction_aarch64::extract(insn, 15, 10) == 0b001100;
+    if (!opcode_matches) {
+      return false;
+    }
+    const uint32_t zn = Instruction_aarch64::extract(insn, 9, 5);
+    const uint32_t zm = Instruction_aarch64::extract(insn, 20, 16);
+    return zn == zm;
+  }
+
+  // Builds a 32-bit instruction word from the fixed pattern (0x1082f << 10)
+  // with src placed at bits 9:5 and dst at bits 4:0. Neither register number
+  // is masked here, so callers are expected to pass values that fit in five
+  // bits.
+  static uint32_t encode_sve_movprfx(uint32_t dst, uint32_t src) {
+    const uint32_t fixed_bits = 0x1082f << 10;
+    const uint32_t src_field = src << 5;
+    return fixed_bits | src_field | dst;
+  }
};
inline NativeInstruction* nativeInstruction_at(address address) {
@@ -215,9 +235,6 @@ public:
void set_destination_mt_safe(address dest);
address get_trampoline();
-#if INCLUDE_JVMCI
- void trampoline_jump(CodeBuffer &cbuf, address dest, JVMCI_TRAPS);
-#endif
};
inline NativeCall* nativeCall_at(address address) {
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index 73b631029a0..0e3d9d76b94 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -61,9 +61,6 @@
#include "adfiles/ad_aarch64.hpp"
#include "opto/runtime.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciJavaClasses.hpp"
-#endif
#define __ masm->
@@ -120,17 +117,14 @@ int RegisterSaver::reg_offset_in_bytes(Register r) {
int slots_per_vect = FloatRegister::save_slots_per_register;
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (_save_vectors) {
slots_per_vect = FloatRegister::slots_per_neon_register;
-
-#ifdef COMPILER2
if (Matcher::supports_scalable_vector()) {
slots_per_vect = Matcher::scalable_vector_reg_size(T_FLOAT);
}
-#endif
}
-#endif
+#endif // COMPILER2
int r0_offset = v0_offset_in_bytes() + (slots_per_vect * FloatRegister::number_of_registers) * BytesPerInt;
return r0_offset + r->encoding() * wordSize;
@@ -170,7 +164,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
#endif
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (_save_vectors) {
int extra_save_slots_per_register = 0;
// Save upper half of vector registers
@@ -185,8 +179,8 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
additional_frame_words += ((extra_vector_bytes + total_predicate_in_bytes) / wordSize);
}
#else
- assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
+ assert(!_save_vectors, "vectors are generated only by C2");
+#endif // COMPILER2
int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
reg_save_size * BytesPerInt, 16);
@@ -241,9 +235,7 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
__ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
#else
-#if !INCLUDE_JVMCI
- assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
+ assert(!_save_vectors, "vectors are generated only by C2");
__ pop_CPU_state(_save_vectors);
#endif
__ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
@@ -569,18 +561,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
// Pre-load the register-jump target early, to schedule it better.
__ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- // check if this call should be routed towards a specific entry point
- __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
- Label no_alternative_target;
- __ cbz(rscratch2, no_alternative_target);
- __ mov(rscratch1, rscratch2);
- __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
- __ bind(no_alternative_target);
- }
-#endif // INCLUDE_JVMCI
-
// Now generate the shuffle code.
for (int i = 0; i < total_args_passed; i++) {
if (sig_bt[i] == T_VOID) {
@@ -2076,11 +2056,6 @@ void SharedRuntime::generate_deopt_blob() {
ResourceMark rm;
// Setup code generation tools
int pad = 0;
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- pad += 512; // Increase the buffer size when compiling for JVMCI
- }
-#endif
const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
if (blob != nullptr) {
@@ -2093,7 +2068,7 @@ void SharedRuntime::generate_deopt_blob() {
int frame_size_in_words;
OopMap* map = nullptr;
OopMapSet *oop_maps = new OopMapSet();
- RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0);
+ RegisterSaver reg_save(COMPILER2_PRESENT(true) NOT_COMPILER2(false));
// -------------
// This code enters when returning to a de-optimized nmethod. A return
@@ -2138,13 +2113,6 @@ void SharedRuntime::generate_deopt_blob() {
__ b(cont);
int reexecute_offset = __ pc() - start;
-#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (UseJVMCICompiler) {
- // JVMCI does not use this kind of deoptimization
- __ should_not_reach_here();
- }
-#endif
-
// Reexecute case
// return address is the pc describes what bci to do re-execute at
@@ -2154,45 +2122,6 @@ void SharedRuntime::generate_deopt_blob() {
__ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
__ b(cont);
-#if INCLUDE_JVMCI
- Label after_fetch_unroll_info_call;
- int implicit_exception_uncommon_trap_offset = 0;
- int uncommon_trap_offset = 0;
-
- if (EnableJVMCI) {
- implicit_exception_uncommon_trap_offset = __ pc() - start;
-
- __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
- __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
-
- uncommon_trap_offset = __ pc() - start;
-
- // Save everything in sight.
- reg_save.save_live_registers(masm, 0, &frame_size_in_words);
- // fetch_unroll_info needs to call last_java_frame()
- Label retaddr;
- __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
-
- __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
- __ movw(rscratch1, -1);
- __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
-
- __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
- __ mov(c_rarg0, rthread);
- __ movw(c_rarg2, rcpool); // exec mode
- __ lea(rscratch1,
- RuntimeAddress(CAST_FROM_FN_PTR(address,
- Deoptimization::uncommon_trap)));
- __ blr(rscratch1);
- __ bind(retaddr);
- oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
-
- __ reset_last_Java_frame(false);
-
- __ b(after_fetch_unroll_info_call);
- } // EnableJVMCI
-#endif // INCLUDE_JVMCI
-
int exception_offset = __ pc() - start;
// Prolog for exception case
@@ -2283,12 +2212,6 @@ void SharedRuntime::generate_deopt_blob() {
__ reset_last_Java_frame(false);
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- __ bind(after_fetch_unroll_info_call);
- }
-#endif
-
// Load UnrollBlock* into r5
__ mov(r5, r0);
@@ -2445,12 +2368,6 @@ void SharedRuntime::generate_deopt_blob() {
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
- _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
- }
-#endif
AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
}
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index fddb37b7b8d..8e9af2b7b8a 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -6276,6 +6276,24 @@ class StubGenerator: public StubCodeGenerator {
// static int implKyberNttMult(
// short[] result, short[] ntta, short[] nttb, short[] zetas) {}
//
+  // The algorithm used here differs from the one in the Java
+  // implementation: it uses Montgomery multiplications instead of Barrett
+  // reduction, but the end result modulo MLKEM_Q is the same. This is the
+  // Java equivalent of this intrinsic implementation:
+ // static void implKyberNttMultJava(short[] result, short[] ntta, short[] nttb) {
+ // for (int m = 0; m < ML_KEM_N / 2; m++) {
+ // int a0 = ntta[2 * m];
+ // int a1 = ntta[2 * m + 1];
+ // int b0 = nttb[2 * m];
+ // int b1 = nttb[2 * m + 1];
+ // int r = montMul(a0, b0) +
+ // montMul(montMul(a1, b1), MONT_ZETAS_FOR_NTT_MULT[m]);
+ // result[2 * m] = (short) montMul(r, MONT_R_SQUARE_MOD_Q);
+ // result[2 * m + 1] = (short) montMul(
+ // (montMul(a0, b1) + montMul(a1, b0)), MONT_R_SQUARE_MOD_Q);
+ // }
+ // }
+ //
// result (short[256]) = c_rarg0
// ntta (short[256]) = c_rarg1
// nttb (short[256]) = c_rarg2
@@ -12636,7 +12654,7 @@ class StubGenerator: public StubCodeGenerator {
}
void generate_compiler_stubs() {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (UseSVE == 0) {
generate_iota_indices(StubId::stubgen_vector_iota_indices_id);
@@ -12664,7 +12682,6 @@ class StubGenerator: public StubCodeGenerator {
generate_string_indexof_stubs();
-#ifdef COMPILER2
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
@@ -12712,8 +12729,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_montgomerySquare = start;
}
-#endif // COMPILER2
-
if (UseChaCha20Intrinsics) {
StubRoutines::_chacha20Block = generate_chacha20Block_blockpar();
}
@@ -12795,7 +12810,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
public:
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
index 6067408ef13..97bdf5055c4 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -48,9 +48,6 @@ enum platform_dependent_constants {
class aarch64 {
friend class StubGenerator;
friend class StubRoutines;
-#if INCLUDE_JVMCI
- friend class JVMCIVMStructs;
-#endif
// declare fields for arch-specific entries
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
index dd70c98797f..fd6247bf362 100644
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -524,30 +524,6 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
// null last_sp until next java call
__ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
-#if INCLUDE_JVMCI
- // Check if we need to take lock at entry of synchronized method. This can
- // only occur on method entry so emit it only for vtos with step 0.
- if (EnableJVMCI && state == vtos && step == 0) {
- Label L;
- __ ldrb(rscratch1, Address(rthread, JavaThread::pending_monitorenter_offset()));
- __ cbz(rscratch1, L);
- // Clear flag.
- __ strb(zr, Address(rthread, JavaThread::pending_monitorenter_offset()));
- // Take lock.
- lock_method();
- __ bind(L);
- } else {
-#ifdef ASSERT
- if (EnableJVMCI) {
- Label L;
- __ ldrb(rscratch1, Address(rthread, JavaThread::pending_monitorenter_offset()));
- __ cbz(rscratch1, L);
- __ stop("unexpected pending monitor in deopt entry");
- __ bind(L);
- }
-#endif
- }
-#endif
// handle exceptions
{
Label L;
diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
index 4c64b265d92..b6cf58d6062 100644
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -2622,7 +2622,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
// membar it's possible for a simple Dekker test to fail if loads
// use LDR;DMB but stores use STLR. This can happen if C2 compiles
// the stores in one method and we interpret the loads in another.
- if (!CompilerConfig::is_c1_or_interpreter_only_no_jvmci()){
+ if (!CompilerConfig::is_c1_or_interpreter_only()){
Label notVolatile;
__ tbz(flags, ResolvedFieldEntry::is_volatile_shift, notVolatile);
__ membar(MacroAssembler::AnyAny);
@@ -3200,7 +3200,7 @@ void TemplateTable::fast_accessfield(TosState state)
// membar it's possible for a simple Dekker test to fail if loads
// use LDR;DMB but stores use STLR. This can happen if C2 compiles
// the stores in one method and we interpret the loads in another.
- if (!CompilerConfig::is_c1_or_interpreter_only_no_jvmci()) {
+ if (!CompilerConfig::is_c1_or_interpreter_only()) {
Label notVolatile;
__ tbz(r3, ResolvedFieldEntry::is_volatile_shift, notVolatile);
__ membar(MacroAssembler::AnyAny);
@@ -3263,7 +3263,7 @@ void TemplateTable::fast_xaccess(TosState state)
// membar it's possible for a simple Dekker test to fail if loads
// use LDR;DMB but stores use STLR. This can happen if C2 compiles
// the stores in one method and we interpret the loads in another.
- if (!CompilerConfig::is_c1_or_interpreter_only_no_jvmci()) {
+ if (!CompilerConfig::is_c1_or_interpreter_only()) {
Label notVolatile;
__ load_unsigned_byte(r3, Address(r2, in_bytes(ResolvedFieldEntry::flags_offset())));
__ tbz(r3, ResolvedFieldEntry::is_volatile_shift, notVolatile);
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
index c67455e6b79..0683202515c 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
@@ -38,7 +38,6 @@ class stringStream;
class VM_Version : public Abstract_VM_Version {
friend class VMStructs;
- friend class JVMCIVMStructs;
protected:
static int _cpu;
@@ -70,9 +69,9 @@ protected:
// Read additional info using OS-specific interfaces
static void get_os_cpu_info();
- // Sets the SVE length and returns a new actual value or negative on error.
- // If the len is larger than the system largest supported SVE vector length,
- // the function sets the largest supported value.
+ // Set the SVE vector length to len. If the vector length cannot be
+ // changed to len, set the length to the largest possible value.
+  // Return the length that will be used, or a negative value if an error occurred.
static int set_and_get_current_sve_vector_length(int len);
static int get_current_sve_vector_length();
diff --git a/src/hotspot/cpu/arm/arm_32.ad b/src/hotspot/cpu/arm/arm_32.ad
index 9438e8da8b5..2af7e253a1a 100644
--- a/src/hotspot/cpu/arm/arm_32.ad
+++ b/src/hotspot/cpu/arm/arm_32.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -501,7 +501,7 @@ operand immIRotn() %{
%}
operand immPRot() %{
- predicate(n->get_ptr() == 0 || (AsmOperand::is_rotated_imm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none));
+ predicate(n->get_ptr() == 0 || (AsmOperand::is_rotated_imm(n->get_ptr()) && ((ConPNode*)n)->type()->is_ptr()->reloc() == relocInfo::none));
match(ConP);
diff --git a/src/hotspot/cpu/arm/compiledIC_arm.cpp b/src/hotspot/cpu/arm/compiledIC_arm.cpp
index 86927cd24ab..eb035e54faa 100644
--- a/src/hotspot/cpu/arm/compiledIC_arm.cpp
+++ b/src/hotspot/cpu/arm/compiledIC_arm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,7 @@
#include "runtime/safepoint.hpp"
// ----------------------------------------------------------------------------
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
#define __ masm->
// emit call stub, compiled java to interpreter
address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark) {
@@ -86,7 +86,7 @@ address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address ma
int CompiledDirectCall::reloc_to_interp_stub() {
return 10; // 4 in emit_to_interp_stub + 1 in Java_Static_Call
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
int CompiledDirectCall::to_trampoline_stub_size() {
// ARM doesn't use trampolines.
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index aee407864ee..89f7791626d 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -1576,14 +1576,21 @@ void InterpreterMacroAssembler::notify_method_exit(
// Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
// track stack depth. If it is possible to enter interp_only_mode we add
// the code to check if the event should be sent.
- if (mode == NotifyJVMTI && can_post_interpreter_events()) {
+ if (mode == NotifyJVMTI && (can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
Label L;
+ const Register thread_state = R2_tmp;
+
// Note: frame::interpreter_frame_result has a dependency on how the
// method result is saved across the call to post_method_exit. If this
// is changed then the interpreter_frame_result implementation will
// need to be updated too.
+ ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));
+ cbz(thread_state, L); // if (thread->jvmti_thread_state() == nullptr) exit;
+
+ ldr_s32(thread_state, Address(thread_state, JvmtiThreadState::frame_pop_cnt_offset()));
ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
+ orr(Rtemp, Rtemp, thread_state);
cbz(Rtemp, L);
if (native) {
diff --git a/src/hotspot/cpu/arm/jvmciCodeInstaller_arm.cpp b/src/hotspot/cpu/arm/jvmciCodeInstaller_arm.cpp
deleted file mode 100644
index 5b480afb1fe..00000000000
--- a/src/hotspot/cpu/arm/jvmciCodeInstaller_arm.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "jvmci/jvmciCodeInstaller.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#include "jvmci/jvmciCompilerToVM.hpp"
-#include "jvmci/jvmciJavaClasses.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "vmreg_arm.inline.hpp"
-
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
- Unimplemented();
- return 0;
-}
-
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
- Unimplemented();
-}
-
-// convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
- return nullptr;
-}
-
-bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
- return false;
-}
diff --git a/src/hotspot/cpu/arm/vm_version_arm.hpp b/src/hotspot/cpu/arm/vm_version_arm.hpp
index 11c89da2005..c400cad24b4 100644
--- a/src/hotspot/cpu/arm/vm_version_arm.hpp
+++ b/src/hotspot/cpu/arm/vm_version_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,8 +29,6 @@
#include "runtime/globals_extension.hpp"
class VM_Version: public Abstract_VM_Version {
- friend class JVMCIVMStructs;
-
static bool _has_simd;
static bool _has_mp_ext;
diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
index 61780a73969..65e9505c812 100644
--- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -443,15 +443,13 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
return; // CodeCache is full
}
- bool success = ce->emit_trampoline_stub_for_call(SharedRuntime::get_resolve_static_call_stub());
- if (!success) { return; }
-
- __ relocate(relocInfo::static_call_type);
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ code()->set_insts_mark();
- __ bl(__ pc());
+ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
+ relocInfo::static_call_type);
+ address call_pc = __ trampoline_call(resolve);
+ if (call_pc == nullptr) {
+ ce->bailout("const/stub overflow in call with trampoline");
+ return;
+ }
ce->add_call_info_here(info());
ce->verify_oop_map(info());
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 777b41577be..1270471d150 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -609,67 +609,25 @@ void LIR_Assembler::align_call(LIR_Code) {
// do nothing since all instructions are word aligned on ppc
}
-
-bool LIR_Assembler::emit_trampoline_stub_for_call(address target, Register Rtoc) {
- int start_offset = __ offset();
- // Put the entry point as a constant into the constant pool.
- const address entry_point_toc_addr = __ address_constant(target, RelocationHolder::none);
- if (entry_point_toc_addr == nullptr) {
- bailout("const section overflow");
- return false;
- }
- const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
-
- // Emit the trampoline stub which will be related to the branch-and-link below.
- address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset, Rtoc);
- if (!stub) {
- bailout("no space for trampoline stub");
- return false;
- }
- return true;
-}
-
-
void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
assert(rtype==relocInfo::opt_virtual_call_type || rtype==relocInfo::static_call_type, "unexpected rtype");
- bool success = emit_trampoline_stub_for_call(op->addr());
- if (!success) { return; }
-
- __ relocate(rtype);
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ code()->set_insts_mark();
- __ bl(__ pc());
+ address call_pc = __ trampoline_call(AddressLiteral(op->addr(), rtype));
+ if (call_pc == nullptr) {
+ bailout("const/stub overflow in call with trampoline");
+ return;
+ }
add_call_info(code_offset(), op->info());
__ post_call_nop();
}
-
void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
__ calculate_address_from_global_toc(R2_TOC, __ method_toc());
-
- // Virtual call relocation will point to ic load.
- address virtual_call_meta_addr = __ pc();
- // Load a clear inline cache.
- AddressLiteral empty_ic((address) Universe::non_oop_word());
- bool success = __ load_const_from_method_toc(R19_inline_cache_reg, empty_ic, R2_TOC);
+ bool success = __ ic_call(R2_TOC, op->addr());
if (!success) {
- bailout("const section overflow");
+ bailout("const/stub overflow in ic_call with trampoline");
return;
}
- // Call to fixup routine. Fixup routine uses ScopeDesc info
- // to determine who we intended to call.
- __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
-
- success = emit_trampoline_stub_for_call(op->addr(), R2_TOC);
- if (!success) { return; }
-
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ bl(__ pc());
add_call_info(code_offset(), op->info());
__ post_call_nop();
}
@@ -2268,39 +2226,12 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
}
+// kills recv
void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
ciMethodData *md, ciProfileData *data,
- Register recv, Register tmp1, Label* update_done) {
- uint i;
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
- Label next_test;
- // See if the receiver is receiver[n].
- __ ld(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias, mdo);
- __ verify_klass_ptr(tmp1);
- __ cmpd(CR0, recv, tmp1);
- __ bne(CR0, next_test);
-
- __ ld(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
- __ addi(tmp1, tmp1, DataLayout::counter_increment);
- __ std(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
- __ b(*update_done);
-
- __ bind(next_test);
- }
-
- // Didn't find receiver; find next empty slot and fill it in.
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
- Label next_test;
- __ ld(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias, mdo);
- __ cmpdi(CR0, tmp1, 0);
- __ bne(CR0, next_test);
- __ li(tmp1, DataLayout::counter_increment);
- __ std(recv, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias, mdo);
- __ std(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
- __ b(*update_done);
-
- __ bind(next_test);
- }
+ Register recv, Register tmp) {
+ int mdp_offset = md->byte_offset_of_slot(data, in_ByteSize(0)) - mdo_offset_bias;
+ __ profile_receiver_type(recv, mdo, mdp_offset, tmp, noreg);
}
@@ -2362,15 +2293,9 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
__ b(*obj_is_null);
__ bind(not_null);
- Label update_done;
Register recv = klass_RInfo;
__ load_klass(recv, obj);
- type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1, &update_done);
- const int slot_offset = md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias;
- __ ld(Rtmp1, slot_offset, mdo);
- __ addi(Rtmp1, Rtmp1, DataLayout::counter_increment);
- __ std(Rtmp1, slot_offset, mdo);
- __ bind(update_done);
+ type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1); // kills recv
} else {
__ cmpdi(CR0, obj, 0);
__ beq(CR0, *obj_is_null);
@@ -2469,15 +2394,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ b(done);
__ bind(not_null);
- Label update_done;
Register recv = klass_RInfo;
__ load_klass(recv, value);
- type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1, &update_done);
- const int slot_offset = md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias;
- __ ld(Rtmp1, slot_offset, mdo);
- __ addi(Rtmp1, Rtmp1, DataLayout::counter_increment);
- __ std(Rtmp1, slot_offset, mdo);
- __ bind(update_done);
+ type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1); // kills recv
} else {
__ cmpdi(CR0, value, 0);
__ beq(CR0, done);
@@ -2690,55 +2609,27 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
// We know the type that will be seen at this call site; we can
// statically update the MethodData* rather than needing to do
// dynamic tests on the receiver type.
-
- // NOTE: we should probably put a lock around this search to
- // avoid collisions by concurrent compilations.
ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
- uint i;
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ for (uint i = 0; i < VirtualCallData::row_limit(); i++) {
ciKlass* receiver = vc_data->receiver(i);
if (known_klass->equals(receiver)) {
- __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
- __ addi(tmp1, tmp1, DataLayout::counter_increment);
- __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+ __ increment_mem64(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias,
+ DataLayout::counter_increment, tmp1);
return;
}
}
- // Receiver type not found in profile data; select an empty slot.
-
- // Note that this is less efficient than it should be because it
- // always does a write to the receiver part of the
- // VirtualCallData rather than just the first time.
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
- ciKlass* receiver = vc_data->receiver(i);
- if (receiver == nullptr) {
- metadata2reg(known_klass->constant_encoding(), tmp1);
- __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) - mdo_offset_bias, mdo);
-
- __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
- __ addi(tmp1, tmp1, DataLayout::counter_increment);
- __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
- return;
- }
- }
+ // Receiver type is not found in profile data.
+ // Fall back to runtime helper to handle the rest at runtime.
+ metadata2reg(known_klass->constant_encoding(), recv);
} else {
__ load_klass(recv, recv);
- Label update_done;
- type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
- // Receiver did not match any saved receiver and there is no empty row for it.
- // Increment total counter to indicate polymorphic case.
- __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
- __ addi(tmp1, tmp1, DataLayout::counter_increment);
- __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
-
- __ bind(update_done);
}
+ type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1); // kills recv
} else {
// Static call
- __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
- __ addi(tmp1, tmp1, DataLayout::counter_increment);
- __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+ __ increment_mem64(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias,
+ DataLayout::counter_increment, tmp1);
}
}
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
index dea055710cd..5a065d364b2 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,9 @@
#ifndef CPU_PPC_C1_LIRASSEMBLER_PPC_HPP
#define CPU_PPC_C1_LIRASSEMBLER_PPC_HPP
+// ArrayCopyStub needs access to bailout
+friend class ArrayCopyStub;
+
private:
//////////////////////////////////////////////////////////////////////////////
@@ -49,16 +52,13 @@
// Record the type of the receiver in ReceiverTypeData.
void type_profile_helper(Register mdo, int mdo_offset_bias,
ciMethodData *md, ciProfileData *data,
- Register recv, Register tmp1, Label* update_done);
+ Register recv, Register tmp);
// Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
void setup_md_access(ciMethod* method, int bci,
ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
public:
static const ConditionRegister BOOL_RESULT;
- // Emit trampoline stub for call. Call bailout() if failed. Return true on success.
- bool emit_trampoline_stub_for_call(address target, Register Rtoc = noreg);
-
enum {
_static_call_stub_size = 4 * BytesPerInstWord + MacroAssembler::b64_patchable_size, // or smaller
_call_stub_size = _static_call_stub_size + MacroAssembler::trampoline_stub_size, // or smaller
diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp
index 6b6a792117d..7d2e22b5965 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.cpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -319,7 +319,7 @@ void frame::patch_pc(Thread* thread, address pc) {
#ifdef ASSERT
{
- frame f(this->sp(), pc, this->unextended_sp());
+ frame f(sp(), unextended_sp(), fp(), pc, cb(), oop_map(), is_heap_frame());
assert(f.is_deoptimized_frame() == this->is_deoptimized_frame() && f.pc() == this->pc() && f.raw_pc() == this->raw_pc(),
"must be (f.is_deoptimized_frame(): %d this->is_deoptimized_frame(): %d "
"f.pc(): " INTPTR_FORMAT " this->pc(): " INTPTR_FORMAT " f.raw_pc(): " INTPTR_FORMAT " this->raw_pc(): " INTPTR_FORMAT ")",
diff --git a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
index 3c05f950d0c..123e6d8a0b1 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp
deleted file mode 100644
index 5b24259103f..00000000000
--- a/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2018, 2023, Red Hat, Inc. All rights reserved.
- * Copyright (c) 2012, 2023 SAP SE. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.inline.hpp"
-#include "c1/c1_LIRAssembler.hpp"
-#include "c1/c1_MacroAssembler.hpp"
-#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-
-#define __ masm->masm()->
-
-void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler *masm) {
- __ block_comment("LIR_OpShenandoahCompareAndSwap (shenandaohgc) {");
-
- Register addr = _addr->as_register_lo();
- Register new_val = _new_value->as_register();
- Register cmp_val = _cmp_value->as_register();
- Register tmp1 = _tmp1->as_register();
- Register tmp2 = _tmp2->as_register();
- Register result = result_opr()->as_register();
-
- if (UseCompressedOops) {
- __ encode_heap_oop(cmp_val, cmp_val);
- __ encode_heap_oop(new_val, new_val);
- }
-
- // There might be a volatile load before this Unsafe CAS.
- if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
- __ sync();
- } else {
- __ lwsync();
- }
-
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmp_val, new_val, tmp1, tmp2,
- false, result);
-
- if (UseCompressedOops) {
- __ decode_heap_oop(cmp_val);
- __ decode_heap_oop(new_val);
- }
-
- if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
- __ isync();
- } else {
- __ sync();
- }
-
- __ block_comment("} LIR_OpShenandoahCompareAndSwap (shenandaohgc)");
-}
-
-#undef __
-
-#ifdef ASSERT
-#define __ gen->lir(__FILE__, __LINE__)->
-#else
-#define __ gen->lir()->
-#endif
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess &access, LIRItem &cmp_value, LIRItem &new_value) {
- BasicType bt = access.type();
-
- if (access.is_oop()) {
- LIRGenerator* gen = access.gen();
-
- if (ShenandoahSATBBarrier) {
- pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
- LIR_OprFact::illegalOpr);
- }
-
- if (ShenandoahCASBarrier) {
- cmp_value.load_item();
- new_value.load_item();
-
- LIR_Opr t1 = gen->new_register(T_OBJECT);
- LIR_Opr t2 = gen->new_register(T_OBJECT);
- LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
- LIR_Opr result = gen->new_register(T_INT);
-
- __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result));
-
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), new_value.result());
- }
-
- return result;
- }
- }
-
- return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
-}
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess &access, LIRItem &value) {
- LIRGenerator* gen = access.gen();
- BasicType type = access.type();
-
- LIR_Opr result = gen->new_register(type);
- value.load_item();
- LIR_Opr value_opr = value.result();
-
- assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
- LIR_Opr tmp_xchg = gen->new_register(T_INT);
- __ xchg(access.resolved_addr(), value_opr, result, tmp_xchg);
-
- if (access.is_oop()) {
- result = load_reference_barrier_impl(access.gen(), result, LIR_OprFact::addressConst(0),
- access.decorators());
-
- LIR_Opr tmp_barrier = gen->new_register(type);
- __ move(result, tmp_barrier);
- result = tmp_barrier;
-
- if (ShenandoahSATBBarrier) {
- pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, result);
- }
-
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), result);
- }
- }
-
- return result;
-}
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
index 43fd54eb78a..1f6ca4655de 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
@@ -48,6 +48,9 @@
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif
+#ifdef COMPILER2
+#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
+#endif
#define __ masm->
@@ -893,13 +896,11 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble
Register tmp2 = stub->tmp2()->as_register();
assert_different_registers(addr, res, tmp1, tmp2);
-#ifdef ASSERT
- // Ensure that 'res' is 'R3_ARG1' and contains the same value as 'obj' to reduce the number of required
- // copy instructions.
assert(R3_RET == res, "res must be r3");
- __ cmpd(CR0, res, obj);
- __ asm_assert_eq("result register must contain the reference stored in obj");
-#endif
+
+ if (res != obj) {
+ __ mr(res, obj);
+ }
DecoratorSet decorators = stub->decorators();
@@ -1034,7 +1035,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
__ save_volatile_gprs(R1_SP, -nbytes_save, true, false);
// Load arguments from stack.
- // No load required, as assured by assertions in 'ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub'.
+ // No load required, as caller has already loaded obj into R3.
Register R3_obj = R3_ARG1;
Register R4_load_addr = R4_ARG2;
__ ld(R4_load_addr, -8, R1_SP);
@@ -1091,3 +1092,320 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
+
+#ifdef COMPILER2
+
+#undef __
+#define __ masm->
+
+void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Register addr, int disp, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
+ if (is_narrow) {
+ __ lwz(dst, disp, addr);
+ } else {
+ __ ld(dst, disp, addr);
+ }
+ if (is_acquire) {
+ __ twi_0(dst);
+ __ isync();
+ }
+
+ ShenandoahBarrierStubC2::load_post(masm, node, dst, Address(addr, disp), tmp1, tmp2, is_narrow);
+}
+
+void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
+ Register dst, int disp, bool dst_narrow, Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3) {
+
+ ShenandoahBarrierStubC2::store_pre(masm, node, tmp1, Address(dst, disp), tmp2, tmp3, dst_narrow);
+
+ if (dst_narrow && !src_narrow) {
+ // Need to encode into tmp, because we cannot clobber src.
+ if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
+ src = __ encode_heap_oop(tmp1, src);
+ } else {
+ src = __ encode_heap_oop_not_null(tmp1, src);
+ }
+ }
+ if (dst_narrow) {
+ __ stw(src, disp, dst);
+ } else {
+ __ std(src, disp, dst);
+ }
+
+ ShenandoahBarrierStubC2::store_post(masm, node, Address(dst, disp), tmp1, tmp2);
+}
+
+void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
+ Register oldval, Register newval, Register tmp1, Register tmp2, bool exchange, bool narrow, bool weak, bool acquire) {
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, res, addr, tmp1, tmp2, narrow);
+
+ Register dest_current = exchange ? res : R0;
+ Label no_update;
+ int semantics = MacroAssembler::MemBarNone;
+
+ if (acquire) {
+ semantics = support_IRIW_for_not_multiple_copy_atomic_cpu ?
+ MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter;
+ }
+
+ if (!exchange) { __ li(res, 0); }
+ if (narrow) {
+ __ cmpxchgw(CR0, dest_current, oldval, newval, addr,
+ semantics, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, weak);
+ } else {
+ __ cmpxchgd(CR0, dest_current, oldval, newval, addr,
+ semantics, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, weak);
+ }
+ if (!exchange) { __ li(res, 1); }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp1, tmp2);
+
+ __ bind(no_update);
+}
+
+void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval, Register newval, Register addr, Register tmp1, Register tmp2) {
+ bool is_narrow = node->bottom_type()->isa_narrowoop();
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, preval, addr, tmp1, tmp2, is_narrow);
+
+ if (is_narrow) {
+ __ getandsetw(preval, newval, addr, MacroAssembler::cmpxchgx_hint_atomic_update());
+ } else {
+ __ getandsetd(preval, newval, addr, MacroAssembler::cmpxchgx_hint_atomic_update());
+ }
+
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ __ sync();
+ }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp1, tmp2);
+}
+
+#undef __
+#define __ masm.
+
+void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ assert_different_registers(tmp1, tmp2, address.index(), address.base());
+
+ __ ld(tmp1, in_bytes(ShenandoahThreadLocalData::card_table_offset()), R16_thread);
+ if (address.index() == noreg) {
+ __ add_const_optimized(tmp2, address.base(), address.disp(), R0);
+ } else {
+ __ add(tmp2, address.index(), address.base());
+ if (address.disp() != 0) {
+ __ addi(tmp2, tmp2, address.disp());
+ }
+ }
+ __ srdi(tmp2, tmp2, CardTable::card_shift());
+ __ li(R0, CardTable::dirty_card_val());
+ __ stbx(R0, tmp2, tmp1);
+}
+
+void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+
+ __ lbz(tmp, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)), R16_thread);
+ __ cmpdi(CR0, tmp, 0);
+ // Branch to entry if not equal
+ __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CR0, Assembler::equal), *entry());
+ // This is where the slowpath stub will return to
+ __ bind(*continuation());
+}
+
+void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
+
+ __ bind(*entry());
+
+ // If we need to load ourselves, do it here.
+ if (_do_load) {
+ if (_narrow) {
+ __ lwz(_obj, _addr.disp(), _addr.base());
+ } else {
+ __ ld(_obj, _addr.disp(), _addr.base());
+ }
+ }
+
+ // If the object is null, there is no point in applying barriers.
+ maybe_far_jump_if_zero(masm, _obj);
+
+ // We need to make sure that loads done by callers survive across slow-path calls.
+ // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
+ bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
+ if (!_do_load || needs_both_barriers) {
+ preserve(_obj);
+ }
+
+ // Go for barriers. Barriers can return straight to continuation, as long
+ // as another barrier is not needed and we can reach the fastpath.
+ if (needs_both_barriers) {
+ keepalive(masm, nullptr);
+ lrb(masm);
+ } else if (_needs_keep_alive_barrier) {
+ keepalive(masm, continuation());
+ } else if (_needs_load_ref_barrier) {
+ lrb(masm);
+ } else {
+ ShouldNotReachHere();
+ }
+}
+
+void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
+ __ cmpdi(CR0, reg, 0);
+ // Branch to continuation if equal
+ __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *continuation());
+}
+
+void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
+ const int gcstate_offset = in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING));
+ const int index_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset());
+ const int buffer_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset());
+ Label L_through, L_slowpath;
+
+ // If another barrier is enabled as well, do a runtime check for a specific barrier.
+ if (_needs_load_ref_barrier) {
+ assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
+ __ lbz(_tmp1, gcstate_offset, R16_thread);
+ __ cmpdi(CR0, _tmp1, 0);
+ __ beq(CR0, L_through);
+ }
+
+ // Fast-path: put object into buffer.
+ // If buffer is already full, go slow.
+ __ ld(_tmp1, index_offset, R16_thread);
+ __ cmpdi(CR0, _tmp1, 0);
+ __ beq(CR0, L_slowpath);
+ __ addi(_tmp1, _tmp1, -wordSize);
+ __ std(_tmp1, index_offset, R16_thread);
+ __ ld(_tmp2, buffer_offset, R16_thread);
+
+ // Store the object in queue.
+ // If object is narrow, we need to decode it before inserting.
+ if (_narrow) {
+ __ add(_tmp2, _tmp2, _tmp1);
+ Register decoded = __ decode_heap_oop_not_null(_tmp1, _obj);
+ __ stdx(decoded, _tmp2);
+ } else {
+ __ stdx(_obj, _tmp2, _tmp1);
+ }
+
+ // Fast-path exits here.
+ if (L_done != nullptr) {
+ __ b(*L_done);
+ } else {
+ __ b(L_through);
+ }
+
+ // Slow-path: call runtime to handle.
+ __ bind(L_slowpath);
+
+ {
+ SaveLiveRegisters slr(&masm, this);
+
+ // Go to runtime and handle the rest there.
+ __ call_VM_leaf(keepalive_runtime_entry_addr(), _obj);
+ }
+
+ if (L_done != nullptr) {
+ __ b(*L_done);
+ } else {
+ __ bind(L_through);
+ }
+}
+
+void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
+ Label L_slow;
+
+ // If another barrier is enabled as well, do a runtime check for a specific barrier.
+ if (_needs_keep_alive_barrier) {
+ char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
+ __ lbz(_tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)), R16_thread);
+ maybe_far_jump_if_zero(masm, _tmp1);
+ }
+
+ // If weak references are being processed, weak/phantom loads need to go slow,
+ // regardless of their cset status.
+ if (_needs_load_ref_weak_barrier) {
+ __ lbz(_tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)), R16_thread);
+ __ cmpdi(CR0, _tmp1, 0);
+ __ bne(CR0, L_slow);
+ }
+
+ // Cset-check. Fall-through to slow if in collection set.
+ __ load_const_optimized(_tmp1, ShenandoahHeap::in_cset_fast_test_addr(), _tmp2);
+ if (_narrow) {
+ Register decoded = __ decode_heap_oop_not_null(_tmp2, _obj);
+ __ srdi(_tmp2, decoded, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ } else {
+ __ srdi(_tmp2, _obj, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ }
+ __ lbzx(_tmp2, _tmp2, _tmp1);
+ maybe_far_jump_if_zero(masm, _tmp2);
+
+ // Slow path
+ __ bind(L_slow);
+
+ // Obj is the result, need to temporarily stop preserving it.
+ bool is_obj_preserved = is_preserved(_obj);
+ if (is_obj_preserved) {
+ dont_preserve(_obj);
+ }
+ {
+ SaveLiveRegisters slr(&masm, this);
+
+ // Shuffle in the arguments. The end result should be:
+ // c_rarg0 <-- obj
+ // c_rarg1 <-- lea(addr)
+ Register c_rarg0 = R3_ARG1;
+ Register c_rarg1 = R4_ARG2;
+ if (c_rarg0 == _obj) {
+ __ addi(c_rarg1, _addr.base(), _addr.disp());
+ } else if (c_rarg1 == _obj) {
+ // Set up arguments in reverse, and then flip them
+ __ addi(c_rarg0, _addr.base(), _addr.disp());
+ // flip them
+ __ mr(_tmp1, c_rarg0);
+ __ mr(c_rarg0, c_rarg1);
+ __ mr(c_rarg1, _tmp1);
+ } else {
+ assert_different_registers(c_rarg1, _obj);
+ __ addi(c_rarg1, _addr.base(), _addr.disp());
+ __ mr(c_rarg0, _obj);
+ }
+
+ // Go to runtime and handle the rest there.
+ __ call_VM_leaf(lrb_runtime_entry_addr(), c_rarg0, c_rarg1);
+
+ // Save the result where needed.
+ if (_obj != R3_RET) {
+ __ mr(_obj, R3_RET);
+ }
+ }
+ if (is_obj_preserved) {
+ preserve(_obj);
+ }
+
+ __ b(*continuation());
+}
+
+int ShenandoahBarrierStubC2::available_gp_registers() {
+ Unimplemented(); // Not used
+ return 0;
+}
+
+bool ShenandoahBarrierStubC2::is_special_register(Register r) {
+ Unimplemented(); // Not used
+ return true;
+}
+
+void ShenandoahBarrierStubC2::post_init() {
+ // Do nothing.
+}
+
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp
index 6033fbc54c4..5c70a308691 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp
@@ -40,6 +40,12 @@ class StubAssembler;
#endif
+#ifdef COMPILER2
+
+class MachNode;
+
+#endif
+
class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
@@ -125,6 +131,20 @@ public:
virtual void try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
Register tmp, Label& slow_path);
+
+#ifdef COMPILER2
+ // Entry points from Matcher
+ void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Register addr, int disp, Register tmp1, Register tmp2, bool narrow, bool acquire);
+
+ void store_c2(const MachNode* node, MacroAssembler* masm,
+ Register dst, int disp, bool dst_narrow, Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3);
+
+ void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval,
+ Register newval, Register tmp1, Register tmp2, bool exchange, bool narrow, bool weak, bool acquire);
+
+ void get_and_set_c2(const MachNode* node, MacroAssembler* masm,
+ Register preval, Register newval, Register addr, Register tmp1, Register tmp2);
+#endif // COMPILER2
};
#endif // CPU_PPC_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad
index eb4894656e2..f2e5d4f3a27 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad
@@ -1,6 +1,6 @@
//
// Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
-// Copyright (c) 2012, 2021 SAP SE. All rights reserved.
+// Copyright (c) 2012, 2026 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -26,192 +26,383 @@
source_hpp %{
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+// Memory-ordering helpers used by the predicates of the Shenandoah load and CAS instructs in this file.
+bool need_acquire_load(const Node* n);
+bool need_acquire_load_store(const Node* n);
%}
-// Weak compareAndSwap operations are treated as strong compareAndSwap operations.
-// This is motivated by the retry logic of ShenandoahBarrierSetAssembler::cmpxchg_oop which is hard to realise
-// using weak CAS operations.
+source %{
+bool need_acquire_load(const Node* n) {  // true for an ordered load for which followed_by_acquire() is false
+  return !n->as_Load()->is_unordered() && !followed_by_acquire(n);
+}
+bool need_acquire_load_store(const Node* n) {  // true when the node's memory order is acquire or seqcst
+  MemNode::MemOrd order = ((CompareAndSwapNode*)n->as_LoadStore())->order();  // NOTE(review): also reached for CompareAndExchange* nodes; confirm the cast is valid for them
+  return (order == MemNode::acquire) || (order == MemNode::seqcst);
+}
+%}
-instruct compareAndSwapP_shenandoah(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval,
- iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
+// ---------------------------------- LOADS ---------------------------------------
+//
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire
- && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-
- format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
+instruct loadN_shenandoah(iRegNdst dst, memory mem, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set dst (LoadN mem));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_Load()->barrier_data() != 0) && !need_acquire_load(n));
+  // Compressed-oop load carrying Shenandoah barrier data, no acquire needed. The main load is an implicit null check candidate.
+  ins_is_late_expanded_null_check_candidate(true);
+  format %{ "shenandoah_load $dst, $mem\t# compressed ptr" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- false, $res$$Register
+    ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+      $dst$$Register,
+      $mem$$base$$Register,
+      $mem$$disp,
+      $tmp1$$Register,
+      $tmp2$$Register,
+      /* narrow = */ true,
+      /* acquire = */ false
);
%}
+  ins_cost(MEMORY_REF_COST);
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct loadN_acq_shenandoah(iRegNdst dst, memory mem, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set dst (LoadN mem));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_Load()->barrier_data() != 0) && need_acquire_load(n));
+  // Compressed-oop load carrying Shenandoah barrier data that needs acquire. The main load is an implicit null check candidate.
+  ins_is_late_expanded_null_check_candidate(true);
+  format %{ "shenandoah_load $dst, $mem\t# compressed ptr (acquire)" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+      $dst$$Register,
+      $mem$$base$$Register,
+      $mem$$disp,
+      $tmp1$$Register,
+      $tmp2$$Register,
+      /* narrow = */ true,
+      /* acquire = */ true
+    );
+  %}
+  ins_cost(3*MEMORY_REF_COST);
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct loadP_shenandoah(iRegPdst dst, memoryAlg4 mem, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set dst (LoadP mem));
+  predicate(UseShenandoahGC && (n->as_Load()->barrier_data() != 0) && !need_acquire_load(n));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0);
+  ins_cost(MEMORY_REF_COST);
+  // Full-width oop load with barrier data, no acquire. The load itself may serve as a late-expanded implicit null check.
+  ins_is_late_expanded_null_check_candidate(true);
+  format %{ "shenandoah_load $dst, $mem\t# ptr" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+        /* dst = */ $dst$$Register,
+        /* addr = */ $mem$$base$$Register,
+        /* disp = */ $mem$$disp,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* narrow = */ false,
+        /* acquire = */ false
+    );
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct loadP_acq_shenandoah(iRegPdst dst, memoryAlg4 mem, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set dst (LoadP mem));
+  predicate(UseShenandoahGC && (n->as_Load()->barrier_data() != 0) && need_acquire_load(n));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0);
+  ins_cost(3*MEMORY_REF_COST);
+  // Full-width oop load with barrier data that needs acquire. The load itself may serve as a late-expanded implicit null check.
+  ins_is_late_expanded_null_check_candidate(true);
+  format %{ "shenandoah_load $dst, $mem\t# ptr (acquire)" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->load_c2(this, masm,
+        /* dst = */ $dst$$Register,
+        /* addr = */ $mem$$base$$Register,
+        /* disp = */ $mem$$disp,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* narrow = */ false,
+        /* acquire = */ true
+    );
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// ---------------------------------- STORES ---------------------------------------
+//
+
+instruct storeN_shenandoah(memory dst, iRegN_P2N src, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, iRegPdstNoScratch tmp3, flagsRegCR0 cr0) %{
+  match(Set dst (StoreN dst src));  // narrow oop stored into a narrow field
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_Store()->barrier_data() != 0));  // only stores that carry barrier data
+  ins_cost(MEMORY_REF_COST);
+  format %{ "shenandoah_store $dst, $src\t# compressed ptr" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+        /* dst = */ $dst$$base$$Register,
+        /* disp = */ $dst$$disp,
+        /* dst_narrow = */ true,
+        /* src = */ $src$$Register,
+        /* src_narrow = */ true,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* tmp3 = */ $tmp3$$Register
+    );
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct storeP_shenandoah(memoryAlg4 dst, iRegPsrc src, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, iRegPdstNoScratch tmp3, flagsRegCR0 cr0) %{
+  match(Set dst (StoreP dst src));  // full-width oop stored into a full-width field
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_Store()->barrier_data() != 0));  // only stores that carry barrier data
+  ins_cost(MEMORY_REF_COST);
+  format %{ "shenandoah_store $dst, $src\t# ptr" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+        /* dst = */ $dst$$base$$Register,
+        /* disp = */ $dst$$disp,
+        /* dst_narrow = */ false,
+        /* src = */ $src$$Register,
+        /* src_narrow = */ false,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* tmp3 = */ $tmp3$$Register
+    );
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct encodePAndStoreN_shenandoah(memory dst, iRegPsrc src, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, iRegPdstNoScratch tmp3, flagsRegCR0 cr0) %{
+  match(Set dst (StoreN dst (EncodeP src)));  // EncodeP folded into the store: wide src, narrow field
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_Store()->barrier_data() != 0));  // only stores that carry barrier data
+  ins_cost(MEMORY_REF_COST);
+  format %{ "shenandoah_store $dst, $src\t# compressed ptr (with encoding)" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+        /* dst = */ $dst$$base$$Register,
+        /* disp = */ $dst$$disp,
+        /* dst_narrow = */ true,
+        /* src = */ $src$$Register,
+        /* src_narrow = */ false,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* tmp3 = */ $tmp3$$Register
+    );
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// ---------------------- LOAD-STORES -----------------------------------
+//
+
+// WeakCompareAndSwap* nodes are matched by the strong CAS instructs on PPC, see JDK-8385633.
+instruct compareAndSwapN_regP_regN_regN_shenandoah(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+  match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);  // res is TEMP_DEF to avoid a jump
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && !need_acquire_load_store(n));
+  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+        /* res = */ $res$$Register,
+        /* addr = */ $mem_ptr$$Register,
+        /* oldval = */ $src1$$Register,
+        /* newval = */ $src2$$Register,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* exchange = */ false,
+        /* narrow = */ true,
+        /* weak = */ false,
+        /* acquire = */ false);
+  %}
ins_pipe(pipe_class_default);
%}
-instruct compareAndSwapN_shenandoah(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval,
- iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire
- && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-
- format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
+instruct compareAndSwapN_acq_regP_regN_regN_shenandoah(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+  match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));  // weak form is emitted as a strong CAS (weak = false below)
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && need_acquire_load_store(n));  // acquire/seqcst variant
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); // TEMP_DEF to avoid jump
+  format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- false, $res$$Register
- );
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+      $res$$Register,
+      $mem_ptr$$Register,
+      $src1$$Register,
+      $src2$$Register,
+      $tmp1$$Register,
+      $tmp2$$Register,
+      /* exchange */ false,
+      /* is_narrow */ true,
+      /* weak */ false,
+      /* acquire */ true);
%}
ins_pipe(pipe_class_default);
%}
-instruct compareAndSwapP_acq_shenandoah(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval,
- iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire
- || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-
- format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+instruct compareAndSwapP_regP_regP_regP_shenandoah(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));  // weak form shares the strong encoding
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && !need_acquire_load_store(n));
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);  // res is TEMP_DEF to avoid a jump
+  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- false, $res$$Register
- );
- if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
- __ isync();
- } else {
- __ sync();
- }
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+        /* res = */ $res$$Register,
+        /* addr = */ $mem_ptr$$Register,
+        /* oldval = */ $src1$$Register,
+        /* newval = */ $src2$$Register,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* exchange = */ false,
+        /* narrow = */ false,
+        /* weak = */ false,
+        /* acquire = */ false);
%}
ins_pipe(pipe_class_default);
%}
-instruct compareAndSwapN_acq_shenandoah(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval,
- iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire
- || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-
- format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+instruct compareAndSwapP_acq_regP_regP_regP_shenandoah(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));  // weak form is emitted as a strong CAS (weak = false below)
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); // TEMP_DEF to avoid jump
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && need_acquire_load_store(n));  // acquire/seqcst variant
+  format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- false, $res$$Register
- );
- if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
- __ isync();
- } else {
- __ sync();
- }
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+      $res$$Register,
+      $mem_ptr$$Register,
+      $src1$$Register,
+      $src2$$Register,
+      $tmp1$$Register,
+      $tmp2$$Register,
+      /* exchange */ false,
+      /* is_narrow */ false,
+      /* weak */ false,
+      /* acquire */ true);
%}
ins_pipe(pipe_class_default);
%}
-instruct compareAndExchangeP_shenandoah(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval,
- iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire
- && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-
- format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as ptr; ptr" %}
+instruct compareAndExchangeN_regP_regN_regN_shenandoah(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));  // res is a narrow oop, not a bool
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && !need_acquire_load_store(n));
+  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- true, $res$$Register
- );
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+        /* res = */ $res$$Register,
+        /* addr = */ $mem_ptr$$Register,
+        /* oldval = */ $src1$$Register,
+        /* newval = */ $src2$$Register,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* exchange = */ true,
+        /* narrow = */ true,
+        /* weak = */ false,
+        /* acquire = */ false);
%}
ins_pipe(pipe_class_default);
%}
-instruct compareAndExchangeN_shenandoah(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval,
- iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire
- && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-
- format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as ptr; ptr" %}
+instruct compareAndExchangeN_acq_regP_regN_regN_shenandoah(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));  // res is a narrow oop, not a bool
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && need_acquire_load_store(n));  // acquire/seqcst variant
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+  format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- true, $res$$Register
- );
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+      $res$$Register,
+      $mem_ptr$$Register,
+      $src1$$Register,
+      $src2$$Register,
+      $tmp1$$Register,
+      $tmp2$$Register,
+      /* exchange */ true,
+      /* is_narrow */ true,
+      /* weak */ false,
+      /* acquire */ true);
%}
ins_pipe(pipe_class_default);
%}
-instruct compareAndExchangePAcq_shenandoah(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval,
- iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire
- || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-
- format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as ptr; ptr" %}
+instruct compareAndExchangeP_regP_regP_regP_shenandoah(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));  // res is a full-width oop, not a bool
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && !need_acquire_load_store(n));
+  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- true, $res$$Register
- );
- if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
- __ isync();
- } else {
- __ sync();
- }
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+        /* res = */ $res$$Register,
+        /* addr = */ $mem_ptr$$Register,
+        /* oldval = */ $src1$$Register,
+        /* newval = */ $src2$$Register,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register,
+        /* exchange = */ true,
+        /* narrow = */ false,
+        /* weak = */ false,
+        /* acquire = */ false);
%}
ins_pipe(pipe_class_default);
%}
-instruct compareAndExchangeNAcq_shenandoah(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval,
- iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{
- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
- effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr);
-
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire
- || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-
- format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as ptr; ptr" %}
+instruct compareAndExchangeP_acq_regP_regP_regP_shenandoah(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));  // res is a full-width oop, not a bool
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0) && need_acquire_load_store(n));  // acquire/seqcst variant
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+  format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(
- masm,
- $mem$$Register, $oldval$$Register, $newval$$Register,
- $tmp1$$Register, $tmp2$$Register,
- true, $res$$Register
- );
- if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
- __ isync();
- } else {
- __ sync();
- }
+    ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+      $res$$Register,
+      $mem_ptr$$Register,
+      $src1$$Register,
+      $src2$$Register,
+      $tmp1$$Register,
+      $tmp2$$Register,
+      /* exchange */ true,
+      /* is_narrow */ false,
+      /* weak */ false,
+      /* acquire */ true);
%}
ins_pipe(pipe_class_default);
%}
+
+instruct getAndSetP_shenandoah(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (GetAndSetP mem_ptr src));  // exchange of a full-width oop; res receives the previous value
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0));  // only nodes that carry barrier data
+  format %{ "GetAndSetP $res, $mem_ptr, $src" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm,
+        /* preval = */ $res$$Register,
+        /* newval = */ $src$$Register,
+        /* addr = */ $mem_ptr$$Register,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetN_shenandoah(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, iRegPdstNoScratch tmp1, iRegPdstNoScratch tmp2, flagsRegCR0 cr0) %{
+  match(Set res (GetAndSetN mem_ptr src));  // exchange of a narrow oop; res receives the previous value
+  effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+  predicate(UseShenandoahGC && (n->as_LoadStore()->barrier_data() != 0));  // only nodes that carry barrier data
+  format %{ "GetAndSetN $res, $mem_ptr, $src" %}
+  ins_encode %{
+    ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm,
+        /* preval = */ $res$$Register,
+        /* newval = */ $src$$Register,
+        /* addr = */ $mem_ptr$$Register,
+        /* tmp1 = */ $tmp1$$Register,
+        /* tmp2 = */ $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
diff --git a/src/hotspot/cpu/ppc/globals_ppc.hpp b/src/hotspot/cpu/ppc/globals_ppc.hpp
index d46bb733ea7..aba3a99b0d8 100644
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp
@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for impli
define_pd_global(bool, TrapBasedNullChecks, true);
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls passed to check cast.
-define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
+define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_PRESENT(true) NOT_COMPILER2(false)); // true only in builds that include C2
#define DEFAULT_STACK_YELLOW_PAGES (2)
#define DEFAULT_STACK_RED_PAGES (1)
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
index 275ff92c699..45af9bfc252 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -264,8 +264,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_switch_default(Register scratch1, Register scratch2);
void profile_switch_case(Register index, Register scratch1,Register scratch2, Register scratch3);
void profile_null_seen(Register Rscratch1, Register Rscratch2);
- void record_klass_in_profile(Register receiver, Register scratch1, Register scratch2);
- void record_klass_in_profile_helper(Register receiver, Register scratch1, Register scratch2, int start_row, Label& done);
// Argument and return type profiling.
void profile_obj_type(Register obj, Register mdo_addr_base, RegisterOrConstant mdo_addr_offs, Register tmp, Register tmp2);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index 56eade8e533..789f8da9574 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1348,7 +1348,7 @@ void InterpreterMacroAssembler::profile_virtual_call(Register Rreceiver,
test_method_data_pointer(profile_continue);
// Record the receiver type.
- record_klass_in_profile(Rreceiver, Rscratch1, Rscratch2);
+ profile_receiver_type(Rreceiver, R28_mdx, 0, Rscratch1, Rscratch2);
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
@@ -1367,7 +1367,7 @@ void InterpreterMacroAssembler::profile_typecheck(Register Rklass, Register Rscr
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
// Record the object type.
- record_klass_in_profile(Rklass, Rscratch1, Rscratch2);
+ profile_receiver_type(Rklass, R28_mdx, 0, Rscratch1, Rscratch2);
}
// The method data pointer needs to be updated.
@@ -1481,88 +1481,6 @@ void InterpreterMacroAssembler::profile_null_seen(Register Rscratch1, Register R
}
}
-void InterpreterMacroAssembler::record_klass_in_profile(Register Rreceiver,
- Register Rscratch1, Register Rscratch2) {
- assert(ProfileInterpreter, "must be profiling");
- assert_different_registers(Rreceiver, Rscratch1, Rscratch2);
-
- Label done;
- record_klass_in_profile_helper(Rreceiver, Rscratch1, Rscratch2, 0, done);
- bind (done);
-}
-
-void InterpreterMacroAssembler::record_klass_in_profile_helper(
- Register receiver, Register scratch1, Register scratch2,
- int start_row, Label& done) {
- if (TypeProfileWidth == 0) {
- increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
- return;
- }
-
- int last_row = VirtualCallData::row_limit() - 1;
- assert(start_row <= last_row, "must be work left to do");
- // Test this row for both the receiver and for null.
- // Take any of three different outcomes:
- // 1. found receiver => increment count and goto done
- // 2. found null => keep looking for case 1, maybe allocate this cell
- // 3. found something else => keep looking for cases 1 and 2
- // Case 3 is handled by a recursive call.
- for (int row = start_row; row <= last_row; row++) {
- Label next_test;
- bool test_for_null_also = (row == start_row);
-
- // See if the receiver is receiver[n].
- int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
- test_mdp_data_at(recvr_offset, receiver, next_test, scratch1);
- // delayed()->tst(scratch);
-
- // The receiver is receiver[n]. Increment count[n].
- int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
- increment_mdp_data_at(count_offset, scratch1, scratch2);
- b(done);
- bind(next_test);
-
- if (test_for_null_also) {
- Label found_null;
- // Failed the equality check on receiver[n]... Test for null.
- if (start_row == last_row) {
- // The only thing left to do is handle the null case.
- // Scratch1 contains test_out from test_mdp_data_at.
- cmpdi(CR0, scratch1, 0);
- beq(CR0, found_null);
- // Receiver did not match any saved receiver and there is no empty row for it.
- // Increment total counter to indicate polymorphic case.
- increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
- b(done);
- bind(found_null);
- break;
- }
- // Since null is rare, make it be the branch-taken case.
- cmpdi(CR0, scratch1, 0);
- beq(CR0, found_null);
-
- // Put all the "Case 3" tests here.
- record_klass_in_profile_helper(receiver, scratch1, scratch2, start_row + 1, done);
-
- // Found a null. Keep searching for a matching receiver,
- // but remember that this is an empty (unused) slot.
- bind(found_null);
- }
- }
-
- // In the fall-through case, we found no matching receiver, but we
- // observed the receiver[start_row] is null.
-
- // Fill in the receiver field and increment the count.
- int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
- set_mdp_data_at(recvr_offset, receiver);
- int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
- li(scratch1, DataLayout::counter_increment);
- set_mdp_data_at(count_offset, scratch1);
- if (start_row > 0) {
- b(done);
- }
-}
// Argument and return type profilig.
// kills: tmp, tmp2, R0, CR0, CR1
@@ -2345,12 +2263,20 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta
// entry/exit events are sent for that thread to track stack
// depth. If it is possible to enter interp_only_mode we add
// the code to check if the event should be sent.
- if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+ if (mode == NotifyJVMTI && (JvmtiExport::can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
Label jvmti_post_done;
- lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
- cmpwi(CR0, R0, 0);
+ // if (thread->jvmti_thread_state() == nullptr) exit;
+ ld(R11_scratch1, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
+ cmpdi(CR0, R11_scratch1, 0);
beq(CR0, jvmti_post_done);
+
+ // if (interp_only_mode() == false && frame_pop_cnt() == 0) exit;
+ lwz(R12_scratch2, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
+ lwz(R11_scratch1, in_bytes(JvmtiThreadState::frame_pop_cnt_offset()), R11_scratch1);
+ or_(R0, R11_scratch1, R12_scratch2);
+ beq(CR0, jvmti_post_done);
+
if (!is_native_method) { push(state); } // Expose tos to GC.
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), check_exceptions);
if (!is_native_method) { pop(state); }
diff --git a/src/hotspot/cpu/ppc/jvmciCodeInstaller_ppc.cpp b/src/hotspot/cpu/ppc/jvmciCodeInstaller_ppc.cpp
deleted file mode 100644
index 923a5fd9da9..00000000000
--- a/src/hotspot/cpu/ppc/jvmciCodeInstaller_ppc.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "jvmci/jvmciCodeInstaller.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#include "jvmci/jvmciCompilerToVM.hpp"
-#include "jvmci/jvmciJavaClasses.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "vmreg_ppc.inline.hpp"
-
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
- Unimplemented();
- return 0;
-}
-
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
- Unimplemented();
-}
-
-// convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
- return nullptr;
-}
-
-bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
- return false;
-}
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 24c314b8be3..b5bfcb0fced 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -1199,6 +1199,75 @@ address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
}
#endif // ABI_ELFv2
+bool MacroAssembler::ic_call(Register Rmethod_toc, // base register for the method TOC accesses below
+ address target, // call destination, wrapped into the relocated AddressLiteral
+ jint method_index, // recorded in the virtual_call relocation
+ bool scratch_emit, // forwarded to trampoline_call(), which then emits no stub
+ bool fixed_size) { // forwarded to load_const_from_method_toc()
+ AddressLiteral target_al(target, virtual_call_Relocation::spec(pc(), method_index)); // pc() here is where the IC load is emitted next
+ DEBUG_ONLY(int ic_load_offset = offset());
+
+ // Load a clear inline cache (Universe::non_oop_word()) into R19_inline_cache_reg.
+ AddressLiteral empty_ic((address) Universe::non_oop_word());
+ bool success = load_const_from_method_toc(R19_inline_cache_reg, empty_ic, Rmethod_toc, fixed_size);
+ if (!success) return false; // bail out before emitting the call
+
+ assert(MacroAssembler::is_load_const_from_method_toc_at(addr_at(ic_load_offset)),
+ "should be load from TOC");
+
+ address call_pc = trampoline_call(target_al, Rmethod_toc, scratch_emit);
+ return call_pc != nullptr; // trampoline_call() returns nullptr when it could not emit
+}
+
+address MacroAssembler::trampoline_call(AddressLiteral target,
+ Register Rmethod_toc,
+ bool scratch_emit) {
+ // First, emit the trampoline stub (skipped entirely when scratch_emit is set).
+ if (!scratch_emit) {
+ RelocationHolder rh = trampoline_stub_Relocation::spec(pc() /* of the bl below */);
+
+ // Put the target's entry point as a constant into the constant pool.
+ const address target_toc_addr = address_constant((address)target.value());
+ if (target_toc_addr == nullptr) return nullptr; // nullptr result: report failure to the caller
+
+ const int target_toc_offset = offset_to_method_toc(target_toc_addr);
+ address stub = start_a_stub(64);
+ if (stub == nullptr) return nullptr; // stub could not be started: report failure to the caller
+
+ // Annotate the stub with a relocation that points to the owning call instruction.
+ relocate(rh);
+ DEBUG_ONLY(int stub_start_offset = offset());
+
+ // For java_to_interp stubs we use R11_scratch1 as scratch register
+ // and in call trampoline stubs we use R12_scratch2. This way we
+ // can distinguish them (see is_NativeCallTrampolineStub_at()).
+ Register reg_scratch = R12_scratch2;
+
+ if (Rmethod_toc == noreg) {
+ calculate_address_from_global_toc(reg_scratch, method_toc()); // no TOC register supplied: compute the method TOC address
+ Rmethod_toc = reg_scratch;
+ }
+
+ ld_largeoffset_unchecked(reg_scratch, target_toc_offset, Rmethod_toc, false); // load the call target from the constant pool
+ mtctr(reg_scratch);
+ bctr(); // branch to the target via CTR
+
+ assert(target_toc_offset == NativeCallTrampolineStub_at(addr_at(stub_start_offset))->destination_toc_offset(),
+ "encoded offset into the constant pool must match");
+ assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
+ assert(is_NativeCallTrampolineStub_at(addr_at(stub_start_offset)), "doesn't look like a trampoline");
+
+ // End the stub.
+ end_a_stub();
+ }
+
+ // The call will be resolved / patched later; until then the bl targets its own pc as a placeholder.
+ address call_pc = pc();
+ relocate(target.rspec());
+ bl(call_pc);
+ return call_pc; // address of the bl instruction
+}
+
void MacroAssembler::post_call_nop() {
// Make inline again when loom is always enabled.
if (!Continuations::enabled()) {
@@ -2615,50 +2684,6 @@ void MacroAssembler::tlab_allocate(
//verify_tlab(); not implemented
}
-address MacroAssembler::emit_trampoline_stub(int destination_toc_offset,
- int insts_call_instruction_offset, Register Rtoc) {
- // Start the stub.
- address stub = start_a_stub(64);
- if (stub == nullptr) { return nullptr; } // CodeCache full: bail out
-
- // Create a trampoline stub relocation which relates this trampoline stub
- // with the call instruction at insts_call_instruction_offset in the
- // instructions code-section.
- relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset));
- const int stub_start_offset = offset();
-
- // For java_to_interp stubs we use R11_scratch1 as scratch register
- // and in call trampoline stubs we use R12_scratch2. This way we
- // can distinguish them (see is_NativeCallTrampolineStub_at()).
- Register reg_scratch = R12_scratch2;
-
- // Now, create the trampoline stub's code:
- // - load the TOC
- // - load the call target from the constant pool
- // - call
- if (Rtoc == noreg) {
- calculate_address_from_global_toc(reg_scratch, method_toc());
- Rtoc = reg_scratch;
- }
-
- ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false);
- mtctr(reg_scratch);
- bctr();
-
- const address stub_start_addr = addr_at(stub_start_offset);
-
- // Assert that the encoded destination_toc_offset can be identified and that it is correct.
- assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
- "encoded offset into the constant pool must match");
- // Trampoline_stub_size should be good.
- assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
- assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
-
- // End the stub.
- end_a_stub();
- return stub;
-}
-
// "The box" is the space on the stack where we copy the object mark.
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register obj, Register box,
Register tmp1, Register tmp2, Register tmp3) {
@@ -3202,24 +3227,6 @@ void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop);
}
-int MacroAssembler::instr_size_for_load_klass() {
- static int computed_size = -1;
-
- // Not yet computed?
- if (computed_size == -1) {
-
- // Determine by scratch emit.
- ResourceMark rm;
- int code_size = 16 * BytesPerInstWord;
- CodeBuffer cb("load_klass scratch buffer", code_size, 0);
- MacroAssembler* a = new MacroAssembler(&cb);
- a->load_klass(R11_scratch1, R11_scratch1);
- computed_size = a->offset();
- }
-
- return computed_size;
-}
-
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
assert(dst != R0, "Dst reg may not be R0, as R0 is used here.");
if (src == noreg) src = dst;
@@ -4322,6 +4329,173 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen,
bind(L_done);
} // multiply_to_len
+void MacroAssembler::increment_mem64(Register base, RegisterOrConstant ind_or_offs, int val, Register tmp) {
+ ld(tmp, ind_or_offs, base); // tmp = 64-bit value at base + ind_or_offs
+ addi(tmp, tmp, val); // add the immediate; tmp is clobbered
+ std(tmp, ind_or_offs, base); // write back; the load/add/store sequence is not atomic
+}
+
+// Handle the receiver type profile update given the "recv" klass.
+//
+// Normally updates the ReceiverData (RD) that starts at "mdp" + "mdp_offset".
+// If there are no matching or claimable receiver entries in RD, updates
+// the polymorphic counter.
+//
+// This code is expected to run in either the interpreter or JIT-ed code, without
+// extra synchronization. For safety, receiver cells are claimed atomically, which
+// avoids grossly misrepresenting the profiles under concurrent updates. For speed,
+// counter updates are not atomic.
+//
+void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register tmp1, Register tmp2) {
+ assert_different_registers(recv, mdp, tmp1, tmp2);
+
+ int base_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(0));
+ int poly_count_offset = in_bytes(CounterData::count_offset());
+ int receiver_step = in_bytes(ReceiverTypeData::receiver_offset(1)) - base_receiver_offset;
+ int receiver_to_count_step = in_bytes(ReceiverTypeData::receiver_count_offset(0)) - base_receiver_offset;
+
+ // Adjust for MDP offsets.
+ base_receiver_offset += mdp_offset;
+ poly_count_offset += mdp_offset;
+
+#ifdef ASSERT
+ // We are about to walk the MDO slots without asking for offsets.
+ // Check that our math hits all the right spots.
+ for (uint c = 0; c < ReceiverTypeData::row_limit(); c++) {
+ int real_recv_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_offset(c));
+ int real_count_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_count_offset(c));
+ int offset = base_receiver_offset + receiver_step*c;
+ int count_offset = offset + receiver_to_count_step;
+ assert(offset == real_recv_offset, "receiver slot math");
+ assert(count_offset == real_count_offset, "receiver count math");
+ }
+ int real_poly_count_offset = mdp_offset + in_bytes(CounterData::count_offset());
+ assert(poly_count_offset == real_poly_count_offset, "poly counter math");
+#endif
+
+ // Corner case: no profile table. Increment poly counter and exit.
+ if (ReceiverTypeData::row_limit() == 0) {
+ increment_mem64(mdp, poly_count_offset, DataLayout::counter_increment, tmp1);
+ return;
+ }
+
+ Label L_loop_search_receiver, L_loop_search_empty;
+ Label L_restart, L_found_recv, L_found_empty, L_count_update;
+ Register offset = tmp1, count = tmp2;
+
+ // The code here recognizes three major cases:
+ // A. Fastest: receiver found in the table
+ // B. Fast: no receiver in the table, and the table is full
+ // C. Slow: no receiver in the table, free slots in the table
+ //
+ // The case A performance is most important, as perfectly-behaved code would end up
+ // there, especially with larger TypeProfileWidth. The case B performance is
+ // important as well, this is where bulk of code would land for normally megamorphic
+ // cases. The case C performance is not essential, its job is to deal with installation
+ // races, we optimize for code density instead. Case C needs to make sure that receiver
+ // rows are only claimed once. This makes sure we never overwrite a row for another
+ // receiver and never duplicate the receivers in the list, making profile type-accurate.
+ //
+ // It is very tempting to handle these cases in a single loop, and claim the first slot
+ // without checking the rest of the table. But, profiling code should tolerate free slots
+ // in the table, as class unloading can clear them. After such cleanup, the receiver
+ // we need might be _after_ the free slot. Therefore, we need to let at least one full scan
+ // complete, before trying to install new slots. Splitting the code in several tight
+ // loops also helpfully optimizes for cases A and B.
+ //
+ // This code is effectively:
+ //
+ // restart:
+ // // Fastest: receiver is already installed
+ // for (i = 0; i < receiver_count(); i++) {
+ // if (receiver(i) == recv) goto found_recv(i);
+ // }
+ //
+ // // Fast (case B): no receiver; scan for a free slot, fall through if the profile is full
+ // for (i = 0; i < receiver_count(); i++) {
+ // if (receiver(i) == null) goto found_null(i);
+ // }
+ //
+ // // Case B lands here: profile is full, polymorphic case
+ // count++;
+ // return
+ //
+ // // Slow (case C): try to install receiver
+ // found_null(i):
+ // CAS(&receiver(i), null, recv);
+ // goto restart
+ //
+ // found_recv(i):
+ // *receiver_count(i)++
+ //
+
+ if (count != noreg) { // with tmp2 == noreg, R0 is reloaded with the row limit before each loop instead
+ li(count, ReceiverTypeData::row_limit());
+ }
+
+ bind(L_restart);
+
+ // Fastest: receiver is already installed
+ if (count != noreg) {
+ mtctr(count);
+ } else {
+ li(R0, ReceiverTypeData::row_limit());
+ mtctr(R0);
+ }
+ li(offset, base_receiver_offset);
+ bind(L_loop_search_receiver);
+ ldx(R0, offset, mdp);
+ cmpd(CR0, R0, recv);
+ beq(CR0, L_found_recv);
+ addi(offset, offset, receiver_step);
+ bdnz(L_loop_search_receiver);
+
+ // Fast: no receiver; look for a free slot (falls through if the profile is full)
+ if (count != noreg) {
+ mtctr(count);
+ } else {
+ li(R0, ReceiverTypeData::row_limit());
+ mtctr(R0);
+ }
+ li(offset, base_receiver_offset);
+ bind(L_loop_search_empty);
+ ldx(R0, offset, mdp);
+ cmpdi(CR0, R0, 0);
+ beq(CR0, L_found_empty);
+ addi(offset, offset, receiver_step);
+ bdnz(L_loop_search_empty);
+
+ // Case B: receiver is not found and table is full.
+ // Increment polymorphic counter instead of receiver slot.
+ li(offset, poly_count_offset);
+ b(L_count_update);
+
+ // Slow (case C): try to install receiver
+ bind(L_found_empty);
+
+ // Atomically swing receiver slot: null -> recv.
+ {
+ Register receiver_addr = offset;
+ add(receiver_addr, mdp, offset); // kills offset
+ cmpxchgd(CR0, R0, RegisterOrConstant(0), recv, receiver_addr, MemBarNone, cmpxchgx_hint_atomic_update(),
+ noreg, nullptr, /* check without ldarx first */ false, /* weak */ true);
+ }
+
+ // CAS success means the slot now has the receiver we want. CAS failure means
+ // something had claimed the slot concurrently (or, presumably, the weak CAS failed spuriously): it can be the same receiver we want,
+ // or something else. Since this is a slow path, we can optimize for code density,
+ // and just restart the search from the beginning.
+ b(L_restart);
+
+ // Found a receiver, convert its slot offset to corresponding count offset.
+ bind(L_found_recv);
+ addi(offset, offset, receiver_to_count_step);
+
+ // Finally, update the counter (without tmp2, recv serves as the temp and is clobbered)
+ bind(L_count_update);
+ increment_mem64(mdp, offset, DataLayout::counter_increment, /* temp */ (count != noreg) ? count : recv);
+}
+
#ifdef ASSERT
void MacroAssembler::asm_assert(AsmAssertCond cond, const char *msg) {
Label ok;
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index b2f5e8f0b60..bbfa75f5151 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -380,9 +380,20 @@ class MacroAssembler: public Assembler {
Register toc);
#endif
+ // CompiledIC call
+ bool ic_call(Register Rmethod_toc,
+ address target,
+ jint method_index = 0,
+ bool scratch_emit = false,
+ bool fixed_size = false);
static int ic_check_size();
int ic_check(int end_alignment);
+ enum { trampoline_stub_size = 6 * 4 };
+ address trampoline_call(AddressLiteral target,
+ Register Rmethod_toc = noreg,
+ bool scratch_emit = false);
+
protected:
// It is imperative that all calls into the VM are handled via the
@@ -702,9 +713,6 @@ class MacroAssembler: public Assembler {
Label& slow_case // continuation point if fast allocation fails
);
- enum { trampoline_stub_size = 6 * 4 };
- address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);
-
void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3);
@@ -802,7 +810,6 @@ class MacroAssembler: public Assembler {
MacroAssembler::PreservationLevel preservation_level);
void load_method_holder(Register holder, Register method);
- static int instr_size_for_load_klass();
void decode_klass_not_null(Register dst, Register src = noreg);
Register encode_klass_not_null(Register dst, Register src = noreg);
@@ -863,6 +870,12 @@ class MacroAssembler: public Assembler {
Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
Register tmp11, Register tmp12, Register tmp13);
+ // non-atomic 64-bit memory increment by simm16
+ void increment_mem64(Register base, RegisterOrConstant ind_or_offs, int val, Register tmp);
+
+ // Bytecode profiling (tmp2 = noreg is allowed, but then recv is killed)
+ void profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register tmp1, Register tmp2);
+
// Emitters for CRC32 calculation.
// A note on invertCRC:
// Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 00549ac8508..7360ed604f1 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -1177,18 +1177,7 @@ int MachCallStaticJavaNode::ret_addr_offset() {
}
int MachCallDynamicJavaNode::ret_addr_offset() {
- // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
- // postalloc expanded calls if we use inline caches and do not update method data.
- if (UseInlineCaches) return 4;
-
- int vtable_index = this->_vtable_index;
- if (vtable_index < 0) {
- // Must be invalid_vtable_index, not nonvirtual_vtable_index.
- assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
- return 12;
- } else {
- return 20 + MacroAssembler::instr_size_for_load_klass();
- }
+ return 12; // size in bytes of the code enc_java_dynamic_call emits up to the return address; asserted in that encoder
}
int MachCallRuntimeNode::ret_addr_offset() {
@@ -1311,9 +1300,6 @@ class CallStubImpl {
public:
- // Emit call stub, compiled java to interpreter.
- static void emit_trampoline_stub(C2_MacroAssembler *masm, int destination_toc_offset, int insts_call_instruction_offset);
-
// Size of call trampoline stub.
// This doesn't need to be accurate to the byte, but it
// must be larger than or equal to the real size of the stub.
@@ -1332,81 +1318,6 @@ class CallStubImpl {
source %{
-// Emit a trampoline stub for a call to a target which is too far away.
-//
-// code sequences:
-//
-// call-site:
-// branch-and-link to or
-//
-// Related trampoline stub for this call-site in the stub section:
-// load the call target from the constant pool
-// branch via CTR (LR/link still points to the call-site above)
-
-void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler *masm, int destination_toc_offset, int insts_call_instruction_offset) {
- address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
- if (stub == nullptr) {
- ciEnv::current()->record_out_of_memory_failure();
- }
-}
-
-//=============================================================================
-
-// Emit an inline branch-and-link call and a related trampoline stub.
-//
-// code sequences:
-//
-// call-site:
-// branch-and-link to or
-//
-// Related trampoline stub for this call-site in the stub section:
-// load the call target from the constant pool
-// branch via CTR (LR/link still points to the call-site above)
-//
-
-typedef struct {
- int insts_call_instruction_offset;
- int ret_addr_offset;
-} EmitCallOffsets;
-
-// Emit a branch-and-link instruction that branches to a trampoline.
-// - Remember the offset of the branch-and-link instruction.
-// - Add a relocation at the branch-and-link instruction.
-// - Emit a branch-and-link.
-// - Remember the return pc offset.
-EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler *masm, address entry_point, relocInfo::relocType rtype) {
- EmitCallOffsets offsets = { -1, -1 };
- const int start_offset = __ offset();
- offsets.insts_call_instruction_offset = __ offset();
-
- // No entry point given, use the current pc.
- if (entry_point == nullptr) entry_point = __ pc();
-
- // Put the entry point as a constant into the constant pool.
- const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
- if (entry_point_toc_addr == nullptr) {
- ciEnv::current()->record_out_of_memory_failure();
- return offsets;
- }
- const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
-
- // Emit the trampoline stub which will be related to the branch-and-link below.
- CallStubImpl::emit_trampoline_stub(masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
- if (ciEnv::current()->failing()) { return offsets; } // Code cache may be full.
- __ relocate(rtype);
-
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ bl((address) __ pc());
-
- offsets.ret_addr_offset = __ offset() - start_offset;
-
- return offsets;
-}
-
-//=============================================================================
-
// Factory for creating loadConL* nodes for large/small constant pool.
static inline jlong replicate_immF(float con) {
@@ -1426,7 +1337,7 @@ int ConstantTable::calculate_table_base_offset() const {
bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) {
- iRegPdstOper *op_dst = new iRegPdstOper();
+ iRegLdstOper *op_dst = new iRegLdstOper();
MachNode *m1 = new loadToc_hiNode();
MachNode *m2 = new loadToc_loNode();
@@ -2632,7 +2543,7 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
// operands for new nodes
m1->_opnds[0] = new iRegLdstOper(); // dst
m1->_opnds[1] = immSrc; // src
- m1->_opnds[2] = new iRegPdstOper(); // toc
+ m1->_opnds[2] = new iRegLdstOper(); // toc
m2->_opnds[0] = new iRegLdstOper(); // dst
m2->_opnds[1] = immSrc; // src
m2->_opnds[2] = new iRegLdstOper(); // base
@@ -2663,7 +2574,7 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
// operands for new nodes
m2->_opnds[0] = new iRegLdstOper(); // dst
m2->_opnds[1] = immSrc; // src
- m2->_opnds[2] = new iRegPdstOper(); // toc
+ m2->_opnds[2] = new iRegLdstOper(); // toc
// Initialize ins_attrib instruction offset.
m2->_cbuf_insts_offset = -1;
@@ -2714,7 +2625,7 @@ loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, Pha
// operands for new nodes
m1->_opnds[0] = new iRegLdstOper(); // dst
m1->_opnds[1] = immSrc; // src
- m1->_opnds[2] = new iRegPdstOper(); // toc
+ m1->_opnds[2] = new iRegLdstOper(); // toc
m2->_opnds[0] = new iRegLdstOper(); // dst
m2->_opnds[1] = immSrc; // src
@@ -2760,7 +2671,7 @@ loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, Pha
// operands for new nodes
m2->_opnds[0] = new iRegLdstOper(); // dst
m2->_opnds[1] = immSrc; // src
- m2->_opnds[2] = new iRegPdstOper(); // toc
+ m2->_opnds[2] = new iRegLdstOper(); // toc
m3->_opnds[0] = new vecXOper(); // dst
m3->_opnds[1] = new iRegLdstOper(); // src
@@ -2888,6 +2799,11 @@ encode %{
loadConP_hiNode *m1 = new loadConP_hiNode();
loadConP_loNode *m2 = new loadConP_loNode();
+ // If this is an oop, both m1 and m2 must be considered oops so postalloc scheduling does not
+ // put a safepoint between them.
+ m1->_bottom_type = bottom_type();
+ m2->_bottom_type = bottom_type();
+
// inputs for new nodes
m1->add_req(nullptr, n_toc);
m2->add_req(nullptr, m1);
@@ -2895,7 +2811,8 @@ encode %{
// operands for new nodes
m1->_opnds[0] = new iRegPdstOper(); // dst
m1->_opnds[1] = op_src; // src
- m1->_opnds[2] = new iRegPdstOper(); // toc
+ m1->_opnds[2] = new iRegLdstOper(); // toc
+
m2->_opnds[0] = new iRegPdstOper(); // dst
m2->_opnds[1] = op_src; // src
m2->_opnds[2] = new iRegLdstOper(); // base
@@ -2920,7 +2837,7 @@ encode %{
// operands for new nodes
m2->_opnds[0] = new iRegPdstOper(); // dst
m2->_opnds[1] = op_src; // src
- m2->_opnds[2] = new iRegPdstOper(); // toc
+ m2->_opnds[2] = new iRegLdstOper(); // toc
// Register allocation for new nodes.
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -2947,7 +2864,7 @@ encode %{
// operands for new nodes
m2->_opnds[0] = op_dst;
m2->_opnds[1] = op_src;
- m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
+ m2->_opnds[2] = new iRegLdstOper(); // constanttablebase
// register allocation for new nodes
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -2971,7 +2888,7 @@ encode %{
// operands for new nodes
m2->_opnds[0] = op_dst;
m2->_opnds[1] = op_src;
- m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
+ m2->_opnds[2] = new iRegLdstOper(); // constanttablebase
// register allocation for new nodes
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -3321,45 +3238,27 @@ encode %{
// Usage of r1 and r2 in the stubs allows to distinguish them.
enc_class enc_java_static_call(method meth) %{
address entry_point = (address)$meth$$method;
+ address call_pc;
if (!_method) {
// A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
- emit_call_with_trampoline_stub(masm, entry_point, relocInfo::runtime_call_type);
- if (ciEnv::current()->failing()) { return; } // Code cache may be full.
- } else {
- // Remember the offset not the address.
- const int start_offset = __ offset();
-
- // The trampoline stub.
- // No entry point given, use the current pc.
- // Make sure branch fits into
- if (entry_point == nullptr) entry_point = __ pc();
-
- // Put the entry point as a constant into the constant pool.
- const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
- if (entry_point_toc_addr == nullptr) {
- ciEnv::current()->record_out_of_memory_failure();
+ call_pc = __ trampoline_call(AddressLiteral(entry_point, relocInfo::runtime_call_type));
+ if (call_pc == nullptr) {
+ ciEnv::current()->record_failure("CodeCache is full");
return;
}
- const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
-
- // Emit the trampoline stub which will be related to the branch-and-link below.
- CallStubImpl::emit_trampoline_stub(masm, entry_point_toc_offset, start_offset);
- if (ciEnv::current()->failing()) { return; } // Code cache may be full.
+ } else {
int method_index = resolved_method_index(masm);
- __ relocate(_optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
- : static_call_Relocation::spec(method_index));
+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
+ : static_call_Relocation::spec(method_index);
+ call_pc = __ trampoline_call(AddressLiteral(entry_point, rspec));
+ if (call_pc == nullptr) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
- // The real call.
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ set_inst_mark();
- __ bl(__ pc()); // Emits a relocation.
-
- // The stub for call to interpreter.
- address stub = CompiledDirectCall::emit_to_interp_stub(masm);
- __ clear_inst_mark();
+ // Emit stub for static call
+ address stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
if (stub == nullptr) {
ciEnv::current()->record_failure("CodeCache is full");
return;
@@ -3368,157 +3267,21 @@ encode %{
__ post_call_nop();
%}
- // Second node of expanded dynamic call - the call.
- enc_class enc_java_dynamic_call_sched(method meth) %{
- if (!ra_->C->output()->in_scratch_emit_size()) {
- // Create a call trampoline stub for the given method.
- const address entry_point = !($meth$$method) ? nullptr : (address)$meth$$method;
- const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
- if (entry_point_const == nullptr) {
- ciEnv::current()->record_out_of_memory_failure();
- return;
- }
- const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
- CallStubImpl::emit_trampoline_stub(masm, entry_point_const_toc_offset, __ offset());
- if (ra_->C->env()->failing()) { return; } // Code cache may be full.
-
- // Build relocation at call site with ic position as data.
- assert((_load_ic_hi_node != nullptr && _load_ic_node == nullptr) ||
- (_load_ic_hi_node == nullptr && _load_ic_node != nullptr),
- "must have one, but can't have both");
- assert((_load_ic_hi_node != nullptr && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
- (_load_ic_node != nullptr && _load_ic_node->_cbuf_insts_offset != -1),
- "must contain instruction offset");
- const int virtual_call_oop_addr_offset = _load_ic_hi_node != nullptr
- ? _load_ic_hi_node->_cbuf_insts_offset
- : _load_ic_node->_cbuf_insts_offset;
- const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
- assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
- "should be load from TOC");
- int method_index = resolved_method_index(masm);
- __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
- }
-
- // At this point I do not have the address of the trampoline stub,
- // and the entry point might be too far away for bl. Pc() serves
- // as dummy and bl will be patched later.
- __ bl((address) __ pc());
- __ post_call_nop();
- %}
-
- // postalloc expand emitter for virtual calls.
- enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
-
- // Create the nodes for loading the IC from the TOC.
- loadConLNodesTuple loadConLNodes_IC =
- loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) Universe::non_oop_word()),
- OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
-
- // Create the call node.
- CallDynamicJavaDirectSchedNode *call = new CallDynamicJavaDirectSchedNode();
- call->_vtable_index = _vtable_index;
- call->_method = _method;
- call->_optimized_virtual = _optimized_virtual;
- call->_tf = _tf;
- call->_entry_point = _entry_point;
- call->_cnt = _cnt;
- call->_guaranteed_safepoint = true;
- call->_oop_map = _oop_map;
- call->_jvms = _jvms;
- call->_jvmadj = _jvmadj;
- call->_has_ea_local_in_scope = _has_ea_local_in_scope;
- call->_in_rms = _in_rms;
- call->_nesting = _nesting;
- call->_override_symbolic_info = _override_symbolic_info;
- call->_arg_escape = _arg_escape;
-
- // New call needs all inputs of old call.
- // Req...
- for (uint i = 0; i < req(); ++i) {
- // The expanded node does not need toc any more.
- // Add the inline cache constant here instead. This expresses the
- // register of the inline cache must be live at the call.
- // Else we would have to adapt JVMState by -1.
- if (i == mach_constant_base_node_input()) {
- call->add_req(loadConLNodes_IC._last);
- } else {
- call->add_req(in(i));
- }
- }
- // ...as well as prec
- for (uint i = req(); i < len(); ++i) {
- call->add_prec(in(i));
- }
-
- // Remember nodes loading the inline cache into r19.
- call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
- call->_load_ic_node = loadConLNodes_IC._small;
-
- // Operands for new nodes.
- call->_opnds[0] = _opnds[0];
- call->_opnds[1] = _opnds[1];
-
- // Only the inline cache is associated with a register.
- assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
-
- // Push new nodes.
- if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
- if (loadConLNodes_IC._last) nodes->push(loadConLNodes_IC._last);
- nodes->push(call);
- %}
-
// Compound version of call dynamic
// Toc is only passed so that it can be used in ins_encode statement.
// In the code we have to use $constanttablebase.
enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
int start_offset = __ offset();
-
- Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
-
- int vtable_index = this->_vtable_index;
- if (vtable_index < 0) {
- // Must be invalid_vtable_index, not nonvirtual_vtable_index.
- assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
- Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
-
- // Virtual call relocation will point to ic load.
- address virtual_call_meta_addr = __ pc();
- // Load a clear inline cache.
- AddressLiteral empty_ic((address) Universe::non_oop_word());
- bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
- if (!success) {
- ciEnv::current()->record_out_of_memory_failure();
- return;
- }
- // CALL to fixup routine. Fixup routine uses ScopeDesc info
- // to determine who we intended to call.
- __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
- emit_call_with_trampoline_stub(masm, (address)$meth$$method, relocInfo::none);
- if (ciEnv::current()->failing()) { return; } // Code cache may be full.
- assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
- "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
- } else {
- assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
- // Go thru the vtable. Get receiver klass. Receiver already
- // checked for non-null. If we'll go thru a C2I adapter, the
- // interpreter expects method in R19_method.
-
- __ load_klass(R11_scratch1, R3);
-
- int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
- int v_off = entry_offset + in_bytes(vtableEntry::method_offset());
- __ li(R19_method, v_off);
- __ ldx(R19_method/*method*/, R19_method/*method offset*/, R11_scratch1/*class*/);
- // NOTE: for vtable dispatches, the vtable entry will never be
- // null. However it may very well end up in handle_wrong_method
- // if the method is abstract for the particular class.
- __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
- // Call target. Either compiled code or C2I adapter.
- __ mtctr(R11_scratch1);
- __ bctrl();
- assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
- "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
+ int method_index = resolved_method_index(masm);
+ bool scratch_emit = ra_ == nullptr; // no register allocator available => treated as a scratch emit
+ Register Rtoc = scratch_emit ? R2_TOC : $constanttablebase; // without ra_, R2_TOC stands in for $constanttablebase
+ bool success = __ ic_call(Rtoc, (address)$meth$$method, method_index, scratch_emit, true /*fixed_size*/);
+ if (!success) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
}
+ assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
+ "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
__ post_call_nop();
%}
@@ -5974,6 +5737,9 @@ instruct loadConN_Ex(iRegNdst dst, immN src) %{
MachNode *m1 = new loadConN_hiNode();
MachNode *m2 = new loadConN_loNode();
MachNode *m3 = new clearMs32bNode();
+ m1->_bottom_type = bottom_type();
+ m2->_bottom_type = bottom_type();
+ m3->_bottom_type = bottom_type();
m1->add_req(nullptr);
m2->add_req(nullptr, m1);
m3->add_req(nullptr, m2);
@@ -6052,6 +5818,7 @@ instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
postalloc_expand %{
// Load high bits into register. Sign extended.
MachNode *m1 = new loadConNKlass_hiNode();
+ m1->_bottom_type = bottom_type();
m1->add_req(nullptr);
m1->_opnds[0] = op_dst;
m1->_opnds[1] = op_src;
@@ -6062,6 +5829,7 @@ instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
if (!Assembler::is_uimm((jlong)CompressedKlassPointers::encode((Klass *)op_src->constant()), 31)) {
// Value might be 1-extended. Mask out these bits.
m2 = new loadConNKlass_maskNode();
+ m2->_bottom_type = bottom_type();
m2->add_req(nullptr, m1);
m2->_opnds[0] = op_dst;
m2->_opnds[1] = op_src;
@@ -6071,6 +5839,7 @@ instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
}
MachNode *m3 = new loadConNKlass_loNode();
+ m3->_bottom_type = bottom_type();
m3->add_req(nullptr, m2);
m3->_opnds[0] = op_dst;
m3->_opnds[1] = op_src;
@@ -14028,15 +13797,14 @@ instruct safePoint_poll(iRegPdst poll) %{
// ============================================================================
// Call Instructions
-// Call Java Static Instruction
-
source %{
#include "runtime/continuation.hpp"
%}
-// Schedulable version of call static node.
+// Call Java Static Instruction
+
instruct CallStaticJavaDirect(method meth) %{
match(CallStaticJava);
effect(USE meth);
@@ -14052,51 +13820,9 @@ instruct CallStaticJavaDirect(method meth) %{
// Call Java Dynamic Instruction
-// Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
-// Loading of IC was postalloc expanded. The nodes loading the IC are reachable
-// via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
-// The call destination must still be placed in the constant pool.
-instruct CallDynamicJavaDirectSched(method meth) %{
- match(CallDynamicJava); // To get all the data fields we need ...
- effect(USE meth);
- predicate(false); // ... but never match.
-
- ins_field_load_ic_hi_node(loadConL_hiNode*);
- ins_field_load_ic_node(loadConLNode*);
- ins_num_consts(1 /* 1 patchable constant: call destination */);
-
- format %{ "BL \t// dynamic $meth ==> " %}
- size((Continuations::enabled() ? 8 : 4));
- ins_encode( enc_java_dynamic_call_sched(meth) );
- ins_pipe(pipe_class_call);
-%}
-
-// Schedulable (i.e. postalloc expanded) version of call dynamic java.
-// We use postalloc expanded calls if we use inline caches
-// and do not update method data.
-//
-// This instruction has two constants: inline cache (IC) and call destination.
-// Loading the inline cache will be postalloc expanded, thus leaving a call with
-// one constant.
-instruct CallDynamicJavaDirectSched_Ex(method meth) %{
- match(CallDynamicJava);
- effect(USE meth);
- predicate(UseInlineCaches);
- ins_cost(CALL_COST);
-
- ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
-
- format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
- postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
-%}
-
-// Compound version of call dynamic java
-// We use postalloc expanded calls if we use inline caches
-// and do not update method data.
instruct CallDynamicJavaDirect(method meth) %{
match(CallDynamicJava);
effect(USE meth);
- predicate(!UseInlineCaches);
ins_cost(CALL_COST);
// Enc_java_to_runtime_call needs up to 4 constants (method data oop).
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 54336e9f62b..fc444cb6923 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -1713,10 +1713,8 @@ static void gen_continuation_enter(MacroAssembler* masm,
check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue");
check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");
- address resolve_static_call = SharedRuntime::get_resolve_static_call_stub();
-
+ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type);
address start = __ pc();
-
Label L_thaw, L_exit;
// i2i entry used at interp_only_mode only
@@ -1753,33 +1751,17 @@ static void gen_continuation_enter(MacroAssembler* masm,
// Emit compiled static call. The call will be always resolved to the c2i
// entry of Continuation.enter(Continuation c, boolean isContinue).
- // There are special cases in SharedRuntime::resolve_static_call_C() and
- // SharedRuntime::resolve_sub_helper_internal() to achieve this
- // See also corresponding call below.
- address c2i_call_pc = __ pc();
- int start_offset = __ offset();
- // Put the entry point as a constant into the constant pool.
- const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none);
- const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
- guarantee(entry_point_toc_addr != nullptr, "const section overflow");
+ address c2i_call_pc = __ trampoline_call(resolve);
+ guarantee(c2i_call_pc != nullptr, "CodeCache is full at gen_continuation_enter");
- // Emit the trampoline stub which will be related to the branch-and-link below.
- address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset);
- guarantee(stub != nullptr, "no space for trampoline stub");
+ // Emit stub for static call
+ address stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
+ guarantee(stub != nullptr, "CodeCache is full at gen_continuation_enter");
- __ relocate(relocInfo::static_call_type);
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ bl(__ pc());
oop_maps->add_gc_map(__ pc() - start, map);
__ post_call_nop();
__ b(L_exit);
-
- // static stub for the call above
- stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
- guarantee(stub != nullptr, "no space for static stub");
}
// compiled entry
@@ -1804,22 +1786,9 @@ static void gen_continuation_enter(MacroAssembler* masm,
// SharedRuntime::find_callee_info_helper() which calls
// LinkResolver::resolve_continuation_enter() which resolves the call to
// Continuation.enter(Continuation c, boolean isContinue).
- address call_pc = __ pc();
- int start_offset = __ offset();
- // Put the entry point as a constant into the constant pool.
- const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none);
- const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
- guarantee(entry_point_toc_addr != nullptr, "const section overflow");
+ address call_pc = __ trampoline_call(resolve);
+ guarantee(call_pc != nullptr, "CodeCache is full at gen_continuation_enter");
- // Emit the trampoline stub which will be related to the branch-and-link below.
- address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset);
- guarantee(stub != nullptr, "no space for trampoline stub");
-
- __ relocate(relocInfo::static_call_type);
- // Note: At this point we do not have the address of the trampoline
- // stub, and the entry point might be too far away for bl, so __ pc()
- // serves as dummy and the bl will be patched later.
- __ bl(__ pc());
oop_maps->add_gc_map(__ pc() - start, map);
__ post_call_nop();
@@ -1872,8 +1841,8 @@ static void gen_continuation_enter(MacroAssembler* masm,
__ blr();
// static stub for the call above
- stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
- guarantee(stub != nullptr, "no space for static stub");
+ address stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
+ guarantee(stub != nullptr, "CodeCache is full at gen_continuation_enter");
}
static void gen_continuation_yield(MacroAssembler* masm,
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index f528587a8bb..4d3b99afa1c 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -5038,9 +5038,8 @@ void generate_lookup_secondary_supers_table_stub() {
}
void generate_compiler_stubs() {
-#if COMPILER2_OR_JVMCI
-
#ifdef COMPILER2
+
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
@@ -5058,7 +5057,6 @@ void generate_lookup_secondary_supers_table_stub() {
StubRoutines::_montgomerySquare
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
}
-#endif
// data cache line writeback
if (VM_Version::supports_data_cache_line_flush()) {
@@ -5091,7 +5089,7 @@ void generate_lookup_secondary_supers_table_stub() {
StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
}
#endif
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
public:
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
index 5097d7ec58d..fd78b429ee4 100644
--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
@@ -228,7 +228,7 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
Assembler::IncompressibleScope scope(masm); // Fixed length: see entry_barrier_offset()
- Label local_guard;
+ Label local_guard, skip_barrier;
NMethodPatchingType patching_type = nmethod_patching_type();
if (slow_path == nullptr) {
@@ -290,24 +290,26 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo
ShouldNotReachHere();
}
+ Label& barrier_target = slow_path == nullptr ? skip_barrier : *slow_path;
if (slow_path == nullptr) {
- Label skip_barrier;
- __ beq(t0, t1, skip_barrier);
+ __ beq(t0, t1, barrier_target, true /* is_far */);
+ } else {
+ __ bne(t0, t1, barrier_target, true /* is_far */);
+ }
+ if (slow_path == nullptr) {
__ rt_call(StubRoutines::method_entry_barrier());
-
__ j(skip_barrier);
__ bind(local_guard);
MacroAssembler::assert_alignment(__ pc());
__ emit_int32(0); // nmethod guard value. Skipped over in common case.
- __ bind(skip_barrier);
} else {
- __ beq(t0, t1, *continuation);
- __ j(*slow_path);
__ bind(*continuation);
}
+
+ __ bind(skip_barrier);
}
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp
index 5003b9584a3..ff15a0c6937 100644
--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -35,23 +35,19 @@
#include "runtime/sharedRuntime.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciRuntime.hpp"
-#endif
static int slow_path_size(nmethod* nm) {
// The slow path code is out of line with C2.
- // Leave a jal to the stub in the fast path.
- return nm->is_compiled_by_c2() ? 1 : 8;
+ return nm->is_compiled_by_c2() ? 0 : 4;  // entry_barrier_offset() adds this to its count and multiplies by 4 — presumably 4-byte instruction slots, confirm
}
static int entry_barrier_offset(nmethod* nm) {
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
switch (bs_asm->nmethod_patching_type()) {
case NMethodPatchingType::stw_instruction_and_data_patch:
- return -4 * (4 + slow_path_size(nm));
+ return -4 * (5 + slow_path_size(nm));
case NMethodPatchingType::conc_instruction_and_data_patch:
- return -4 * (15 + slow_path_size(nm));
+ return -4 * ((UseZtso ? 14 : 16) + slow_path_size(nm));
}
ShouldNotReachHere();
return 0;
@@ -75,34 +71,26 @@ class NativeNMethodBarrier {
public:
NativeNMethodBarrier(nmethod* nm): _nm(nm) {
-#if INCLUDE_JVMCI
- if (nm->is_compiled_by_jvmci()) {
- address pc = nm->code_begin() + nm->jvmci_nmethod_data()->nmethod_entry_patch_offset();
- RelocIterator iter(nm, pc, pc + 4);
- guarantee(iter.next(), "missing relocs");
- guarantee(iter.type() == relocInfo::section_word_type, "unexpected reloc");
-
- _guard_addr = (int*) iter.section_word_reloc()->target();
- _instruction_address = pc;
- } else
-#endif
- {
- _instruction_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
- if (nm->is_compiled_by_c2()) {
- // With c2 compiled code, the guard is out-of-line in a stub
- // We find it using the RelocIterator.
- RelocIterator iter(nm);
- while (iter.next()) {
- if (iter.type() == relocInfo::entry_guard_type) {
- entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
- _guard_addr = reinterpret_cast<int*>(reloc->addr());
- return;
- }
- }
- ShouldNotReachHere();
+ _instruction_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);  // barrier start, located relative to the frame-complete point
+ if (nm->is_compiled_by_c2()) {
+ // With c2 compiled code, the guard is out-of-line in a stub
+ // We find it using the RelocIterator (first entry_guard_type relocation wins).
+ RelocIterator iter(nm);
+ while (iter.next()) {
+ if (iter.type() == relocInfo::entry_guard_type) {
+ entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
+ _guard_addr = reinterpret_cast<int*>(reloc->addr());
+ return;
}
- _guard_addr = reinterpret_cast<int*>(instruction_address() + local_guard_offset(nm));
}
+
+ ShouldNotReachHere();
+ }
+ _guard_addr = reinterpret_cast<int*>(instruction_address() + local_guard_offset(nm));  // non-C2 code: guard word sits at a fixed offset from the barrier
+
+ // Perform the checking as verification. Note: the C2 path above returns before reaching this check.
+ err_msg msg("%s", "");
+ assert(check_barrier(msg), "%s", msg.buffer());
}
int get_value() {
@@ -128,10 +116,6 @@ public:
}
bool check_barrier(err_msg& msg) const;
- void verify() const {
- err_msg msg("%s", "");
- assert(check_barrier(msg), "%s", msg.buffer());
- }
};
// Store the instruction bitmask, bits and name for checking the barrier.
@@ -142,8 +126,8 @@ struct CheckInsn {
};
static const struct CheckInsn barrierInsn[] = {
- { 0x00000fff, 0x00000297, "auipc t0, 0 "},
- { 0x000fffff, 0x0002e283, "lwu t0, guard_offset(t0) "},
+ { 0x00000fff, 0x00000297, "auipc t0, 0 " },
+ { 0x000fffff, 0x0002e283, "lwu t0, guard_offset(t0)" },
/* ...... */
/* ...... */
/* guard: */
@@ -155,10 +139,11 @@ static const struct CheckInsn barrierInsn[] = {
// register numbers and immediate values in the encoding.
bool NativeNMethodBarrier::check_barrier(err_msg& msg) const {
address addr = instruction_address();
- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) {
+ for (unsigned int i = 0; i < sizeof(barrierInsn) / sizeof(struct CheckInsn); i++) {
uint32_t inst = Assembler::ld_instr(addr);
if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) {
- msg.print("Addr: " INTPTR_FORMAT " Code: 0x%x not an %s instruction", p2i(addr), inst, barrierInsn[i].name);
+ msg.print("Nmethod entry barrier did not start with auipc & lwu as expected. "
+ "Addr: " INTPTR_FORMAT " Code: 0x%x not an %s instruction.", p2i(addr), inst, barrierInsn[i].name);
return false;
}
addr += 4;
@@ -234,10 +219,3 @@ int BarrierSetNMethod::guard_value(nmethod* nm) {
NativeNMethodBarrier barrier(nm);
return barrier.get_value();
}
-
-#if INCLUDE_JVMCI
-bool BarrierSetNMethod::verify_barrier(nmethod* nm, err_msg& msg) {
- NativeNMethodBarrier barrier(nm);
- return barrier.check_barrier(msg);
-}
-#endif
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
deleted file mode 100644
index 11c4e5dc81b..00000000000
--- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "c1/c1_LIRAssembler.hpp"
-#include "c1/c1_MacroAssembler.hpp"
-#include "gc/shared/gc_globals.hpp"
-#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-
-#define __ masm->masm()->
-
-void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {
- Register addr = _addr->as_register_lo();
- Register newval = _new_value->as_register();
- Register cmpval = _cmp_value->as_register();
- Register tmp1 = _tmp1->as_register();
- Register tmp2 = _tmp2->as_register();
- Register result = result_opr()->as_register();
-
- if (UseCompressedOops) {
- __ encode_heap_oop(tmp1, cmpval);
- cmpval = tmp1;
- __ encode_heap_oop(tmp2, newval);
- newval = tmp2;
- }
-
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq,
- /* release */ Assembler::rl, /* is_cae */ false, result);
-}
-
-#undef __
-
-#ifdef ASSERT
-#define __ gen->lir(__FILE__, __LINE__)->
-#else
-#define __ gen->lir()->
-#endif
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
- BasicType bt = access.type();
- if (access.is_oop()) {
- LIRGenerator *gen = access.gen();
- if (ShenandoahSATBBarrier) {
- pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
- LIR_OprFact::illegalOpr /* pre_val */);
- }
- if (ShenandoahCASBarrier) {
- cmp_value.load_item();
- new_value.load_item();
-
- LIR_Opr tmp1 = gen->new_register(T_OBJECT);
- LIR_Opr tmp2 = gen->new_register(T_OBJECT);
- LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
- LIR_Opr result = gen->new_register(T_INT);
-
- __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result));
-
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), new_value.result());
- }
- return result;
- }
- }
-
- return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
-}
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
- LIRGenerator* gen = access.gen();
- BasicType type = access.type();
-
- LIR_Opr result = gen->new_register(type);
- value.load_item();
- LIR_Opr value_opr = value.result();
-
- assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
- LIR_Opr tmp = gen->new_register(T_INT);
- __ xchg(access.resolved_addr(), value_opr, result, tmp);
-
- if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators());
- LIR_Opr tmp_opr = gen->new_register(type);
- __ move(result, tmp_opr);
- result = tmp_opr;
- if (ShenandoahSATBBarrier) {
- pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr,
- result /* pre_val */);
- }
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), result);
- }
- }
-
- return result;
-}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
index 156c6b9e5a8..ee8ff1b308f 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -42,6 +42,10 @@
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif
+#ifdef COMPILER2
+#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
+#include "opto/output.hpp"
+#endif
#define __ masm->
@@ -768,3 +772,321 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
+
+#ifdef COMPILER2
+
+#undef __
+#define __ masm->
+
+void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow) {
+ // Emit the load from src into dst. This load is the candidate for implicit null check, and MUST come first.
+ if (is_narrow) {
+ __ lwu(dst, src);  // is_narrow: 32-bit unsigned (zero-extending) load
+ } else {
+ __ ld(dst, src);  // otherwise: 64-bit load
+ }
+
+ ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);  // everything after the load is delegated to load_post()
+}
+
+void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
+ Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3) {
+
+ ShenandoahBarrierStubC2::store_pre(masm, node, tmp1, dst, tmp2, tmp3, dst_narrow);  // pre-store hook, emitted before the store itself
+
+ // Do the actual store: a 32-bit sw for a narrow destination, a 64-bit sd otherwise.
+ if (dst_narrow) {
+ if (!src_narrow) {
+ // Need to encode into tmp, because we cannot clobber src.
+ assert(tmp1 != noreg, "need temp register");
+ if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
+ __ encode_heap_oop(tmp1, src);  // not-null bit is clear: use the general encode
+ } else {
+ __ encode_heap_oop_not_null(tmp1, src);  // not-null bit is set: use the not-null encode
+ }
+ src = tmp1;  // from here on, store the encoded copy held in tmp1
+ }
+ __ sw(src, dst);
+ } else {
+ __ sd(src, dst);
+ }
+
+ ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);  // post-store hook; tmp1 is not passed on
+}
+
+void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
+ Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool is_acquire) {
+ const Assembler::Aqrl acquire = is_acquire ? Assembler::aq : Assembler::relaxed;  // acquire ordering only when requested
+ const Assembler::Aqrl release = Assembler::rl;  // release ordering is always requested
+ const Assembler::operand_size size = narrow ? Assembler::uint32 : Assembler::int64;  // operand width follows the narrow flag
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, Address(addr), tmp2, tmp3, narrow);
+
+ // CAS! When exchange is false, res is requested as a boolean (result_as_bool).
+ __ cmpxchg(addr, oldval, newval, size, acquire, release, /* result */ res, !exchange /* result_as_bool */);
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
+}
+
+void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
+ Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
+ const bool is_narrow = node->bottom_type()->isa_narrowoop();  // width comes from the node's type, not from a parameter
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp1, Address(addr, 0), tmp2, tmp3, is_narrow);
+
+ if (is_narrow) {
+ if (is_acquire) {
+ __ atomic_xchgalwu(preval, newval, addr);  // narrow + acquire variant
+ } else {
+ __ atomic_xchgwu(preval, newval, addr);  // narrow, no acquire
+ }
+ } else {
+ if (is_acquire) {
+ __ atomic_xchgal(preval, newval, addr);  // full-width + acquire variant
+ } else {
+ __ atomic_xchg(preval, newval, addr);  // full-width, no acquire
+ }
+ }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
+}
+
+#undef __
+#define __ masm.
+
+void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
+ assert(CardTable::dirty_card_val() == 0, "must be");  // the zr stores below rely on "dirty" being encoded as 0
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+
+ // tmp1 = card table base (holder), read from the current thread's local data
+ Address curr_ct_holder_addr(xthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
+ __ ld(tmp1, curr_ct_holder_addr);
+
+ // tmp2 = effective address
+ __ la(tmp2, address);
+
+ // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
+ __ srli(tmp2, tmp2, CardTable::card_shift());
+ __ add(tmp2, tmp2, tmp1);
+
+ if (UseCondCardMark) {
+ Label L_already_dirty;
+ __ lbu(tmp1, Address(tmp2));  // tmp1 is reused here for the current card value
+ __ beqz(tmp1, L_already_dirty);  // card already holds 0 (dirty): skip the store
+ __ sb(zr, Address(tmp2));
+ __ bind(L_already_dirty);
+ } else {
+ __ sb(zr, Address(tmp2));  // unconditionally write 0 (dirty) to the card
+ }
+}
+
+void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+
+ Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));  // per-thread byte selected by test_state
+ __ lbu(tmp, gc_state_fast);
+ __ beqz(tmp, *continuation());  // byte is zero: skip the stub
+ __ j(*entry());  // byte is non-zero: enter the stub
+
+ // This is where the slowpath stub will return to, or where the code above
+ // will jump to if the check is false.
+ __ bind(*continuation());
+}
+
+void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
+
+ __ bind(*entry());  // stub entry; enter_if_gc_state() jumps here
+
+ // If we need to load ourselves, do it here (32-bit unsigned load when _narrow, 64-bit otherwise).
+ if (_do_load) {
+ if (_narrow) {
+ __ lwu(_obj, _addr);
+ } else {
+ __ ld(_obj, _addr);
+ }
+ }
+
+ // If the object is null, there is no point in applying barriers: jump to continuation().
+ maybe_far_jump_if_zero(masm, _obj);
+
+ // We need to make sure that loads done by callers survive across slow-path calls.
+ // For self-loads, we need to care about the case when both KA (keep-alive) and LRB (load-reference barrier) are enabled (rare).
+ bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
+ if (!_do_load || needs_both_barriers) {
+ preserve(_obj);
+ }
+
+ // Go for barriers. Barriers can return straight to continuation, as long
+ // as another barrier is not needed and we can reach the fastpath.
+ if (needs_both_barriers) {
+ keepalive(masm, nullptr);  // nullptr: keepalive falls through into lrb instead of jumping out
+ lrb(masm);
+ } else if (_needs_keep_alive_barrier) {
+ keepalive(masm, continuation());
+ } else if (_needs_load_ref_barrier) {
+ lrb(masm);
+ } else {
+ ShouldNotReachHere();  // excluded by the assert at the top of this function
+ }
+}
+
+void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
+ Label L_short_jump;  // NOTE(review): inverted branch + j is presumably used because continuation() may be out of conditional-branch range — confirm
+ __ bnez(reg, L_short_jump);  // reg != 0: skip the jump and continue with the code emitted after this helper
+ __ j(*continuation());  // reg == 0: unconditional jump to continuation()
+ __ bind(L_short_jump);
+}
+
+void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
+ Address index(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
+ Address buffer(xthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
+ Label L_through, L_slowpath;  // L_through is only bound (and only targeted) when L_done is null
+
+ // If another barrier is enabled as well, do a runtime check for a specific barrier.
+ if (_needs_load_ref_barrier) {
+ assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
+ Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
+ __ lbu(_tmp1, gc_state_fast);
+ __ beqz(_tmp1, L_through);  // MARKING byte is zero: skip the keep-alive work entirely
+ }
+
+ // Fast-path: put object into buffer.
+ // If buffer is already full (index == 0), go slow.
+ __ ld(_tmp1, index);
+ __ beqz(_tmp1, L_slowpath);
+ __ subi(_tmp1, _tmp1, wordSize);  // index is decremented by one word and written back
+ __ sd(_tmp1, index);
+ __ ld(_tmp2, buffer);
+
+ // Store the object in queue, at buffer + new index.
+ // If object is narrow, we need to decode it before inserting.
+ __ add(_tmp1, _tmp1, _tmp2);
+ if (_narrow) {
+ __ decode_heap_oop_not_null(_tmp2, _obj);
+ __ sd(_tmp2, Address(_tmp1));
+ } else {
+ __ sd(_obj, Address(_tmp1));
+ }
+
+ // Fast-path exits here: to the caller-supplied label, or past the slow path.
+ if (L_done != nullptr) {
+ __ j(*L_done);
+ } else {
+ __ j(L_through);
+ }
+
+ // Slow-path: call runtime to handle.
+ __ bind(L_slowpath);
+
+ {
+ SaveLiveRegisters slr(&masm, this);  // scoped object spanning just the runtime call below
+
+ // Go to runtime and handle the rest there. _obj is the only argument.
+ __ mv(c_rarg0, _obj);
+ __ la(ra, RuntimeAddress(keepalive_runtime_entry_addr()));
+ __ jalr(ra);
+ }
+ if (L_done != nullptr) {
+ __ j(*L_done);
+ } else {
+ __ bind(L_through);  // fall through into whatever the caller emits next
+ }
+}
+
+void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
+ Label L_slow;
+
+ // If another barrier is enabled as well, do a runtime check for a specific barrier.
+ if (_needs_keep_alive_barrier) {
+ char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
+ Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
+ __ lbu(_tmp1, gc_state_fast);
+ maybe_far_jump_if_zero(masm, _tmp1);  // state byte is zero: jump to continuation()
+ }
+
+ // If weak references are being processed, weak/phantom loads need to go slow,
+ // regardless of their cset status.
+ if (_needs_load_ref_weak_barrier) {
+ Address gc_state_fast(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
+ __ lbu(_tmp1, gc_state_fast);
+ __ bnez(_tmp1, L_slow);
+ }
+
+ // Cset-check. Fall-through to slow if in collection set.
+ if (_narrow) {
+ __ decode_heap_oop_not_null(_tmp2, _obj);
+ } else {
+ __ mv(_tmp2, _obj);
+ }
+
+ __ mv(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
+ __ srli(_tmp2, _tmp2, ShenandoahHeapRegion::region_size_bytes_shift_jint());  // _tmp2 = object address shifted down by the region-size shift
+ __ add(_tmp1, _tmp1, _tmp2);
+ __ lbu(_tmp1, Address(_tmp1, 0));  // one byte per table entry
+ maybe_far_jump_if_zero(masm, _tmp1);  // entry is zero: jump to continuation()
+
+ // Slow path
+ __ bind(L_slow);
+
+ // Obj is the result, need to temporarily stop preserving it.
+ bool is_obj_preserved = is_preserved(_obj);
+ if (is_obj_preserved) {
+ dont_preserve(_obj);
+ }
+ {
+ SaveLiveRegisters slr(&masm, this);
+
+ // Shuffle in the arguments. The end result should be:
+ // c_rarg0 <- obj
+ // c_rarg1 <- lea(addr)
+ if (c_rarg0 == _obj) {
+ __ la(c_rarg1, _addr);  // obj is already in place; only the address is needed
+ } else if (c_rarg1 == _obj) {
+ // Set up arguments in reverse, and then flip them
+ __ la(c_rarg0, _addr);
+ // flip them, using _tmp1 as the swap temporary
+ __ mv(_tmp1, c_rarg0);
+ __ mv(c_rarg0, c_rarg1);
+ __ mv(c_rarg1, _tmp1);
+ } else {
+ assert_different_registers(c_rarg1, _obj);
+ __ la(c_rarg1, _addr);  // address first, then obj
+ __ mv(c_rarg0, _obj);
+ }
+
+ // Go to runtime and handle the rest there.
+ __ la(ra, RuntimeAddress(lrb_runtime_entry_addr()));
+ __ jalr(ra);
+
+ // Save the result where needed. Narrow entries return a narrowOop (32 bits),
+ // so we need to zero the upper 32 bits of x10.
+ if (_narrow) {
+ __ zext_w(_obj, x10);
+ } else {
+ __ mv(_obj, x10);
+ }
+ }
+ if (is_obj_preserved) {
+ preserve(_obj);  // restore the preservation state changed before the call
+ }
+
+ __ j(*continuation());
+}
+
+int ShenandoahBarrierStubC2::available_gp_registers() {
+ Unimplemented(); // Not used by this implementation; flagged via Unimplemented() if it is ever called
+ return 0;  // placeholder value, only reached if Unimplemented() returns
+}
+
+bool ShenandoahBarrierStubC2::is_special_register(Register r) {
+ Unimplemented(); // Not used by this implementation; flagged via Unimplemented() if it is ever called
+ return true;  // placeholder value, only reached if Unimplemented() returns
+}
+
+void ShenandoahBarrierStubC2::post_init() {
+ // Do nothing: this implementation has no post-initialization work.
+}
+
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
index eed20e00a65..d1260eac57b 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
@@ -37,6 +37,9 @@ class ShenandoahPreBarrierStub;
class ShenandoahLoadReferenceBarrierStub;
class StubAssembler;
#endif
+#ifdef COMPILER2
+class MachNode;
+#endif // COMPILER2
class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
@@ -63,13 +66,6 @@ public:
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_instruction_and_data_patch; }
-#ifdef COMPILER1
- void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
- void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
- void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
-#endif
-
virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
Register src, Register dst, Register count, RegSet saved_regs);
@@ -87,6 +83,24 @@ public:
Register tmp, Label& slow_path);
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result);
+
+#ifdef COMPILER1
+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
+#endif
+
+#ifdef COMPILER2
+ // Entry points from Matcher
+ void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address addr, Register tmp1, Register tmp2, bool is_narrow);
+ void store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow, Register src,
+ bool src_narrow, Register tmp1, Register tmp2, Register tmp3);
+ void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval,
+ Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool is_acquire);
+ void get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval, Register newval,
+ Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire);
+#endif
};
#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad
index 81bcd3c1362..2af0ea31e66 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad
@@ -23,263 +23,438 @@
//
//
-source_hpp %{
+source %{
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp"
%}
-instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
- effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah"
- %}
-
+instruct load_P_shenandoah(iRegPNoSp dst, memory mem, rFlagsReg cr)
+%{
+ match(Set dst (LoadP mem)); // pointer load; code is emitted by ShenandoahBarrierSetAssembler::load_c2
+ predicate(UseShenandoahGC && n->as_Load()->barrier_data() != 0); // only loads with non-zero barrier data
+ effect(TEMP_DEF dst, KILL cr);
+ // The main load is a candidate to implement implicit null checks.
+ ins_is_late_expanded_null_check_candidate(true);
+ format %{ "ld $dst, $mem\t# ptr, shenandoah late-barrier, #@load_P_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::relaxed /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm, as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp), /* tmp1 */ t0, /* tmp2 */ t1, /* is_narrow */ false);
%}
-
- ins_pipe(pipe_slow);
+ ins_cost(3 * LOAD_COST);
+ ins_pipe(iload_reg_mem);
%}
-instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
- effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah"
- %}
-
+instruct load_N_shenandoah(iRegNNoSp dst, memory mem, rFlagsReg cr)
+%{
+ match(Set dst (LoadN mem)); // compressed-pointer load; code is emitted by ShenandoahBarrierSetAssembler::load_c2
+ predicate(UseShenandoahGC && n->as_Load()->barrier_data() != 0); // only loads with non-zero barrier data
+ effect(TEMP_DEF dst, KILL cr);
+ // The main load is a candidate to implement implicit null checks.
+ ins_is_late_expanded_null_check_candidate(true);
+ format %{ "lwu $dst, $mem\t# compressed ptr, shenandoah late-barrier, #@load_N_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::relaxed /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm, as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp), /* tmp1 */ t0, /* tmp2 */ t1, /* is_narrow */ true);
%}
-
- ins_pipe(pipe_slow);
+ ins_cost(3 * LOAD_COST);
+ ins_pipe(iload_reg_mem);
%}
-instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_reserved(n));
- match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
+instruct storeP_shenandoah(iRegP src, memory mem, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreP mem src)); // pointer store; code is emitted by ShenandoahBarrierSetAssembler::store_c2
+ predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); // only stores with non-zero barrier data
effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah"
- %}
-
+ format %{ "sd $src, $mem\t# ptr, shenandoah late-barrier, #@storeP_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::aq /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ Address(as_Register($mem$$base), $mem$$disp), /* dst_narrow = */ false,
+ as_Register($src$$reg), /* src_narrow = */ false,
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1); // tmp3
%}
-
- ins_pipe(pipe_slow);
+ ins_cost(3 * STORE_COST);
+ ins_pipe(istore_reg_mem);
%}
-instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_reserved(n));
- match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
+instruct storeN_shenandoah(iRegN src, memory mem, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem src)); // compressed-pointer store; code is emitted by ShenandoahBarrierSetAssembler::store_c2
+ predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); // only stores with non-zero barrier data
effect(TEMP tmp, KILL cr);
-
- format %{
- "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah"
- %}
-
+ format %{ "sw $src, $mem\t# compressed ptr, shenandoah late-barrier, #@storeN_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::aq /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ Address(as_Register($mem$$base), $mem$$disp), /* dst_narrow = */ true,
+ as_Register($src$$reg), /* src_narrow = */ true,
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1); // tmp3
%}
-
- ins_pipe(pipe_slow);
+ ins_cost(3 * STORE_COST);
+ ins_pipe(istore_reg_mem);
%}
-instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
+instruct encodePAndStoreN_shenandoah(iRegP src, memory mem, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem (EncodeP src))); // matches EncodeP + StoreN as one rule: full-width src, narrow destination
+ predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); // only stores with non-zero barrier data
+ effect(TEMP tmp, KILL cr);
+ format %{
+ "encode_heap_oop $tmp, $src\n\t"
+ "sw $tmp, $mem\t# compressed ptr, shenandoah late-barrier, #@encodePAndStoreN_shenandoah"
+ %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm,
+ Address(as_Register($mem$$base), $mem$$disp), /* dst_narrow = */ true,
+ as_Register($src$$reg), /* src_narrow = */ false,
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1); // tmp3
+ %}
+ ins_cost(4 * STORE_COST);
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct compareAndSwap_P_A_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
effect(TEMP_DEF res, TEMP tmp, KILL cr);
-
- format %{
- "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah"
- %}
-
+ format %{ "cmpxchg_P_A_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndSwap_P_A_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::relaxed /* acquire */, Assembler::rl /* release */,
- true /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ false,
+ /* acquire */ true);
%}
-
+ ins_cost(10 * DEFAULT_COST);
ins_pipe(pipe_slow);
%}
-instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
+instruct compareAndSwap_P_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
effect(TEMP_DEF res, TEMP tmp, KILL cr);
- format %{
- "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah"
- %}
-
+ format %{ "cmpxchg_P_N_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndSwap_P_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::relaxed /* acquire */, Assembler::rl /* release */,
- true /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ false,
+ /* acquire */ false);
%}
-
+ ins_cost(10 * DEFAULT_COST);
ins_pipe(pipe_slow);
%}
-instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
- effect(TEMP tmp, KILL cr);
- format %{
- "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah"
- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
- %}
-
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::relaxed /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
- %}
-
- ins_pipe(pipe_slow);
-%}
-
-instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_reserved(n));
- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
+instruct compareAndSwap_N_A_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
effect(TEMP_DEF res, TEMP tmp, KILL cr);
- format %{
- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah"
- %}
-
+ format %{ "cmpxchg_N_shenandoah $mem, $oldval, $newval\t# (narrow ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndSwap_N_A_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register);
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::aq /* acquire */, Assembler::rl /* release */,
- true /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ true,
+ /* acquire */ true);
%}
-
+ ins_cost(10 * DEFAULT_COST);
ins_pipe(pipe_slow);
%}
-instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_reserved(n));
- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
- ins_cost(10 * DEFAULT_COST);
-
+instruct compareAndSwap_N_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
effect(TEMP_DEF res, TEMP tmp, KILL cr);
- format %{
- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah"
- %}
-
+ format %{ "cmpxchg_N_shenandoah $mem, $oldval, $newval\t# (narrow ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndSwap_N_shenandoah" %}
ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register);
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::aq /* acquire */, Assembler::rl /* release */,
- true /* is_cae */, $res$$Register);
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ true,
+ /* acquire */ false);
%}
-
- ins_pipe(pipe_slow);
-%}
-
-instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(10 * DEFAULT_COST);
-
- effect(TEMP tmp, KILL cr);
- format %{
- "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah"
- %}
-
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::relaxed /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
- %}
-
ins_pipe(pipe_slow);
%}
-instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_reserved(n));
- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
+instruct compareAndExchange_N_A_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_N_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval, #@compareAndExchange_N_A_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ true,
+ /* is_narrow */ true,
+ /* acquire */ true);
+ %}
ins_cost(10 * DEFAULT_COST);
-
- effect(TEMP tmp, KILL cr);
- format %{
- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah"
- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
- %}
-
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::aq /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
- %}
-
ins_pipe(pipe_slow);
%}
-instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
- predicate(needs_acquiring_load_reserved(n));
- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
+instruct compareAndExchange_N_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_N_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval, #@compareAndExchange_N_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ true,
+ /* is_narrow */ true,
+ /* acquire */ false);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchange_P_A_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_P_shenandoah $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndExchange_P_A_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ true,
+ /* is_narrow */ false,
+ /* acquire */ true);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchange_P_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_P_shenandoah $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndExchange_P_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ true,
+ /* is_narrow */ false,
+ /* acquire */ false);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwap_N_A_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); // encoded with the same compare_and_set_c2 arguments as the CompareAndSwapN acquire rule
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_N_weak_shenandoah $res = $mem, $oldval, $newval\t# (N, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwap_N_A_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ true,
+ /* acquire */ true);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwap_N_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); // encoded with the same compare_and_set_c2 arguments as the CompareAndSwapN non-acquire rule
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_N_weak_shenandoah $res = $mem, $oldval, $newval\t# (N, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwap_N_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ true,
+ /* acquire */ false);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwap_P_A_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); // encoded with the same compare_and_set_c2 arguments as the CompareAndSwapP acquire rule
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_P_weak_shenandoah $res = $mem, $oldval, $newval\t# (P, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwap_P_A_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ false,
+ /* acquire */ true);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwap_P_shenandoah(iRegINoSp res, indirect mem, iRegPNoSp oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); // encoded with the same compare_and_set_c2 arguments as the CompareAndSwapP non-acquire rule
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
+ format %{ "cae_P_weak_shenandoah $res = $mem, $oldval, $newval\t# (P, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwap_P_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm,
+ as_Register($res$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($oldval$$reg),
+ as_Register($newval$$reg),
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* exchange */ false,
+ /* is_narrow */ false,
+ /* acquire */ false);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct getAndSet_P_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set preval (GetAndSetP mem newval)); // pointer get-and-set; code is emitted by ShenandoahBarrierSetAssembler::get_and_set_c2
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "get_and_set_P $preval, $newval, [$mem], #@getAndSet_P_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm,
+ as_Register($preval$$reg),
+ as_Register($newval$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* acquire */ false);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct getAndSet_P_A_shenandoah(indirect mem, iRegP newval, iRegPNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set preval (GetAndSetP mem newval)); // pointer get-and-set; code is emitted by ShenandoahBarrierSetAssembler::get_and_set_c2
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "get_and_set_P $preval, $newval, [$mem], #@getAndSet_P_A_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm,
+ as_Register($preval$$reg),
+ as_Register($newval$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* acquire */ true);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct getAndSet_N_A_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set preval (GetAndSetN mem newval)); // compressed-pointer get-and-set; code is emitted by ShenandoahBarrierSetAssembler::get_and_set_c2
+ predicate(UseShenandoahGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // acquire variant
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "get_and_set_N $preval, $newval, [$mem], #@getAndSet_N_A_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm,
+ as_Register($preval$$reg),
+ as_Register($newval$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* acquire */ true);
+ %}
+ ins_cost(10 * DEFAULT_COST);
+ ins_pipe(pipe_slow);
+%}
+
+instruct getAndSet_N_shenandoah(indirect mem, iRegN newval, iRegNNoSp preval, iRegPNoSp tmp, rFlagsReg cr)
+%{
+ match(Set preval (GetAndSetN mem newval)); // compressed-pointer get-and-set; code is emitted by ShenandoahBarrierSetAssembler::get_and_set_c2
+ predicate(UseShenandoahGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); // non-acquire variant
+ effect(TEMP_DEF preval, TEMP tmp, KILL cr);
+ format %{ "get_and_set_N $preval, $newval, [$mem], #@getAndSet_N_shenandoah" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm,
+ as_Register($preval$$reg),
+ as_Register($newval$$reg),
+ as_Register($mem$$base), // addr
+ as_Register($tmp$$reg), // tmp1
+ t0, // tmp2
+ t1, // tmp3
+ /* acquire */ false);
+ %}
ins_cost(10 * DEFAULT_COST);
-
- effect(TEMP tmp, KILL cr);
- format %{
- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah"
- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
- %}
-
- ins_encode %{
- Register tmp = $tmp$$Register;
- __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
- Assembler::aq /* acquire */, Assembler::rl /* release */,
- false /* is_cae */, $res$$Register);
- %}
-
ins_pipe(pipe_slow);
%}
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
index 21b119266e2..18ca120bdfa 100644
--- a/src/hotspot/cpu/riscv/globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im
define_pd_global(bool, TrapBasedNullChecks, false);
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls past to check cast
-define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
+define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_PRESENT(true) NOT_COMPILER2(false));
define_pd_global(size_t, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment.
define_pd_global(uint, CodeEntryAlignment, 64);
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
index 804c2072ba5..bb56acb3f38 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -1220,7 +1220,7 @@ void InterpreterMacroAssembler::notify_method_exit(
// Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
// track stack depth. If it is possible to enter interp_only_mode we add
// the code to check if the event should be sent.
- if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+ if (mode == NotifyJVMTI && (JvmtiExport::can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
Label L;
// Note: frame::interpreter_frame_result has a dependency on how the
// method result is saved across the call to post_method_exit. If this
@@ -1229,8 +1229,15 @@ void InterpreterMacroAssembler::notify_method_exit(
// template interpreter will leave the result on the top of the stack.
push(state);
- lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
- beqz(x13, L);
+
+ ld(t1, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+ beqz(t1, L); // if (thread->jvmti_thread_state() == nullptr) exit;
+
+ lwu(t1, Address(t1, JvmtiThreadState::frame_pop_cnt_offset()));
+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
+ orr(t0, t0, t1);
+ beqz(t0, L);
+
call_VM(noreg,
CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
bind(L);
diff --git a/src/hotspot/cpu/riscv/jvmciCodeInstaller_riscv.cpp b/src/hotspot/cpu/riscv/jvmciCodeInstaller_riscv.cpp
deleted file mode 100644
index cbe387eed14..00000000000
--- a/src/hotspot/cpu/riscv/jvmciCodeInstaller_riscv.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.hpp"
-#include "jvmci/jvmci.hpp"
-#include "jvmci/jvmciCodeInstaller.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#include "jvmci/jvmciCompilerToVM.hpp"
-#include "jvmci/jvmciJavaClasses.hpp"
-#include "oops/compressedKlass.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/jniHandles.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "vmreg_riscv.inline.hpp"
-
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMCI_TRAPS) {
- address pc = (address) inst;
- if (inst->is_call()) {
- return pc_offset + NativeCall::byte_size();
- } else if (inst->is_jump()) {
- return pc_offset + NativeJump::instruction_size;
- } else if (inst->is_movptr1()) {
- return pc_offset + NativeMovConstReg::movptr1_instruction_size;
- } else if (inst->is_movptr2()) {
- return pc_offset + NativeMovConstReg::movptr2_instruction_size;
- } else {
- JVMCI_ERROR_0("unsupported type of instruction for call site");
- }
-}
-
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& obj, bool compressed, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- jobject value = JNIHandles::make_local(obj());
- MacroAssembler::patch_oop(pc, cast_from_oop(obj()));
- int oop_index = _oop_recorder->find_index(value);
- RelocationHolder rspec = oop_Relocation::spec(oop_index);
- _instructions->relocate(pc, rspec);
-}
-
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, HotSpotCompiledCodeStream* stream, u1 tag, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- if (tag == PATCH_NARROW_KLASS) {
- narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, stream, tag, JVMCI_CHECK);
- MacroAssembler::pd_patch_instruction_size(pc, (address) (long) narrowOop);
- JVMCI_event_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop);
- } else {
- NativeMovConstReg* move = nativeMovConstReg_at(pc);
- void* reference = record_metadata_reference(_instructions, pc, stream, tag, JVMCI_CHECK);
- move->set_data((intptr_t) reference);
- JVMCI_event_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference));
- }
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- address dest = _constants->start() + data_offset;
- _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS));
- JVMCI_event_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
-}
-
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, JVMCI_TRAPS) {
- address pc = (address) inst;
- if (inst->is_jal()) {
- NativeCall* call = nativeCall_at(pc);
- call->set_destination((address) foreign_call_destination);
- _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec());
- } else if (inst->is_jump()) {
- NativeJump* jump = nativeJump_at(pc);
- jump->set_jump_destination((address) foreign_call_destination);
- _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
- } else if (inst->is_movptr()) {
- NativeMovConstReg* movptr = nativeMovConstReg_at(pc);
- movptr->set_data((intptr_t) foreign_call_destination);
- _instructions->relocate(movptr->instruction_address(), runtime_call_Relocation::spec());
- } else {
- JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
- }
- JVMCI_event_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
-}
-
-void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, methodHandle& method, jint pc_offset, JVMCI_TRAPS) {
- Unimplemented();
-}
-
-bool CodeInstaller::pd_relocate(address pc, jint mark) {
- Unimplemented();
- return false;
-}
-
-// convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, JVMCI_TRAPS) {
- if (jvmci_reg < Register::number_of_registers) {
- return as_Register(jvmci_reg)->as_VMReg();
- } else {
- jint floatRegisterNumber = jvmci_reg - Register::number_of_registers;
- if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegister::number_of_registers) {
- return as_FloatRegister(floatRegisterNumber)->as_VMReg();
- }
- JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
- }
-}
-
-bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
- return !(hotspotRegister->is_FloatRegister() || hotspotRegister->is_VectorRegister());
-}
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 0e32c602d95..7b3e034906e 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -593,7 +593,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
Register offset = t1;
Label L_loop_search_receiver, L_loop_search_empty;
- Label L_restart, L_found_recv, L_found_empty, L_polymorphic, L_count_update;
+ Label L_restart, L_found_recv, L_found_empty, L_count_update;
// The code here recognizes three major cases:
// A. Fastest: receiver found in the table
@@ -623,21 +623,20 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
// if (receiver(i) == recv) goto found_recv(i);
// }
//
- // // Fast: no receiver, but profile is full
+ // // Fast: no receiver, but profile is not full
// for (i = 0; i < receiver_count(); i++) {
// if (receiver(i) == null) goto found_null(i);
// }
- // goto polymorphic
+ //
+ // // Slow: profile is full, polymorphic case
+ // count++;
+ // return
//
// // Slow: try to install receiver
// found_null(i):
// CAS(&receiver(i), null, recv);
// goto restart
//
- // polymorphic:
- // count++;
- // return
- //
// found_recv(i):
// *receiver_count(i)++
//
@@ -654,7 +653,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
sub(t0, offset, end_receiver_offset);
bnez(t0, L_loop_search_receiver);
- // Fast: no receiver, but profile is full
+ // Fast: no receiver, but profile is not full
mv(offset, base_receiver_offset);
bind(L_loop_search_empty);
add(t0, mdp, offset);
@@ -663,9 +662,13 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
add(offset, offset, receiver_step);
sub(t0, offset, end_receiver_offset);
bnez(t0, L_loop_search_empty);
- j(L_polymorphic);
- // Slow: try to install receiver
+ // Slow: Receiver is not found and table is full.
+ // Increment polymorphic counter instead of receiver slot.
+ mv(offset, poly_count_offset);
+ j(L_count_update);
+
+ // Slowest: try to install receiver
bind(L_found_empty);
// Atomically swing receiver slot: null -> recv.
@@ -683,16 +686,11 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
// and just restart the search from the beginning.
j(L_restart);
- // Counter updates:
- // Increment polymorphic counter instead of receiver slot.
- bind(L_polymorphic);
- mv(offset, poly_count_offset);
- j(L_count_update);
-
// Found a receiver, convert its slot offset to corresponding count offset.
bind(L_found_recv);
add(offset, offset, receiver_to_count_step);
+ // Finally, update the counter
bind(L_count_update);
add(t1, mdp, offset);
increment(Address(t1), DataLayout::counter_increment);
@@ -4117,7 +4115,7 @@ void MacroAssembler::membar(uint32_t order_constraint) {
}
address prev = pc() - MacroAssembler::instruction_size;
- address last = code()->last_insn();
+ address last = code()->last_merge_candidate();
if (last != nullptr && is_membar(last) && prev == last) {
// We are merging two memory barrier instructions. On RISCV we
@@ -4127,7 +4125,7 @@ void MacroAssembler::membar(uint32_t order_constraint) {
return;
}
- code()->set_last_insn(pc());
+ code()->set_last_merge_candidate(pc());
uint32_t predecessor = 0;
uint32_t successor = 0;
membar_mask_to_pred_succ(order_constraint, predecessor, successor);
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 4cc55e7ae23..6e592b5c852 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -876,7 +876,7 @@ public:
void bind(Label& L) {
Assembler::bind(L);
// fences across basic blocks should not be merged
- code()->clear_last_insn();
+ code()->clear_last_merge_candidate();
}
typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index e615b946b44..0c077dc84a3 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1146,8 +1146,6 @@ bool is_CAS(int opcode, bool maybe_volatile)
case Op_CompareAndSwapL:
case Op_CompareAndSwapP:
case Op_CompareAndSwapN:
- case Op_ShenandoahCompareAndSwapP:
- case Op_ShenandoahCompareAndSwapN:
case Op_CompareAndSwapB:
case Op_CompareAndSwapS:
case Op_GetAndSetI:
@@ -1169,10 +1167,6 @@ bool is_CAS(int opcode, bool maybe_volatile)
case Op_WeakCompareAndSwapL:
case Op_WeakCompareAndSwapP:
case Op_WeakCompareAndSwapN:
- case Op_ShenandoahWeakCompareAndSwapP:
- case Op_ShenandoahWeakCompareAndSwapN:
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN:
return maybe_volatile;
default:
return false;
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 8c343f6ab2b..9ad6663d0fa 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -59,9 +59,6 @@
#include "adfiles/ad_riscv.hpp"
#include "opto/runtime.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciJavaClasses.hpp"
-#endif
#define __ masm->
@@ -202,11 +199,9 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
#ifdef COMPILER2
__ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE));
#else
-#if !INCLUDE_JVMCI
- assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
+ assert(!_save_vectors, "vectors are generated only by C2");
__ pop_CPU_state(_save_vectors);
-#endif
+#endif // COMPILER2
__ leave();
}
@@ -492,18 +487,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
// Pre-load the register-jump target early, to schedule it better.
__ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset())));
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- // check if this call should be routed towards a specific entry point
- __ ld(t0, Address(xthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
- Label no_alternative_target;
- __ beqz(t0, no_alternative_target);
- __ mv(t1, t0);
- __ sd(zr, Address(xthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
- __ bind(no_alternative_target);
- }
-#endif // INCLUDE_JVMCI
-
// Now generate the shuffle code.
for (int i = 0; i < total_args_passed; i++) {
if (sig_bt[i] == T_VOID) {
@@ -1995,11 +1978,6 @@ void SharedRuntime::generate_deopt_blob() {
ResourceMark rm;
// Setup code generation tools
int pad = 0;
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- pad += 512; // Increase the buffer size when compiling for JVMCI
- }
-#endif
const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
CodeBuffer buffer(name, 2048 + pad, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
@@ -2007,7 +1985,7 @@ void SharedRuntime::generate_deopt_blob() {
OopMap* map = nullptr;
OopMapSet *oop_maps = new OopMapSet();
assert_cond(masm != nullptr && oop_maps != nullptr);
- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0);
+ RegisterSaver reg_saver(COMPILER2_PRESENT(true) NOT_COMPILER2(false));
// -------------
// This code enters when returning to a de-optimized nmethod. A return
@@ -2052,13 +2030,6 @@ void SharedRuntime::generate_deopt_blob() {
__ j(cont);
int reexecute_offset = __ pc() - start;
-#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (UseJVMCICompiler) {
- // JVMCI does not use this kind of deoptimization
- __ should_not_reach_here();
- }
-#endif
-
// Reexecute case
// return address is the pc describes what bci to do re-execute at
@@ -2068,42 +2039,6 @@ void SharedRuntime::generate_deopt_blob() {
__ mv(xcpool, Deoptimization::Unpack_reexecute); // callee-saved
__ j(cont);
-#if INCLUDE_JVMCI
- Label after_fetch_unroll_info_call;
- int implicit_exception_uncommon_trap_offset = 0;
- int uncommon_trap_offset = 0;
-
- if (EnableJVMCI) {
- implicit_exception_uncommon_trap_offset = __ pc() - start;
-
- __ ld(ra, Address(xthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
- __ sd(zr, Address(xthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
-
- uncommon_trap_offset = __ pc() - start;
-
- // Save everything in sight.
- reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
- // fetch_unroll_info needs to call last_java_frame()
- Label retaddr;
- __ set_last_Java_frame(sp, noreg, retaddr, t0);
-
- __ lw(c_rarg1, Address(xthread, in_bytes(JavaThread::pending_deoptimization_offset())));
- __ mv(t0, -1);
- __ sw(t0, Address(xthread, in_bytes(JavaThread::pending_deoptimization_offset())));
-
- __ mv(xcpool, Deoptimization::Unpack_reexecute);
- __ mv(c_rarg0, xthread);
- __ orrw(c_rarg2, zr, xcpool); // exec mode
- __ rt_call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
- __ bind(retaddr);
- oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
-
- __ reset_last_Java_frame(false);
-
- __ j(after_fetch_unroll_info_call);
- } // EnableJVMCI
-#endif // INCLUDE_JVMCI
-
int exception_offset = __ pc() - start;
// Prolog for exception case
@@ -2195,12 +2130,6 @@ void SharedRuntime::generate_deopt_blob() {
__ reset_last_Java_frame(false);
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- __ bind(after_fetch_unroll_info_call);
- }
-#endif
-
// Load UnrollBlock* into x15
__ mv(x15, x10);
@@ -2354,12 +2283,6 @@ void SharedRuntime::generate_deopt_blob() {
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
assert(_deopt_blob != nullptr, "create deoptimization blob fail!");
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
- _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
- }
-#endif
}
// Number of stack slots between incoming argument block and the start of
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 4656b5c0d41..13f3ed4de89 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -4754,7 +4754,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
#undef __
#define __ this->
@@ -6782,7 +6782,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
// x10 = input (float16)
// f10 = result (float)
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
index ec67a338052..b230c2657de 100644
--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -49,9 +49,6 @@ enum platform_dependent_constants {
class riscv {
friend class StubGenerator;
friend class StubRoutines;
-#if INCLUDE_JVMCI
- friend class JVMCIVMStructs;
-#endif
// declare fields for arch-specific entries
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index e1d8d062c23..38698370faa 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -2884,10 +2884,31 @@ void LIR_Assembler::on_spin_wait() {
}
+// Load the effective address described by addr_opr into the pointer register of dest.
+// A patch_code other than lir_patch_none is only handled for addresses without an
+// index register; for that case a PatchingStub (access_field_id) is recorded.
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
- assert(patch_code == lir_patch_none, "Patch code not supported");
+ assert(addr_opr->is_address(), "must be an address");
+ assert(dest->is_register(), "must be a register");
+
LIR_Address* addr = addr_opr->as_address_ptr();
+ Register reg = dest->as_pointer_register();
assert(addr->scale() == LIR_Address::times_1, "scaling unsupported");
- __ load_address(dest->as_pointer_register(), as_Address(addr));
+
+ if (addr->index()->is_illegal() && patch_code != lir_patch_none) {
+ // Patched form: reg = base + constant, with the constant emitted as 0.
+ // NOTE(review): the 0 is presumably a placeholder rewritten via the patching stub - confirm.
+ PatchingStub* patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+
+ // TODO: Use load_const_32to64 here by extending NativeMovRegMem to support both instruction patterns.
+ __ load_const(Z_R0_scratch, (intptr_t)0);
+ __ z_agrk(reg, addr->base()->as_pointer_register(), Z_R0_scratch);
+ patching_epilog(patch, patch_code, addr->base()->as_register(), info);
+ } else {
+ // Patching an address that uses an index register is not implemented. Keep the
+ // previous diagnostic instead of silently emitting an unpatched address.
+ assert(patch_code == lir_patch_none, "Patch code not supported");
+ __ load_address(reg, as_Address(addr));
+ }
}
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
diff --git a/src/hotspot/cpu/s390/globals_s390.hpp b/src/hotspot/cpu/s390/globals_s390.hpp
index d110443adf8..80ed6d1acc8 100644
--- a/src/hotspot/cpu/s390/globals_s390.hpp
+++ b/src/hotspot/cpu/s390/globals_s390.hpp
@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code fo
define_pd_global(bool, TrapBasedNullChecks, true);
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls passed to check cast.
-define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
+define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_PRESENT(true) NOT_COMPILER2(false));
define_pd_global(size_t, CodeCacheSegmentSize, 256);
// This shall be at least 32 for proper branch target alignment.
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index 7327e2a13f2..03c90a499fb 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -2002,14 +2002,10 @@ void InterpreterMacroAssembler::notify_method_exit(bool native_method,
// entry/exit events are sent for that thread to track stack
// depth. If it is possible to enter interp_only_mode we add
// the code to check if the event should be sent.
- if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
- Label jvmti_post_done;
- MacroAssembler::load_and_test_int(Z_R0, Address(Z_thread, JavaThread::interp_only_mode_offset()));
- z_bre(jvmti_post_done);
+ if (mode == NotifyJVMTI && (JvmtiExport::can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
if (!native_method) push(state); // see frame::interpreter_frame_result()
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
if (!native_method) pop(state);
- bind(jvmti_post_done);
}
}
diff --git a/src/hotspot/cpu/s390/jvmciCodeInstaller_s390.cpp b/src/hotspot/cpu/s390/jvmciCodeInstaller_s390.cpp
deleted file mode 100644
index 4318703ad38..00000000000
--- a/src/hotspot/cpu/s390/jvmciCodeInstaller_s390.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-// JVMCI (JEP 243):
-// So far, the JVMCI is not supported/implemented on SAP platforms.
-// This file just serves as a placeholder which may be filled with life
-// should the JVMCI ever be implemented.
-#if INCLUDE_JVMCI
-
-#include "jvmci/jvmciCodeInstaller.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#include "jvmci/jvmciCompilerToVM.hpp"
-#include "jvmci/jvmciJavaClasses.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "vmreg_s390.inline.hpp"
-
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
- Unimplemented();
- return 0;
-}
-
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
- Unimplemented();
-}
-
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
- Unimplemented();
-}
-
-// Convert JVMCI register indices (as used in oop maps) to HotSpot registers.
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
- return nullptr;
-}
-
-bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
- return false;
-}
-
-#endif // INLCUDE_JVMCI
diff --git a/src/hotspot/cpu/s390/registerMap_s390.cpp b/src/hotspot/cpu/s390/registerMap_s390.cpp
new file mode 100644
index 00000000000..85a49ff1d60
--- /dev/null
+++ b/src/hotspot/cpu/s390/registerMap_s390.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026 IBM Corp. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "runtime/registerMap.hpp"
+
+// Returns the address this RegisterMap associates with slot slot_idx of register base_reg.
+address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const {
+  if (!base_reg->is_VectorRegister()) {
+    // Non-vector case: look up the VMReg of the requested slot directly.
+    return location(base_reg->next(slot_idx), nullptr);
+  }
+
+  // Only some of the physical slots of a VectorRegister have a valid VMReg
+  // location in the RegisterMap (see RegisterSaver::save_live_registers).
+  // The slots are saved to the stack contiguously, though, so the address of
+  // an upper slot is the base register's location plus the slot offset.
+  assert(base_reg->is_concrete(), "must pass base reg");
+  address base_addr = location(base_reg, nullptr);
+  if (base_addr == nullptr) {
+    return nullptr;
+  }
+
+  intptr_t slot_offset = slot_idx * VMRegImpl::stack_slot_size;
+  return base_addr + slot_offset;
+}
diff --git a/src/hotspot/cpu/s390/registerMap_s390.hpp b/src/hotspot/cpu/s390/registerMap_s390.hpp
index 827e3b44e04..9069fb1e31d 100644
--- a/src/hotspot/cpu/s390/registerMap_s390.hpp
+++ b/src/hotspot/cpu/s390/registerMap_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -36,9 +36,7 @@
// Since there is none, we just return null.
address pd_location(VMReg reg) const {return nullptr;}
- address pd_location(VMReg base_reg, int slot_idx) const {
- return location(base_reg->next(slot_idx), nullptr);
- }
+ address pd_location(VMReg base_reg, int slot_idx) const;
// No PD state to clear or copy.
void pd_clear() {}
diff --git a/src/hotspot/cpu/s390/registerSaver_s390.hpp b/src/hotspot/cpu/s390/registerSaver_s390.hpp
index a049f8b581b..2d3c35250ba 100644
--- a/src/hotspot/cpu/s390/registerSaver_s390.hpp
+++ b/src/hotspot/cpu/s390/registerSaver_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -58,7 +58,7 @@ class RegisterSaver {
// During deoptimization only the result register need to be restored
// all the other values have already been extracted.
- static void restore_result_registers(MacroAssembler* masm);
+ static void restore_result_registers(MacroAssembler* masm, bool save_vectors);
// Constants and data structures:
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index 00a830a80cd..e5a27e66968 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -402,9 +402,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg
break;
}
- // Second set_callee_saved is really a waste but we'll keep things as they were for now
map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
- map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
}
assert(first != noreg, "Should spill at least one int reg.");
__ z_stmg(first, last, first_offset, Z_SP);
@@ -416,12 +414,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg
map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
RegisterSaver_LiveVRegs[i].vmreg);
- map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size ) >> 2),
- RegisterSaver_LiveVRegs[i].vmreg->next());
- map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2),
- RegisterSaver_LiveVRegs[i].vmreg->next(2));
- map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2),
- RegisterSaver_LiveVRegs[i].vmreg->next(3));
}
assert(offset == frame_size_in_bytes, "consistency check");
@@ -473,7 +465,6 @@ OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_se
for (int i = 0; i < regstosave_num; i++) {
if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
- map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
}
offset += reg_size;
}
@@ -580,10 +571,12 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg
// Pop the current frame and restore the registers that might be holding a result.
-void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+void RegisterSaver::restore_result_registers(MacroAssembler* masm, bool save_vectors) {
const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
sizeof(RegisterSaver::LiveRegType);
- const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
+ const int vecregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
+ const int vreg_save_size = vecregstosave_num * v_reg_size;
+ const int register_save_offset = live_reg_frame_size(all_registers, save_vectors) - (live_reg_save_size(all_registers) + vreg_save_size);
// Restore all result registers (ints and floats).
int offset = register_save_offset;
@@ -609,7 +602,7 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
ShouldNotReachHere();
}
}
- assert(offset == live_reg_frame_size(all_registers), "consistency check");
+ assert(offset == live_reg_frame_size(all_registers, save_vectors) - (save_vectors ? vreg_save_size : 0) , "consistency check");
}
// ---------------------------------------------------------------------------
@@ -2557,7 +2550,7 @@ void SharedRuntime::generate_deopt_blob() {
// nmethod that was valid just before the nmethod was deoptimized.
// save R14 into the deoptee frame. the `fetch_unroll_info'
// procedure called below will read it from there.
- map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+ map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, /* save_vectors= */ SuperwordUseVX);
// note the entry point.
__ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
@@ -2573,7 +2566,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ offset() - start_off;
// No need to update map as each call to save_live_registers will produce identical oopmap
- (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
+ (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, /* save_vectors= */ SuperwordUseVX);
__ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
__ z_bru(exec_mode_initialized);
@@ -2611,7 +2604,7 @@ void SharedRuntime::generate_deopt_blob() {
__ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
// Save everything in sight.
- (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
+ (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch, /* save_vectors= */ SuperwordUseVX);
// Now it is safe to overwrite any register
@@ -2661,7 +2654,7 @@ void SharedRuntime::generate_deopt_blob() {
__ z_lgr(unroll_block_reg, Z_RET);
// restore the return registers that have been saved
// (among other registers) by save_live_registers(...).
- RegisterSaver::restore_result_registers(masm);
+ RegisterSaver::restore_result_registers(masm, /* save_vectors= */ SuperwordUseVX);
// reload the exec mode from the UnrollBlock (it might have changed)
__ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset()));
@@ -2737,7 +2730,7 @@ void SharedRuntime::generate_deopt_blob() {
// Make sure all code is generated
masm->flush();
- _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, SuperwordUseVX)/wordSize);
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
index 3f16312eb48..5309158fc74 100644
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -3361,7 +3361,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
// Generate AES intrinsics code.
if (UseAESIntrinsics) {
if (VM_Version::has_Crypto_AES()) {
@@ -3405,7 +3405,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(StubId::stubgen_sha512_implCompressMB_id);
}
-#ifdef COMPILER2
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
@@ -3417,8 +3416,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_montgomerySquare
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
}
-#endif
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
public:
diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp
index 7e9000991ca..c3f981f159a 100644
--- a/src/hotspot/cpu/s390/vm_version_s390.cpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -24,8 +24,9 @@
*/
#include "asm/assembler.inline.hpp"
-#include "compiler/disassembler.hpp"
#include "code/compiledIC.hpp"
+#include "compiler/compilerDefinitions.inline.hpp"
+#include "compiler/disassembler.hpp"
#include "jvm.h"
#include "memory/resourceArea.hpp"
#include "runtime/java.hpp"
@@ -105,7 +106,7 @@ void VM_Version::initialize() {
int model_ix = get_model_index();
if ( model_ix >= 7 ) {
- if (FLAG_IS_DEFAULT(SuperwordUseVX)) {
+ if (FLAG_IS_DEFAULT(SuperwordUseVX) && CompilerConfig::is_c2_enabled()) {
FLAG_SET_ERGO(SuperwordUseVX, true);
}
if (model_ix > 7 && FLAG_IS_DEFAULT(UseSFPV) && SuperwordUseVX) {
diff --git a/src/hotspot/cpu/s390/vmreg_s390.hpp b/src/hotspot/cpu/s390/vmreg_s390.hpp
index 517fb8e2130..5fb5b7b40b1 100644
--- a/src/hotspot/cpu/s390/vmreg_s390.hpp
+++ b/src/hotspot/cpu/s390/vmreg_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -59,7 +59,12 @@ inline VectorRegister as_VectorRegister() {
inline bool is_concrete() {
assert(is_reg(), "must be");
- return is_even(value());
+ if (is_Register() || is_FloatRegister()) return is_even(value());
+ if (is_VectorRegister()) {
+ int base = value() - ConcreteRegisterImpl::max_fpr;
+ return (base & 3) == 0;
+ }
+ return true;
}
#endif // CPU_S390_VMREG_S390_HPP
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index 0c8dd85b15d..14ffdb70f3d 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -3439,6 +3439,16 @@ void Assembler::movdqa(XMMRegister dst, Address src) {
emit_operand(dst, src, 0);
}
+void Assembler::movdqa(Address dst, XMMRegister src) {  // Store overload of movdqa: writes register src to the memory operand dst.
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);  // fixed 128-bit length, no opmask register
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.reset_is_clear_context();
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);  // 66 prefix, 0F opcode map; xnoreg = no second register source
+ emit_int8(0x7F);  // MOVDQA m128, xmm is encoded as 66 0F 7F (Intel SDM)
+ emit_operand(src, dst, 0);  // src is the register operand, dst the memory operand
+}
+
void Assembler::movdqu(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -3848,6 +3858,26 @@ void Assembler::evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool mer
emit_operand(dst, src, 0);
}
+void Assembler::evmovdqaq(Address dst, XMMRegister src, int vector_len) {  // Convenience store overload: forwards to the masked overload with opmask k0.
+ // Unmasked instruction
+ evmovdqaq(dst, k0, src, /*merge*/ false, vector_len);  // merge == false, so the callee does not call reset_is_clear_context()
+}
+
+void Assembler::evmovdqaq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {  // Opmask-controlled store of src to memory at dst, EVEX-only.
+ assert(VM_Version::supports_evex(), "");  // this overload is always emitted with an EVEX prefix
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);  // W=1 (the "q" form); an opmask register is used
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ if (merge) {  // the clear-context attribute is only reset when the caller asks for merge semantics
+ attributes.reset_is_clear_context();
+ }
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);  // 0 is passed as the second-source register encoding
+ emit_int8(0x7F);  // same opcode byte as the movdqa store overload
+ emit_operand(src, dst, 0);  // src is the register operand, dst the memory operand
+}
+
void Assembler::evmovntdquq(Address dst, XMMRegister src, int vector_len) {
// Unmasked instruction
evmovntdquq(dst, k0, src, /*merge*/ true, vector_len);
@@ -9481,6 +9511,20 @@ void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
emit_int16((unsigned char)0xF3, (0xC0 | encode));
}
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {  // Overload of vpsllq whose shift operand is a memory address instead of an XMM register.
+ assert(UseAVX > 0, "requires some form of AVX");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :  // 128-bit form needs AVX
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :  // 256-bit form needs AVX2
+ vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");  // 512-bit form needs EVEX; any other length fails the assert
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);  // W follows EVEX availability
+ attributes.set_rex_vex_w_reverted();
+ attributes.set_address_attributes(/* tuple_type */ EVEX_M128, /* input_size_in_bits */ EVEX_NObit);  // memory operand described with the M128 tuple type
+ vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0xF3);  // same opcode byte (0xF3) as the register-shift overload
+ emit_operand(dst, shift, 0);  // dst is the register operand, shift the memory operand
+}
+
// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -9572,6 +9616,20 @@ void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
emit_int16((unsigned char)0xD3, (0xC0 | encode));
}
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {  // Overload of vpsrlq whose shift operand is a memory address instead of an XMM register.
+ assert(UseAVX > 0, "requires some form of AVX");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :  // 128-bit form needs AVX
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :  // 256-bit form needs AVX2
+ vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");  // 512-bit form needs EVEX; any other length fails the assert
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);  // W follows EVEX availability
+ attributes.set_rex_vex_w_reverted();
+ attributes.set_address_attributes(/* tuple_type */ EVEX_M128, /* input_size_in_bits */ EVEX_NObit);  // memory operand described with the M128 tuple type
+ vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0xD3);  // same opcode byte (0xD3) as the register-shift overload
+ emit_operand(dst, shift, 0);  // dst is the register operand, shift the memory operand
+}
+
void Assembler::evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -9712,6 +9770,18 @@ void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
emit_int16(0x47, (0xC0 | encode));
}
+void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {  // Overload of vpsllvq whose per-element shift counts are read from memory.
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx2() :  // both the 128-bit ...
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :  // ... and 256-bit forms are gated on AVX2
+ vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");  // 512-bit form needs EVEX; any other length fails the assert
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);  // W=1 selects the quadword ("q") form
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);  // 0F 38 opcode map
+ emit_int8((unsigned char)0x47);  // same opcode byte (0x47) as the register-shift overload
+ emit_operand(dst, shift, 0);  // dst is the register operand, shift the memory operand
+}
+
//Variable Shift packed integers logically right.
void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 1, "requires AVX2");
@@ -9727,6 +9797,18 @@ void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
emit_int16(0x45, (0xC0 | encode));
}
+void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len) {  // Overload of vpsrlvq whose per-element shift counts are read from memory.
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx2() :  // both the 128-bit ...
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :  // ... and 256-bit forms are gated on AVX2
+ vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");  // 512-bit form needs EVEX; any other length fails the assert
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);  // W=1 selects the quadword ("q") form
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ vex_prefix(shift, src->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);  // 0F 38 opcode map
+ emit_int8((unsigned char)0x45);  // same opcode byte (0x45) as the register-shift overload
+ emit_operand(dst, shift, 0);  // dst is the register operand, shift the memory operand
+}
+
//Variable right Shift arithmetic packed integers .
void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 1, "requires AVX2");
@@ -15091,7 +15173,6 @@ void Assembler::cdqe() {
}
void Assembler::clflush(Address adr) {
- assert(VM_Version::supports_clflush(), "should do");
prefix(adr, true /* is_map1 */);
emit_int8((unsigned char)0xAE);
emit_operand(rdi, adr, 0);
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 98684752b0c..bc473ba1ca2 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1612,6 +1612,7 @@ private:
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, XMMRegister src);
void movdqa(XMMRegister dst, Address src);
+ void movdqa(Address dst, XMMRegister src);
// Move Unaligned Double Quadword
void movdqu(Address dst, XMMRegister src);
@@ -1661,8 +1662,10 @@ private:
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Move Aligned 512bit Vector
- void evmovdqaq(XMMRegister dst, Address src, int vector_len);
- void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdqaq(XMMRegister dst, Address src, int vector_len);
+ void evmovdqaq(Address dst, XMMRegister src, int vector_len);
+ void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdqaq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void vmovsldup(XMMRegister dst, XMMRegister src, int vector_len);
void vmovshdup(XMMRegister dst, XMMRegister src, int vector_len);
@@ -2860,6 +2863,7 @@ private:
void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsllq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
// Logical shift right packed integers
@@ -2875,6 +2879,7 @@ private:
void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsrlq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2895,10 +2900,12 @@ private:
// Variable shift left packed integers
void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsllvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
// Variable shift right packed integers
void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsrlvq(XMMRegister dst, XMMRegister src, Address shift, int vector_len);
// Variable shift right arithmetic packed integers
void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 3c4659934c6..69308bb2a7e 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -483,7 +483,7 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t,
// Try to unlock. Transition lock bits 0b00 => 0b01
movptr(reg_rax, mark);
- andptr(reg_rax, ~(int32_t)markWord::lock_mask);
+ andptr(reg_rax, ~(int32_t)markWord::lock_mask_in_place);
orptr(mark, markWord::unlocked_value);
lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
jcc(Assembler::notEqual, push_and_slow_path);
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index 1ff28516307..2b06f9ee80c 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -298,7 +298,7 @@ void frame::patch_pc(Thread* thread, address pc) {
#ifdef ASSERT
{
- frame f(this->sp(), this->unextended_sp(), this->fp(), pc);
+ frame f(sp(), unextended_sp(), fp(), pc, cb(), oop_map(), is_heap_frame());
assert(f.is_deoptimized_frame() == this->is_deoptimized_frame() && f.pc() == this->pc() && f.raw_pc() == this->raw_pc(),
"must be (f.is_deoptimized_frame(): %d this->is_deoptimized_frame(): %d "
"f.pc(): " INTPTR_FORMAT " this->pc(): " INTPTR_FORMAT " f.raw_pc(): " INTPTR_FORMAT " this->raw_pc(): " INTPTR_FORMAT ")",
@@ -453,11 +453,11 @@ frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
intptr_t* unextended_sp = interpreter_frame_sender_sp();
intptr_t* sender_fp = link();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (map->update_map()) {
update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
address sender_pc = this->sender_pc();
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp
index 40311f746ea..7ebc90b9f71 100644
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,9 +33,6 @@
#include "utilities/debug.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciRuntime.hpp"
-#endif
class NativeNMethodCmpBarrier: public NativeInstruction {
public:
@@ -167,16 +164,7 @@ static int entry_barrier_offset(nmethod* nm) {
}
static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) {
- address barrier_address;
-#if INCLUDE_JVMCI
- if (nm->is_compiled_by_jvmci()) {
- barrier_address = nm->code_begin() + nm->jvmci_nmethod_data()->nmethod_entry_patch_offset();
- } else
-#endif
- {
- barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
- }
-
+ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
NativeNMethodCmpBarrier* barrier = reinterpret_cast(barrier_address);
barrier->verify();
return barrier;
@@ -199,11 +187,3 @@ int BarrierSetNMethod::guard_value(nmethod* nm) {
NativeNMethodCmpBarrier* cmp = native_nmethod_barrier(nm);
return cmp->get_immediate();
}
-
-
-#if INCLUDE_JVMCI
-bool BarrierSetNMethod::verify_barrier(nmethod* nm, err_msg& msg) {
- NativeNMethodCmpBarrier* barrier = native_nmethod_barrier(nm);
- return barrier->check_barrier(msg);
-}
-#endif
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
deleted file mode 100644
index 66fb4cbb8c7..00000000000
--- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
- * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "c1/c1_LIRAssembler.hpp"
-#include "c1/c1_MacroAssembler.hpp"
-#include "gc/shared/gc_globals.hpp"
-#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-
-#define __ masm->masm()->
-
-void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {
- Register addr = _addr->is_single_cpu() ? _addr->as_register() : _addr->as_register_lo();
- Register newval = _new_value->as_register();
- Register cmpval = _cmp_value->as_register();
- Register tmp1 = _tmp1->as_register();
- Register tmp2 = _tmp2->as_register();
- Register result = result_opr()->as_register();
- assert(cmpval == rax, "wrong register");
- assert(newval != noreg, "new val must be register");
- assert(cmpval != newval, "cmp and new values must be in different registers");
- assert(cmpval != addr, "cmp and addr must be in different registers");
- assert(newval != addr, "new value and addr must be in different registers");
-
- if (UseCompressedOops) {
- __ encode_heap_oop(cmpval);
- __ mov(rscratch1, newval);
- __ encode_heap_oop(rscratch1);
- newval = rscratch1;
- }
-
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), result, Address(addr, 0), cmpval, newval, false, tmp1, tmp2);
-}
-
-#undef __
-
-#ifdef ASSERT
-#define __ gen->lir(__FILE__, __LINE__)->
-#else
-#define __ gen->lir()->
-#endif
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
-
- if (access.is_oop()) {
- LIRGenerator* gen = access.gen();
- if (ShenandoahSATBBarrier) {
- pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
- LIR_OprFact::illegalOpr /* pre_val */);
- }
- if (ShenandoahCASBarrier) {
- cmp_value.load_item_force(FrameMap::rax_oop_opr);
- new_value.load_item();
-
- LIR_Opr t1 = gen->new_register(T_OBJECT);
- LIR_Opr t2 = gen->new_register(T_OBJECT);
- LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
- LIR_Opr result = gen->new_register(T_INT);
-
- __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result));
-
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), new_value.result());
- }
- return result;
- }
- }
- return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
-}
-
-LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
- LIRGenerator* gen = access.gen();
- BasicType type = access.type();
-
- LIR_Opr result = gen->new_register(type);
- value.load_item();
- LIR_Opr value_opr = value.result();
-
- // Because we want a 2-arg form of xchg and xadd
- __ move(value_opr, result);
-
- assert(type == T_INT || is_reference_type(type) || type == T_LONG, "unexpected type");
- __ xchg(access.resolved_addr(), result, result, LIR_OprFact::illegalOpr);
-
- if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators());
- LIR_Opr tmp = gen->new_register(type);
- __ move(result, tmp);
- result = tmp;
- if (ShenandoahSATBBarrier) {
- pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr,
- result /* pre_val */);
- }
- if (ShenandoahCardBarrier) {
- post_barrier(access, access.resolved_addr(), result);
- }
- }
-
- return result;
-}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 1df8de2352d..ce8f26fc1b0 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -42,6 +42,9 @@
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif
+#ifdef COMPILER2
+#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
+#endif
#define __ masm->
@@ -901,3 +904,399 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
+
+#ifdef COMPILER2
+
+#undef __
+#define __ masm->
+
+void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {  // Emits the load of src into dst, then hands off to ShenandoahBarrierStubC2::load_post.
+ // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
+ if (narrow) {
+ __ movl(dst, src);  // narrow: 32-bit load
+ } else {
+ __ movq(dst, src);  // otherwise: 64-bit load
+ }
+
+ ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);  // no extra registers are supplied (both noreg)
+}
+
+void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,  // Emits store_pre, the store of src to dst, then store_post.
+ Address dst, bool dst_narrow,  // dst_narrow: the memory slot is written with a 32-bit store
+ Register src, bool src_narrow,  // src_narrow: src already holds the narrow form, so no encoding is emitted
+ Register tmp) {  // tmp: scratch; also receives the encoded copy of src when encoding is needed
+
+ ShenandoahBarrierStubC2::store_pre(masm, node, tmp, dst, noreg, noreg, dst_narrow);
+
+ // Need to encode into tmp, because we cannot clobber src.
+ if (dst_narrow && !src_narrow) {
+ __ movq(tmp, src);
+ if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {  // not-null bit absent: use the encode variant without the _not_null suffix
+ __ encode_heap_oop(tmp);
+ } else {
+ __ encode_heap_oop_not_null(tmp);
+ }
+ src = tmp;  // from here on the store uses the encoded copy
+ }
+
+ // Do the actual store
+ if (dst_narrow) {
+ __ movl(dst, src);
+ } else {
+ __ movq(dst, src);
+ }
+
+ ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
+}
+
+void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,  // Emits load_store_pre, a locked cmpxchg on addr, an optional boolean result, then load_store_post.
+ Register res, Address addr,  // res: receives the success flag, or noreg when no boolean result is wanted
+ Register oldval, Register newval, Register tmp,  // oldval must be rax (asserted below); tmp is scratch for the barrier helpers
+ bool narrow) {  // narrow: use the 32-bit cmpxchgl instead of cmpxchgptr
+
+ assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
+
+ // Oldval and newval can be in the same register, but all other registers should be
+ // distinct for extra safety, as we shuffle register values around.
+ assert_different_registers(oldval, tmp, addr.base(), addr.index());
+ assert_different_registers(newval, tmp, addr.base(), addr.index());
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
+
+ // CAS!
+ __ lock();  // lock prefix applies to the cmpxchg emitted next
+ if (narrow) {
+ __ cmpxchgl(newval, addr);
+ } else {
+ __ cmpxchgptr(newval, addr);
+ }
+
+ // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
+ if (res != noreg) {
+ __ setcc(Assembler::equal, res);  // res is set from the "equal" condition left by cmpxchg
+ }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
+}
+
+void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {  // Emits load_store_pre, an xchg of newval with memory at addr, then load_store_post.
+ assert_different_registers(newval, tmp, addr.base(), addr.index());  // tmp must not alias the value or any address component
+
+ ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
+
+ if (narrow) {  // no lock() is emitted here; XCHG with a memory operand is implicitly locked on x86 (Intel SDM)
+ __ xchgl(newval, addr);  // after the exchange newval holds the previous memory contents
+ } else {
+ __ xchgq(newval, addr);
+ }
+
+ ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
+}
+
+#undef __
+#define __ masm.
+
+void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {  // Emits code that marks the card covering addr as dirty; tmp1 is clobbered, tmp2 is not referenced in this body.
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+
+ __ lea(tmp1, addr);  // tmp1 = effective address of addr
+ __ shrptr(tmp1, CardTable::card_shift());  // address -> card index
+ __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));  // add the value stored at card_table_offset() in the current thread
+ Address card_address(tmp1, 0);
+
+ assert(CardTable::dirty_card_val() == 0, "Encoding assumption");  // the stores below write 0 as the dirty value
+ Label L_done;
+ if (UseCondCardMark) {
+ __ cmpb(card_address, 0);  // card already dirty: skip the store
+ __ jccb(Assembler::equal, L_done);
+ }
+ if (UseCompressedOops && CompressedOops::base() == nullptr) {
+ __ movb(card_address, r12);  // NOTE(review): presumably r12 holds the (null, i.e. zero) heap base here, so this also stores 0 -- confirm
+ } else {
+ __ movb(card_address, 0);
+ }
+ __ bind(L_done);
+}
+
+void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {  // Emits the inline check that branches to this stub's entry() when the thread-local byte selected by test_state is non-zero; tmp is not referenced in this body.
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+
+ Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));  // per-thread byte looked up by test_state
+ __ cmpb(gc_state_fast, 0);
+ __ jcc(Assembler::notEqual, *entry());  // non-zero: go to the stub
+ __ bind(*continuation());  // the stub's continuation label is bound right after the check
+}
+
+void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {  // Emits the stub body: aligned entry, optional self-load, null filter, then the keep-alive and/or load-reference barrier.
+ Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
+ assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");  // a stub with no barrier to run should never be emitted
+
+ // On x86, there is a significant penalty with unaligned branch target, for example
+ // when the target instruction straddles the fetch line. It makes (performance) sense
+ // to spend some code size to align the target better.
+ __ align(16);
+ __ bind(*entry());
+
+ // If we need to load ourselves, do it here.
+ if (_do_load) {
+ if (_narrow) {
+ __ movl(_obj, _addr);  // narrow: 32-bit load
+ } else {
+ __ movq(_obj, _addr);  // otherwise: 64-bit load
+ }
+ }
+
+ // If the object is null, there is no point in applying barriers.
+ maybe_far_jump_if_zero(masm, _obj);
+
+ // We need to make sure that loads done by callers survive across slow-path calls.
+ // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
+ bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
+ if (!_do_load || needs_both_barriers) {
+ preserve(_obj);
+ }
+
+ // Go for barriers. Barriers can return straight to continuation, as long
+ // as another barrier is not needed.
+ if (needs_both_barriers) {
+ keepalive(masm, nullptr);  // nullptr: keepalive falls through so that lrb can follow
+ lrb(masm);
+ } else if (_needs_keep_alive_barrier) {
+ keepalive(masm, continuation());  // keepalive jumps straight to the continuation when done
+ } else if (_needs_load_ref_barrier) {
+ lrb(masm);
+ } else {
+ ShouldNotReachHere();  // excluded by the assert at the top
+ }
+}
+
+void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {  // Emits the keep-alive part of the stub: push _obj into the thread's SATB mark queue buffer, or call the runtime when that is not possible. L_done is the label to jump to when finished, or nullptr to fall through.
+ Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));  // per-thread byte for the MARKING state
+ Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));  // per-thread SATB queue index
+ Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));  // per-thread SATB queue buffer
+
+ Label L_through, L_pop_and_slow;
+
+ // If another barrier is enabled as well, do a runtime check for a specific barrier.
+ if (_needs_load_ref_barrier) {
+ assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
+ __ cmpb(gc_state_fast, 0);
+ __ jcc(Assembler::equal, L_through);  // MARKING byte is zero: skip the keep-alive work entirely
+ }
+
+ // Need temp to work, allocate one now.
+ bool tmp_live;
+ Register tmp = select_temp_register(tmp_live);  // tmp_live reports whether the chosen register must be saved around its use
+ if (tmp_live) {
+ __ push(tmp);
+ }
+
+ // Fast-path: put object into buffer.
+ // If buffer is already full, go slow.
+ __ movptr(tmp, index);
+ __ subptr(tmp, wordSize);  // step the index down by one word
+ __ jccb(Assembler::below, L_pop_and_slow);  // the subtraction borrowed: no room left
+ __ movptr(index, tmp);  // publish the new index
+ __ addptr(tmp, buffer);  // tmp = address of the slot to fill
+
+ // Store the object in queue.
+ // If object is narrow, we need to decode it before inserting.
+ // We can skip the re-encoding if we know that object is not preserved.
+ if (_narrow) {
+ __ decode_heap_oop_not_null(_obj);
+ }
+ __ movptr(Address(tmp, 0), _obj);
+ if (_narrow && is_preserved(_obj)) {
+ __ encode_heap_oop_not_null(_obj);  // restore the narrow form that was decoded in place above
+ }
+
+ // Fast-path exits here.
+ if (tmp_live) {
+ __ pop(tmp);
+ }
+
+ if (L_done != nullptr) {
+ __ jmp(*L_done);
+ } else {
+ __ jmp(L_through);  // skip over the slow path emitted below
+ }
+
+ // Slow-path: call runtime to handle.
+ // Need to pop tmp immediately for stack to remain aligned.
+ __ bind(L_pop_and_slow);
+ if (tmp_live) {
+ __ pop(tmp);
+ }
+ {
+ SaveLiveRegisters slr(&masm, this);  // scoped object: lives for the duration of the enclosing braces
+
+ // Shuffle in the arguments. The end result should be:
+ // c_rarg0 <-- obj
+ if (c_rarg0 != _obj) {
+ __ mov(c_rarg0, _obj);
+ }
+
+ // Go to runtime and handle the rest there.
+ // Use rax as scratch, as it will be saved if live.
+ __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
+ }
+ if (L_done != nullptr) {
+ __ jmp(*L_done);
+ } else {
+ __ bind(L_through);  // fall-through exit used when no L_done was supplied
+ }
+}
+
+void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
+  // Emits the load-reference-barrier (LRB) stub: inline gc-state and cset checks, runtime call only if they fail.
+  Label L_pop_and_slow, L_slow;
+  // If the keep-alive barrier is enabled as well, check at runtime that the LRB-relevant gc state is set; if not, return.
+  if (_needs_keep_alive_barrier) {
+    char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
+    Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
+    __ cmpb(gc_state_fast, 0);
+    __ jcc(Assembler::equal, *continuation());
+  }
+
+  // If weak references are being processed, weak/phantom loads need to go slow,
+  // regardless of their cset status. This skips the temp allocation below, hence L_slow and not L_pop_and_slow.
+  if (_needs_load_ref_weak_barrier) {
+    Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
+    __ cmpb(gc_state_fast, 0);
+    __ jccb(Assembler::notEqual, L_slow);
+  }
+
+  bool is_aot = AOTCodeCache::is_on_for_dump();
+
+  // Need temp to work, allocate one now. For AOT code keep it away from rcx, which is borrowed for the shift count below.
+  bool tmp_live;
+  Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
+  if (tmp_live) {
+    __ push(tmp);
+  }
+
+  // Compute the cset bitmap index: start from the uncompressed object address in tmp.
+  if (_narrow) {
+    __ decode_heap_oop_not_null(tmp, _obj);
+  } else {
+    __ movptr(tmp, _obj);
+  }
+
+  Address cset_addr_arg;
+  intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
+  if (!is_aot && cset_addr < INT32_MAX) {
+    // Cset bitmap is at easily encodeable address. Just use it as displacement.
+    __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+    cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
+  } else {
+    bool tmp2_live;
+    Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
+    if (tmp2_live) {
+      __ push(tmp2);
+    }
+    if (is_aot) {
+      // Generating AOT code, pull the cset bitmap and region shift from AOT table. rcx is saved and restored around its use.
+      assert_different_registers(tmp, tmp2, rcx);
+      __ push(rcx);
+      __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
+      __ movl(rcx, Address(rcx));
+      __ shrptr(tmp);  // no immediate: the shift count is the value just loaded into rcx
+      __ pop(rcx);
+      __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
+      __ addptr(tmp, Address(tmp2));
+    } else {
+      // Cset bitmap is far away. Add its address fully.
+      __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+      __ movptr(tmp2, cset_addr);
+      __ addptr(tmp, tmp2);
+    }
+    if (tmp2_live) {
+      __ pop(tmp2);
+    }
+    cset_addr_arg = Address(tmp, 0);
+  }
+
+  // Cset-check. A zero byte means not in collection set: return. Fall-through to slow if in collection set.
+  __ cmpb(cset_addr_arg, 0);
+  if (tmp_live) {
+    __ jccb(Assembler::notEqual, L_pop_and_slow);
+    __ pop(tmp);
+    __ jmp(*continuation());
+  } else {
+    // Nothing else to do, jump back
+    __ jcc(Assembler::equal, *continuation());
+  }
+
+  // Slow path
+  __ bind(L_pop_and_slow);
+  // Need to pop tmp immediately for stack to remain aligned.
+  if (tmp_live) {
+    __ pop(tmp);
+  }
+  __ bind(L_slow);
+
+  // Obj is the result, need to temporarily stop preserving it, so that the register restore does not overwrite it.
+  bool is_obj_preserved = is_preserved(_obj);
+  if (is_obj_preserved) {
+    dont_preserve(_obj);
+  }
+  {
+    SaveLiveRegisters slr(&masm, this);
+
+    assert_different_registers(rax, c_rarg0, c_rarg1);
+
+    // Shuffle in the arguments. The end result should be:
+    //   c_rarg0 <-- obj
+    //   c_rarg1 <-- lea(addr)
+    if (_obj == c_rarg0) {
+      __ lea(c_rarg1, _addr);
+    } else if (_obj == c_rarg1) {
+      // Set up arguments in reverse, and then flip them
+      __ lea(c_rarg0, _addr);
+      __ xchgptr(c_rarg0, c_rarg1);
+    } else {
+      assert_different_registers(_obj, c_rarg0, c_rarg1);
+      __ lea(c_rarg1, _addr);
+      __ movptr(c_rarg0, _obj);
+    }
+
+    // Go to runtime and handle the rest there.
+    // Use rax as scratch, as it will be clobbered by result anyway.
+    __ call(RuntimeAddress(lrb_runtime_entry_addr()), rax);
+
+    // Save the result where needed: 32-bit move for a narrow oop, full move otherwise (skipped if obj is already rax).
+    if (_narrow) {
+      __ movl(_obj, rax);
+    } else if (_obj != rax) {
+      __ movptr(_obj, rax);
+    }
+  }
+  if (is_obj_preserved) {
+    preserve(_obj);
+  }
+
+  __ jmp(*continuation());
+}
+
+int ShenandoahBarrierStubC2::available_gp_registers() {
+ return Register::available_gp_registers(); // Forwards the platform Register query unchanged.
+}
+
+bool ShenandoahBarrierStubC2::is_special_register(Register r) {
+  return r == r15_thread || r == r12_heapbase || r == rbp || r == rsp;  // thread, heap base, frame and stack pointers
+}
+
+void ShenandoahBarrierStubC2::post_init() {
+ // Do nothing: this platform's stub has no post-initialization work.
+}
+
+void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
+  if (!_narrow) {
+    __ testq(reg, reg);  // full-width value: test all 64 bits
+  } else {
+    __ testl(reg, reg);  // narrow value: test the low 32 bits
+  }
+  __ jcc(Assembler::zero, *continuation());  // zero: resume at the continuation (jcc, not the short-only jccb)
+}
+
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
index 85fb98fa15c..592cbc42fe3 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
@@ -37,6 +37,9 @@ class ShenandoahPreBarrierStub;
class ShenandoahLoadReferenceBarrierStub;
class StubAssembler;
#endif
+#ifdef COMPILER2
+class MachNode;
+#endif
class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
@@ -54,12 +57,6 @@ private:
Register tmp);
public:
-#ifdef COMPILER1
- void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
- void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
- void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
-#endif
void load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators);
@@ -77,6 +74,23 @@ public:
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
virtual void try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Label& slowpath);
+
+#ifdef COMPILER1
+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators);
+#endif
+
+#ifdef COMPILER2
+ // Entry points from Matcher: called from the ins_encode blocks of the *_shenandoah instructs in shenandoah_x86_64.ad.
+ void load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow); // LoadP / LoadN rules
+ void store_c2(const MachNode* node, MacroAssembler* masm,
+ Address dst, bool dst_narrow, Register src, bool src_narrow, Register tmp); // StoreP / StoreN rules; src_narrow is false when the rule folds an EncodeP
+ void compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval,
+ Register tmp, bool narrow); // CompareAndSwap and CompareAndExchange rules; the exchange rules pass noreg as res
+ void get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow); // GetAndSetP / GetAndSetN rules
+#endif
};
#endif // CPU_X86_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad
index c580d21c9b8..97d0b2c5e0f 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad
@@ -22,90 +22,215 @@
//
//
-source_hpp %{
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-#include "gc/shenandoah/c2/shenandoahSupport.hpp"
+source %{
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp"
%}
-instruct compareAndSwapP_shenandoah(rRegI res,
- memory mem_ptr,
- rRegP tmp1, rRegP tmp2,
- rax_RegP oldval, rRegP newval,
- rFlagsReg cr)
+// ---------------------------------- LOADS ---------------------------------------
+//
+
+instruct loadP_shenandoah(rRegP dst, memory mem, rFlagsReg cr)
 %{
- match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval)));
- match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval)));
- effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval);
-
- format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
-
+ match(Set dst (LoadP mem)); // Full-width oop load.
+ predicate(UseShenandoahGC && n->as_Load()->barrier_data() != 0); // Shenandoah only, and only loads tagged with barrier data.
+ effect(TEMP_DEF dst, KILL cr); // Flags are killed. NOTE(review): TEMP_DEF presumably keeps dst off the address registers - confirm.
+ // The main load is a candidate to implement implicit null checks.
+ ins_is_late_expanded_null_check_candidate(true);
+ format %{ "shenandoah_load $dst, $mem\t# ptr" %}
 ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
- $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
- false, // swap
- $tmp1$$Register, $tmp2$$Register
- );
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm, // All code emission is delegated to load_c2.
+ $dst$$Register,
+ $mem$$Address,
+ /* narrow = */ false
+ );
 %}
- ins_pipe( pipe_cmpxchg );
+ ins_cost(125);
+ ins_pipe(ialu_cr_reg_mem);
 %}
-instruct compareAndSwapN_shenandoah(rRegI res,
- memory mem_ptr,
- rRegP tmp1, rRegP tmp2,
- rax_RegN oldval, rRegN newval,
- rFlagsReg cr) %{
- match(Set res (ShenandoahCompareAndSwapN mem_ptr (Binary oldval newval)));
- match(Set res (ShenandoahWeakCompareAndSwapN mem_ptr (Binary oldval newval)));
- effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval);
-
- format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
-
- ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
- $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
- false, // swap
- $tmp1$$Register, $tmp2$$Register
- );
- %}
- ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeN_shenandoah(memory mem_ptr,
- rax_RegN oldval, rRegN newval,
- rRegP tmp1, rRegP tmp2,
- rFlagsReg cr) %{
- match(Set oldval (ShenandoahCompareAndExchangeN mem_ptr (Binary oldval newval)));
- effect(TEMP tmp1, TEMP tmp2, KILL cr);
-
- format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
-
- ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
- noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
- true, // exchange
- $tmp1$$Register, $tmp2$$Register
- );
- %}
- ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeP_shenandoah(memory mem_ptr,
- rax_RegP oldval, rRegP newval,
- rRegP tmp1, rRegP tmp2,
- rFlagsReg cr)
+instruct loadN_shenandoah(rRegN dst, memory mem, rFlagsReg cr)
 %{
- match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval)));
- effect(KILL cr, TEMP tmp1, TEMP tmp2);
- ins_cost(1000);
-
- format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
-
+ match(Set dst (LoadN mem)); // Compressed oop load.
+ predicate(UseShenandoahGC && n->as_Load()->barrier_data() != 0); // Shenandoah only, and only loads tagged with barrier data.
+ effect(TEMP_DEF dst, KILL cr); // Flags are killed. NOTE(review): TEMP_DEF presumably keeps dst off the address registers - confirm.
+ // The main load is a candidate to implement implicit null checks.
+ ins_is_late_expanded_null_check_candidate(true);
+ format %{ "shenandoah_load $dst, $mem\t# compressed ptr" %}
 ins_encode %{
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
- noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
- true, // exchange
- $tmp1$$Register, $tmp2$$Register
- );
+ ShenandoahBarrierSet::assembler()->load_c2(this, masm, // Same entry point as loadP_shenandoah, with narrow = true.
+ $dst$$Register,
+ $mem$$Address,
+ /* narrow = */ true
+ );
 %}
- ins_pipe( pipe_cmpxchg );
+ ins_cost(125);
+ ins_pipe(ialu_cr_reg_mem);
+%}
+
+// ---------------------------------- STORES ---------------------------------------
+//
+
+instruct storeP_shenandoah(memory mem, any_RegP src, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreP mem src)); // Full-width oop store.
+ predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); // Shenandoah only, and only stores tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_store $mem, $src\t# ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm, // All code emission is delegated to store_c2.
+ $mem$$Address, /* dst_narrow = */ false,
+ $src$$Register, /* src_narrow = */ false,
+ $tmp$$Register);
+ %}
+ ins_cost(125);
+ ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct storeN_shenandoah(memory mem, rRegN src, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem src)); // Compressed oop store of an already-compressed value.
+ predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); // Shenandoah only, and only stores tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_store $mem, $src\t# compressed ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm, // Both destination and source are narrow here.
+ $mem$$Address, /* dst_narrow = */ true,
+ $src$$Register, /* src_narrow = */ true,
+ $tmp$$Register
+ );
+ %}
+ ins_cost(125);
+ ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct encodePAndStoreN_shenandoah(memory mem, any_RegP src, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set mem (StoreN mem (EncodeP src))); // Folds the EncodeP into the store: src is still a full-width pointer.
+ predicate(UseShenandoahGC && n->as_Store()->barrier_data() != 0); // Shenandoah only, and only stores tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_store $mem, $src\t# compressed ptr (with encoding)" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->store_c2(this, masm, // Narrow destination, full-width source.
+ $mem$$Address, /* dst_narrow = */ true,
+ $src$$Register, /* src_narrow = */ false,
+ $tmp$$Register
+ );
+ %}
+ ins_cost(125);
+ ins_pipe(ialu_cr_reg_mem);
+%}
+
+// ---------------------- LOAD-STORES -----------------------------------
+
+instruct compareAndSwapP_shenandoah(rRegI res, memory mem, rRegP tmp, rax_RegP oldval, rRegP newval, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); // Strong and weak CAS share this rule.
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, and only nodes tagged with barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL oldval, KILL cr); // oldval is pinned to rax (rax_RegP) and is killed, as are the flags.
+ format %{ "shenandoah_cas $mem, $newval\t# ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // All code emission is delegated to compare_and_set_c2.
+ $res$$Register,
+ $mem$$Address,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ /* narrow = */ false
+ );
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct compareAndSwapN_shenandoah(rRegI res, memory mem, rRegP tmp, rax_RegN oldval, rRegN newval, rFlagsReg cr)
+%{
+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); // Strong and weak CAS share this rule.
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, and only nodes tagged with barrier data.
+ effect(TEMP_DEF res, TEMP tmp, KILL oldval, KILL cr); // oldval is pinned to rax (rax_RegN) and is killed, as are the flags.
+ format %{ "shenandoah_cas $mem, $newval\t# compressed ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // Same entry point as the P variant, with narrow = true.
+ $res$$Register,
+ $mem$$Address,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ /* narrow = */ true
+ );
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct compareAndExchangeP_shenandoah(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); // The result is produced in oldval, which is pinned to rax (rax_RegP).
+ predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, and only nodes tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_cae $mem, $newval\t# ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // Shares compare_and_set_c2 with the CAS rules.
+ noreg, // no separate result register for the exchange form
+ $mem$$Address,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ /* narrow = */ false
+ );
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct compareAndExchangeN_shenandoah(memory mem, rax_RegN oldval, rRegN newval, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set oldval (CompareAndExchangeN mem (Binary oldval newval))); // The result is produced in oldval, which is pinned to rax (rax_RegN).
+ predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, and only nodes tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_cae $mem, $newval\t# compressed ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->compare_and_set_c2(this, masm, // Shares compare_and_set_c2 with the CAS rules.
+ noreg, // no separate result register for the exchange form
+ $mem$$Address,
+ $oldval$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ /* narrow = */ true
+ );
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct getAndSetP_shenandoah(memory mem, rRegP newval, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set newval (GetAndSetP mem newval)); // newval is both the value to store and the register that receives the result.
+ predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, and only nodes tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_gas $newval, $mem\t# ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm, // All code emission is delegated to get_and_set_c2.
+ $newval$$Register,
+ $mem$$Address,
+ $tmp$$Register,
+ /* narrow = */ false
+ );
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct getAndSetN_shenandoah(memory mem, rRegN newval, rRegP tmp, rFlagsReg cr)
+%{
+ match(Set newval (GetAndSetN mem newval)); // newval is both the value to store and the register that receives the result.
+ predicate(UseShenandoahGC && n->as_LoadStore()->barrier_data() != 0); // Shenandoah only, and only nodes tagged with barrier data.
+ effect(TEMP tmp, KILL cr); // Reserves one scratch pointer register; flags are killed.
+ format %{ "shenandoah_gas $newval, $mem\t# compressed ptr" %}
+ ins_encode %{
+ ShenandoahBarrierSet::assembler()->get_and_set_c2(this, masm, // Same entry point as the P variant, with narrow = true.
+ $newval$$Register,
+ $mem$$Address,
+ $tmp$$Register,
+ /* narrow = */ true
+ );
+ %}
+ ins_pipe(pipe_cmpxchg);
 %}
diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp
index 6de46752790..936a994ed8b 100644
--- a/src/hotspot/cpu/x86/globals_x86.hpp
+++ b/src/hotspot/cpu/x86/globals_x86.hpp
@@ -35,7 +35,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im
define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86.
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls passed to check cast
-define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
+define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_PRESENT(true) NOT_COMPILER2(false));
define_pd_global(size_t, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment.
// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
@@ -45,11 +45,11 @@ define_pd_global(size_t, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRES
// the vep is aligned at CodeEntryAlignment whereas c2 only aligns
// the uep and the vep doesn't get real alignment but just slops on by
// only assured that the entry instruction meets the 5 byte size requirement.
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
define_pd_global(uint, CodeEntryAlignment, 32);
#else
define_pd_global(uint, CodeEntryAlignment, 16);
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
define_pd_global(intx, OptoLoopAlignment, 16);
define_pd_global(intx, InlineSmallCode, 1000);
@@ -99,7 +99,7 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
\
product(int, UseSSE, 4, \
"Highest supported SSE instructions set on x86/x64") \
- range(0, 4) \
+ range(2, 4) \
\
product(int, UseAVX, 3, \
"Highest supported AVX instructions set on x86/x64") \
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp
index a38971c86fb..a9745398f71 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp
@@ -1603,7 +1603,7 @@ void InterpreterMacroAssembler::notify_method_exit(
// the code to check if the event should be sent.
Register rthread = r15_thread;
Register rarg = c_rarg1;
- if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+ if (mode == NotifyJVMTI && (JvmtiExport::can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
Label L;
// Note: frame::interpreter_frame_result has a dependency on how the
// method result is saved across the call to post_method_exit. If this
@@ -1612,9 +1612,18 @@ void InterpreterMacroAssembler::notify_method_exit(
// template interpreter will leave the result on the top of the stack.
push(state);
- movl(rdx, Address(rthread, JavaThread::interp_only_mode_offset()));
- testl(rdx, rdx);
+
+ movptr(rdx, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+ testptr(rdx, rdx);
+ jcc(Assembler::zero, L); // if (thread->jvmti_thread_state() == nullptr) exit;
+
+ movl(rdx, Address(rdx, JvmtiThreadState::frame_pop_cnt_offset()));
+ movl(rcx, Address(rthread, JavaThread::interp_only_mode_offset()));
+
+ orl(rdx, rcx);
+ testl(rdx,rdx);
jcc(Assembler::zero, L);
+
call_VM(noreg,
CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
bind(L);
diff --git a/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp b/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp
deleted file mode 100644
index b9a66907e3b..00000000000
--- a/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "compiler/disassembler.hpp"
-#include "oops/compressedKlass.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/javaCalls.hpp"
-#include "runtime/jniHandles.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "jvmci/jvmci.hpp"
-#include "jvmci/jvmciEnv.hpp"
-#include "jvmci/jvmciCodeInstaller.hpp"
-#include "jvmci/jvmciJavaClasses.hpp"
-#include "jvmci/jvmciCompilerToVM.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#include "asm/register.hpp"
-#include "classfile/vmSymbols.hpp"
-#include "code/vmreg.hpp"
-#include "vmreg_x86.inline.hpp"
-#if INCLUDE_ZGC
-#include "gc/z/zBarrierSetAssembler.hpp"
-#endif
-
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMCI_TRAPS) {
- if (inst->is_call() || inst->is_jump()) {
- assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size");
- return (pc_offset + NativeCall::instruction_size);
- } else if (inst->is_mov_literal64()) {
- // mov+call instruction pair
- jint offset = pc_offset + ((NativeMovConstReg*)inst)->instruction_size();
- u_char* call = (u_char*) (_instructions->start() + offset);
- if (call[0] == Assembler::REX_B) {
- offset += 1; /* prefix byte for extended register R8-R15 */
- call++;
- }
- if (call[0] == Assembler::REX2) {
- offset += 2; /* prefix byte for APX extended GPR register R16-R31 */
- call+=2;
- }
- // Register indirect call.
- assert(call[0] == 0xFF, "expected call");
- offset += 2; /* opcode byte + modrm byte */
- return (offset);
- } else if (inst->is_call_reg()) {
- // the inlined vtable stub contains a "call register" instruction
- return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset());
- } else if (inst->is_cond_jump()) {
- address pc = (address) (inst);
- return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc);
- } else {
- JVMCI_ERROR_0("unsupported type of instruction for call site");
- }
-}
-
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& obj, bool compressed, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- jobject value = JNIHandles::make_local(obj());
- if (compressed) {
- address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
- int oop_index = _oop_recorder->find_index(value);
- _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand);
- JVMCI_event_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
- } else {
- address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
- *((jobject*) operand) = value;
- _instructions->relocate(pc, oop_Relocation::spec_for_immediate(), Assembler::imm_operand);
- JVMCI_event_3("relocating (oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
- }
-}
-
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, HotSpotCompiledCodeStream* stream, u1 tag, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
- if (tag == PATCH_NARROW_KLASS) {
- address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
- *((narrowKlass*) operand) = record_narrow_metadata_reference(_instructions, operand, stream, tag, JVMCI_CHECK);
- JVMCI_event_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
- } else {
- address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
- *((void**) operand) = record_metadata_reference(_instructions, operand, stream, tag, JVMCI_CHECK);
- JVMCI_event_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
- }
-}
-
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) {
- address pc = _instructions->start() + pc_offset;
-
- address operand = Assembler::locate_operand(pc, Assembler::disp32_operand);
- address next_instruction = Assembler::locate_next_instruction(pc);
- address dest = _constants->start() + data_offset;
-
- long disp = dest - next_instruction;
- assert(disp == (jint) disp, "disp doesn't fit in 32 bits");
- *((jint*) operand) = (jint) disp;
-
- _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS), Assembler::disp32_operand);
- JVMCI_event_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
-}
-
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, JVMCI_TRAPS) {
- address pc = (address) inst;
- if (inst->is_call()) {
- // NOTE: for call without a mov, the offset must fit a 32-bit immediate
- // see also CompilerToVM.getMaxCallTargetOffset()
- NativeCall* call = nativeCall_at(pc);
- call->set_destination((address) foreign_call_destination);
- _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec(), Assembler::call32_operand);
- } else if (inst->is_mov_literal64()) {
- NativeMovConstReg* mov = nativeMovConstReg_at(pc);
- mov->set_data((intptr_t) foreign_call_destination);
- _instructions->relocate(mov->instruction_address(), runtime_call_Relocation::spec(), Assembler::imm_operand);
- } else if (inst->is_jump()) {
- NativeJump* jump = nativeJump_at(pc);
- jump->set_jump_destination((address) foreign_call_destination);
- _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec(), Assembler::call32_operand);
- } else if (inst->is_cond_jump()) {
- address old_dest = nativeGeneralJump_at(pc)->jump_destination();
- address disp = Assembler::locate_operand(pc, Assembler::call32_operand);
- *(jint*) disp += ((address) foreign_call_destination) - old_dest;
- _instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand);
- } else {
- JVMCI_ERROR("unsupported relocation for foreign call");
- }
-
- JVMCI_event_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
-}
-
-void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &, methodHandle& method, jint pc_offset, JVMCI_TRAPS) {
- NativeCall* call = nullptr;
- switch (_next_call_type) {
- case INLINE_INVOKE:
- return;
- case INVOKEVIRTUAL:
- case INVOKEINTERFACE: {
- assert(!method->is_static(), "cannot call static method with invokeinterface");
-
- call = nativeCall_at(_instructions->start() + pc_offset);
- call->set_destination(SharedRuntime::get_resolve_virtual_call_stub());
- _instructions->relocate(call->instruction_address(),
- virtual_call_Relocation::spec(_invoke_mark_pc),
- Assembler::call32_operand);
- break;
- }
- case INVOKESTATIC: {
- assert(method->is_static(), "cannot call non-static method with invokestatic");
-
- call = nativeCall_at(_instructions->start() + pc_offset);
- call->set_destination(SharedRuntime::get_resolve_static_call_stub());
- _instructions->relocate(call->instruction_address(),
- relocInfo::static_call_type, Assembler::call32_operand);
- break;
- }
- case INVOKESPECIAL: {
- assert(!method->is_static(), "cannot call static method with invokespecial");
- call = nativeCall_at(_instructions->start() + pc_offset);
- call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub());
- _instructions->relocate(call->instruction_address(),
- relocInfo::opt_virtual_call_type, Assembler::call32_operand);
- break;
- }
- default:
- JVMCI_ERROR("invalid _next_call_type value: %d", _next_call_type);
- return;
- }
- if (!call->is_displacement_aligned()) {
- JVMCI_ERROR("unaligned displacement for call at offset %d", pc_offset);
- }
- if (Continuations::enabled()) {
- // Check for proper post_call_nop
- NativePostCallNop* nop = nativePostCallNop_at(call->next_instruction_address());
- if (nop == nullptr) {
- JVMCI_ERROR("missing post call nop at offset %d", pc_offset);
- } else {
- _instructions->relocate(call->next_instruction_address(), relocInfo::post_call_nop_type);
- }
- }
-}
-
-bool CodeInstaller::pd_relocate(address pc, jint mark) {
- switch (mark) {
- case POLL_NEAR:
- case POLL_FAR:
- // This is a load from a register so there is no relocatable operand.
- // We just have to ensure that the format is not disp32_operand
- // so that poll_Relocation::fix_relocation_after_move does the right
- // thing (i.e. ignores this relocation record)
- _instructions->relocate(pc, relocInfo::poll_type, Assembler::imm_operand);
- return true;
- case POLL_RETURN_NEAR:
- case POLL_RETURN_FAR:
- // see comment above for POLL_FAR
- _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand);
- return true;
-#if INCLUDE_ZGC
- case Z_BARRIER_RELOCATION_FORMAT_LOAD_GOOD_BEFORE_SHL:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeShl);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_LOAD_BAD_AFTER_TEST:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatLoadBadAfterTest);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_MARK_BAD_AFTER_TEST:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadAfterTest);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_STORE_GOOD_AFTER_CMP:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodAfterCmp);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_STORE_BAD_AFTER_TEST:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreBadAfterTest);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_STORE_GOOD_AFTER_OR:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodAfterOr);
- return true;
- case Z_BARRIER_RELOCATION_FORMAT_STORE_GOOD_AFTER_MOV:
- _instructions->relocate(pc, barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodAfterMov);
- return true;
-#endif
- default:
- return false;
- }
-}
-
-// convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, JVMCI_TRAPS) {
- if (jvmci_reg < Register::number_of_registers) {
- return as_Register(jvmci_reg)->as_VMReg();
- } else {
- jint floatRegisterNumber = jvmci_reg - Register::number_of_registers;
- if (floatRegisterNumber < XMMRegister::number_of_registers) {
- return as_XMMRegister(floatRegisterNumber)->as_VMReg();
- }
- JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
- }
-}
-
-bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
- return !(hotspotRegister->is_FloatRegister() || hotspotRegister->is_XMMRegister());
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index f64c4d3f086..0ac0a8243d4 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -2124,6 +2124,26 @@ void MacroAssembler::vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len
}
}
+void MacroAssembler::vmovdqa(XMMRegister dst, Address src, int vector_len) {  // load form: memory -> register
+  if (vector_len == AVX_256bit) {
+    Assembler::vmovdqa(dst, src);                // 256-bit vectors
+  } else if (vector_len == AVX_512bit) {
+    Assembler::evmovdqaq(dst, src, AVX_512bit);  // 512-bit vectors
+  } else {
+    Assembler::movdqa(dst, src);                 // every other length falls back to movdqa
+  }
+}
+
+void MacroAssembler::vmovdqa(Address dst, XMMRegister src, int vector_len) {  // store form: register -> memory
+  if (vector_len == AVX_256bit) {
+    Assembler::vmovdqa(dst, src);                // 256-bit vectors
+  } else if (vector_len == AVX_512bit) {
+    Assembler::evmovdqaq(dst, src, AVX_512bit);  // 512-bit vectors
+  } else {
+    Assembler::movdqa(dst, src);                 // every other length falls back to movdqa
+  }
+}
+
void MacroAssembler::kmov(KRegister dst, Address src) {
if (VM_Version::supports_avx512bw()) {
kmovql(dst, src);
@@ -4881,7 +4901,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
Register offset = rscratch1;
Label L_loop_search_receiver, L_loop_search_empty;
- Label L_restart, L_found_recv, L_found_empty, L_polymorphic, L_count_update;
+ Label L_restart, L_found_recv, L_found_empty, L_count_update;
// The code here recognizes three major cases:
// A. Fastest: receiver found in the table
@@ -4911,21 +4931,20 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
// if (receiver(i) == recv) goto found_recv(i);
// }
//
- // // Fast: no receiver, but profile is full
+ // // Fast: no receiver, but profile is not full
// for (i = 0; i < receiver_count(); i++) {
// if (receiver(i) == null) goto found_null(i);
// }
- // goto polymorphic
+ //
+ // // Slow: profile is full, polymorphic case
+ // count++;
+ // return
//
// // Slow: try to install receiver
// found_null(i):
// CAS(&receiver(i), null, recv);
// goto restart
//
- // polymorphic:
- // count++;
- // return
- //
// found_recv(i):
// *receiver_count(i)++
//
@@ -4941,7 +4960,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
cmpptr(offset, end_receiver_offset);
jccb(Assembler::notEqual, L_loop_search_receiver);
- // Fast: no receiver, but profile is full
+ // Fast: no receiver, but profile is not full
movptr(offset, base_receiver_offset);
bind(L_loop_search_empty);
cmpptr(Address(mdp, offset, Address::times_ptr), NULL_WORD);
@@ -4949,9 +4968,13 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
addptr(offset, receiver_step);
cmpptr(offset, end_receiver_offset);
jccb(Assembler::notEqual, L_loop_search_empty);
- jmpb(L_polymorphic);
- // Slow: try to install receiver
+ // Slow: Receiver is not found and table is full.
+ // Increment polymorphic counter instead of receiver slot.
+ movptr(offset, poly_count_offset);
+ jmpb(L_count_update);
+
+ // Slowest: try to install receiver
bind(L_found_empty);
// Atomically swing receiver slot: null -> recv.
@@ -5003,17 +5026,11 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_
// and just restart the search from the beginning.
jmpb(L_restart);
- // Counter updates:
-
- // Increment polymorphic counter instead of receiver slot.
- bind(L_polymorphic);
- movptr(offset, poly_count_offset);
- jmpb(L_count_update);
-
// Found a receiver, convert its slot offset to corresponding count offset.
bind(L_found_recv);
addptr(offset, receiver_to_count_step);
+ // Finally, update the counter
bind(L_count_update);
addptr(Address(mdp, offset, Address::times_ptr), DataLayout::counter_increment);
}
@@ -5355,12 +5372,10 @@ void MacroAssembler::print_CPU_state() {
void MacroAssembler::restore_cpu_control_state_after_jni(Register rscratch) {
// Either restore the MXCSR register after returning from the JNI Call
// or verify that it wasn't changed (with -Xcheck:jni flag).
- if (VM_Version::supports_sse()) {
- if (RestoreMXCSROnJNICalls) {
- ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rscratch);
- } else if (CheckJNICalls) {
- call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
- }
+ if (RestoreMXCSROnJNICalls) {
+ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rscratch);
+ } else if (CheckJNICalls) {
+ call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
}
// Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
vzeroupper();
@@ -5850,7 +5865,7 @@ void MacroAssembler::reinit_heapbase() {
}
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
@@ -6054,7 +6069,7 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg
BIND(DONE);
}
-#endif //COMPILER2_OR_JVMCI
+#endif //COMPILER2
void MacroAssembler::generate_fill(BasicType t, bool aligned,
@@ -9446,7 +9461,7 @@ void MacroAssembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, Add
}
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
Register length, Register temp, int vec_enc) {
@@ -9681,7 +9696,7 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
}
bind(L_exit);
}
-#endif //COMPILER2_OR_JVMCI
+#endif //COMPILER2
void MacroAssembler::convert_f2i(Register dst, XMMRegister src) {
@@ -9811,7 +9826,6 @@ void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
void MacroAssembler::cache_wb(Address line)
{
// 64 bit cpus always support clflush
- assert(VM_Version::supports_clflush(), "clflush should be available");
bool optimized = VM_Version::supports_clflushopt();
bool no_evict = VM_Version::supports_clwb();
@@ -9833,7 +9847,6 @@ void MacroAssembler::cache_wb(Address line)
void MacroAssembler::cache_wbsync(bool is_pre)
{
- assert(VM_Version::supports_clflush(), "clflush should be available");
bool optimized = VM_Version::supports_clflushopt();
bool no_evict = VM_Version::supports_clwb();
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index b73339c217f..de5ec02fe43 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1181,6 +1181,8 @@ public:
using Assembler::vmovdqa;
void vmovdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
+ void vmovdqa(XMMRegister dst, Address src, int vector_len);
+ void vmovdqa(Address dst, XMMRegister src, int vector_len);
// AVX512 Unaligned
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
@@ -2071,10 +2073,10 @@ public:
void cache_wb(Address line);
void cache_wbsync(bool is_pre);
-#ifdef COMPILER2_OR_JVMCI
+#ifdef COMPILER2
void generate_fill_avx3(BasicType type, Register to, Register value,
Register count, Register rtmp, XMMRegister xtmp);
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
void vallones(XMMRegister dst, int vector_len);
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
index bbd43c1a0e8..8bb9982a820 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
@@ -65,9 +65,6 @@
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciJavaClasses.hpp"
-#endif
#define __ masm->
@@ -182,14 +179,14 @@ class RegisterSaver {
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
int off = 0;
int num_xmm_regs = XMMRegister::available_xmm_registers();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (save_wide_vectors && UseAVX == 0) {
save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
}
assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
#else
- save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
-#endif
+ save_wide_vectors = false; // vectors are generated only by C2
+#endif // COMPILER2
// Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
@@ -234,13 +231,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for(int n = 0; n < KRegister::number_of_registers; n++) {
__ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
}
-#endif
+#endif // COMPILER2
}
} else {
if (VM_Version::supports_evex()) {
@@ -251,17 +248,17 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for(int n = 0; n < KRegister::number_of_registers; n++) {
__ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
}
-#endif
+#endif // COMPILER2
}
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (UseAPX) {
int base_addr = XSAVE_AREA_EGPRS;
off = 0;
@@ -269,7 +266,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ movq(Address(rsp, base_addr+(off++*8)), as_Register(n));
}
}
-#endif
+#endif // COMPILER2
__ vzeroupper();
if (frame::arg_reg_save_area_bytes != 0) {
@@ -342,7 +339,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (save_wide_vectors) {
// Save upper half of YMM registers(0..15)
off = ymm0_off;
@@ -363,7 +360,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
}
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
// %%% These should all be a waste but we'll keep things as they were for now
if (true) {
@@ -431,14 +428,14 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wi
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (restore_wide_vectors) {
assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
}
#else
assert(!restore_wide_vectors, "vectors are generated only by C2");
-#endif
+#endif // COMPILER2
__ vzeroupper();
@@ -462,13 +459,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wi
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for (int n = 0; n < KRegister::number_of_registers; n++) {
__ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
}
-#endif
+#endif // COMPILER2
}
} else {
if (VM_Version::supports_evex()) {
@@ -479,17 +476,17 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wi
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for (int n = 0; n < KRegister::number_of_registers; n++) {
__ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
}
-#endif
+#endif // COMPILER2
}
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (UseAPX) {
int base_addr = XSAVE_AREA_EGPRS;
int off = 0;
@@ -497,7 +494,7 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wi
__ movq(as_Register(n), Address(rsp, base_addr+(off++*8)));
}
}
-#endif
+#endif // COMPILER2
// Recover CPU state
__ pop_FPU_state();
@@ -887,18 +884,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
// Pre-load the register-jump target early, to schedule it better.
__ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- // check if this call should be routed towards a specific entry point
- __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
- Label no_alternative_target;
- __ jcc(Assembler::equal, no_alternative_target);
- __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
- __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
- __ bind(no_alternative_target);
- }
-#endif // INCLUDE_JVMCI
-
// Now generate the shuffle code. Pick up all register args and move the
// rest through the floating point stack top.
for (int i = 0; i < total_args_passed; i++) {
@@ -2492,11 +2477,6 @@ void SharedRuntime::generate_deopt_blob() {
if (UseAPX) {
pad += 1024;
}
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- pad += 512; // Increase the buffer size when compiling for JVMCI
- }
-#endif
const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
if (blob != nullptr) {
@@ -2553,13 +2533,6 @@ void SharedRuntime::generate_deopt_blob() {
__ jmp(cont);
int reexecute_offset = __ pc() - start;
-#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (UseJVMCICompiler) {
- // JVMCI does not use this kind of deoptimization
- __ should_not_reach_here();
- }
-#endif
-
// Reexecute case
// return address is the pc describes what bci to do re-execute at
@@ -2569,39 +2542,6 @@ void SharedRuntime::generate_deopt_blob() {
__ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
__ jmp(cont);
-#if INCLUDE_JVMCI
- Label after_fetch_unroll_info_call;
- int implicit_exception_uncommon_trap_offset = 0;
- int uncommon_trap_offset = 0;
-
- if (EnableJVMCI) {
- implicit_exception_uncommon_trap_offset = __ pc() - start;
-
- __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
- __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD);
-
- uncommon_trap_offset = __ pc() - start;
-
- // Save everything in sight.
- RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
- // fetch_unroll_info needs to call last_java_frame()
- __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
-
- __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
- __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);
-
- __ movl(r14, Deoptimization::Unpack_reexecute);
- __ mov(c_rarg0, r15_thread);
- __ movl(c_rarg2, r14); // exec mode
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
- oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
-
- __ reset_last_Java_frame(false);
-
- __ jmp(after_fetch_unroll_info_call);
- } // EnableJVMCI
-#endif // INCLUDE_JVMCI
-
int exception_offset = __ pc() - start;
// Prolog for exception case
@@ -2686,12 +2626,6 @@ void SharedRuntime::generate_deopt_blob() {
__ reset_last_Java_frame(false);
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- __ bind(after_fetch_unroll_info_call);
- }
-#endif
-
// Load UnrollBlock* into rdi
__ mov(rdi, rax);
@@ -2849,12 +2783,6 @@ void SharedRuntime::generate_deopt_blob() {
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
- _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
- }
-#endif
AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index b0612d21437..b64943fc4de 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -43,9 +43,6 @@
#include "opto/runtime.hpp"
#include "opto/c2_globals.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci_globals.hpp"
-#endif
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp
@@ -4832,7 +4829,7 @@ void StubGenerator::generate_final_stubs() {
}
void StubGenerator::generate_compiler_stubs() {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
// Entry points that are C2 compiler specific.
@@ -4890,11 +4887,9 @@ void StubGenerator::generate_compiler_stubs() {
StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
-#ifdef COMPILER2
if ((UseAVX == 2) && EnableX86ECoreOpts && UseCountTrailingZerosInstruction) {
generate_string_indexof(StubRoutines::_string_indexof_array);
}
-#endif
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
@@ -4973,7 +4968,6 @@ void StubGenerator::generate_compiler_stubs() {
StubRoutines::_base64_decodeBlock = generate_base64_decodeBlock();
}
-#ifdef COMPILER2
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
@@ -5018,7 +5012,6 @@ void StubGenerator::generate_compiler_stubs() {
}
#endif // COMPILER2
-#endif // COMPILER2_OR_JVMCI
}
StubGenerator::StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
index d3823cb559f..360b0329d95 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@@ -161,7 +161,7 @@ class StubGenerator: public StubCodeGenerator {
void restore_argument_regs(BasicType type);
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
// Following rules apply to AVX3 optimized arraycopy stubs:
// - If target supports AVX3 features (BW+VL+F) then implementation uses 32 byte vectors (YMMs)
// for both special cases (various small block sizes) and aligned copy loop. This is the
@@ -216,7 +216,7 @@ class StubGenerator: public StubCodeGenerator {
void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
KRegister mask, Register length, Register index,
Register temp, int shift = Address::times_1, int offset = 0);
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
address generate_disjoint_byte_copy(address* entry);
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
index 5530e5325de..e7dc416a961 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,6 @@
#ifdef COMPILER2
#include "opto/c2_globals.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci_globals.hpp"
-#endif
#define __ _masm->
@@ -59,7 +56,7 @@ static void inc_counter_np(MacroAssembler* _masm, uint& counter, Register rscrat
__ incrementl(ExternalAddress((address)&counter), rscratch);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
static uint& get_profile_ctr(int shift) {
if (shift == 0) {
return SharedRuntime::_jbyte_array_copy_ctr;
@@ -72,7 +69,7 @@ static uint& get_profile_ctr(int shift) {
return SharedRuntime::_jlong_array_copy_ctr;
}
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
#endif // !PRODUCT
void StubGenerator::generate_arraycopy_stubs() {
@@ -505,7 +502,7 @@ void StubGenerator::copy_bytes_backward(Register from, Register dest,
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
// Note: Following rules apply to AVX3 optimized arraycopy stubs:-
// - If target supports AVX3 features (BW+VL+F) then implementation uses 32 byte vectors (YMMs)
@@ -1459,7 +1456,7 @@ void StubGenerator::copy64_avx(Register dst, Register src, Register index, XMMRe
}
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
// Arguments:
@@ -1483,11 +1480,11 @@ address StubGenerator::generate_disjoint_byte_copy(address* entry) {
StubId stub_id = StubId::stubgen_jbyte_disjoint_arraycopy_id;
// aligned is always false -- x86_64 always uses the unaligned code
const bool aligned = false;
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
-#endif
+#endif // COMPILER2
GrowableArray entries;
GrowableArray extras;
int expected_entry_count = (entry != nullptr ? 2 : 1);
@@ -1633,11 +1630,11 @@ address StubGenerator::generate_conjoint_byte_copy(address nooverlap_target, add
StubId stub_id = StubId::stubgen_jbyte_arraycopy_id;
// aligned is always false -- x86_64 always uses the unaligned code
const bool aligned = false;
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
-#endif
+#endif // COMPILER2
GrowableArray entries;
GrowableArray extras;
int expected_entry_count = (entry != nullptr ? 2 : 1);
@@ -1777,11 +1774,11 @@ address StubGenerator::generate_disjoint_short_copy(address *entry) {
StubId stub_id = StubId::stubgen_jshort_disjoint_arraycopy_id;
// aligned is always false -- x86_64 always uses the unaligned code
const bool aligned = false;
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
-#endif
+#endif // COMPILER2
GrowableArray entries;
GrowableArray extras;
int expected_entry_count = (entry != nullptr ? 2 : 1);
@@ -2004,11 +2001,11 @@ address StubGenerator::generate_conjoint_short_copy(address nooverlap_target, ad
StubId stub_id = StubId::stubgen_jshort_arraycopy_id;
// aligned is always false -- x86_64 always uses the unaligned code
const bool aligned = false;
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
-#endif
+#endif // COMPILER2
GrowableArray entries;
GrowableArray extras;
int expected_entry_count = (entry != nullptr ? 2 : 1);
@@ -2162,11 +2159,11 @@ address StubGenerator::generate_disjoint_int_oop_copy(StubId stub_id, address* e
}
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
-#endif
+#endif // COMPILER2
GrowableArray entries;
GrowableArray extras;
bool add_handlers = !is_oop && !aligned;
@@ -2344,11 +2341,11 @@ address StubGenerator::generate_conjoint_int_oop_copy(StubId stub_id, address no
}
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
-#endif
+#endif // COMPILER2
bool add_handlers = !is_oop && !aligned;
bool add_relocs = UseZGC && is_oop;
bool add_extras = add_handlers || add_relocs;
@@ -2527,11 +2524,11 @@ address StubGenerator::generate_disjoint_long_oop_copy(StubId stub_id, address *
}
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
-#endif
+#endif // COMPILER2
bool add_handlers = !is_oop && !aligned;
bool add_relocs = UseZGC && is_oop;
bool add_extras = add_handlers || add_relocs;
@@ -2710,11 +2707,11 @@ address StubGenerator::generate_conjoint_long_oop_copy(StubId stub_id, address n
}
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
-#endif
+#endif // COMPILER2
bool add_handlers = !is_oop && !aligned;
bool add_relocs = UseZGC && is_oop;
bool add_extras = add_handlers || add_relocs;
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
index 13b1c942213..c35a2a1bba6 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
@@ -631,6 +631,27 @@ address generate_kyberInverseNtt_avx512(StubGenerator *stubgen,
}
// Kyber multiply polynomials in the NTT domain.
+// Implements
+// static int implKyberNttMult(
+// short[] result, short[] ntta, short[] nttb, short[] zetas) {}
+//
+// The algorithm used here differs from the one in the Java implementation:
+// it uses Montgomery multiplications instead of Barrett reduction, but the
+// end result modulo MLKEM_Q is the same. This is the Java equivalent of
+// this intrinsic implementation:
+// static void implKyberNttMultJava(short[] result, short[] ntta, short[] nttb) {
+// for (int m = 0; m < ML_KEM_N / 2; m++) {
+// int a0 = ntta[2 * m];
+// int a1 = ntta[2 * m + 1];
+// int b0 = nttb[2 * m];
+// int b1 = nttb[2 * m + 1];
+// int r = montMul(a0, b0) +
+// montMul(montMul(a1, b1), MONT_ZETAS_FOR_NTT_MULT[m]);
+// result[2 * m] = (short) montMul(r, MONT_R_SQUARE_MOD_Q);
+// result[2 * m + 1] = (short) montMul(
+// (montMul(a0, b1) + montMul(a1, b0)), MONT_R_SQUARE_MOD_Q);
+// }
+// }
//
// result (short[256]) = c_rarg0
// ntta (short[256]) = c_rarg1
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp
index 58f81652a0c..edfe89f5af0 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Intel Corporation. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,472 +39,1107 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
-#define xmm(i) as_XMMRegister(i)
-
-// Constants
+// Constant pool
ATTRIBUTE_ALIGNED(64) static const uint64_t round_consts_arr[24] = {
- 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
- 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
- 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
- 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
- 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
- 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
- 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
- 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
- };
+ 0x8000000080008008L, 0x0000000080000001L, 0x8000000000008080L,
+ 0x8000000080008081L, 0x800000008000000AL, 0x000000000000800AL,
+ 0x8000000000000080L, 0x8000000000008002L, 0x8000000000008003L,
+ 0x8000000000008089L, 0x800000000000008BL, 0x000000008000808BL,
+ 0x000000008000000AL, 0x0000000080008009L, 0x0000000000000088L,
+ 0x000000000000008AL, 0x8000000000008009L, 0x8000000080008081L,
+ 0x0000000080000001L, 0x000000000000808BL, 0x8000000080008000L,
+ 0x800000000000808AL, 0x0000000000008082L, 0x0000000000000001L
+};
-ATTRIBUTE_ALIGNED(64) static const uint64_t permsAndRots[] = {
- // permutation in combined rho and pi
- 9, 2, 11, 0, 1, 2, 3, 4, // step 1 and 3
- 8, 1, 9, 2, 11, 4, 12, 0, // step 2
- 9, 2, 10, 3, 11, 4, 12, 0, // step 4
- 8, 9, 2, 3, 4, 5, 6, 7, // step 5
- 0, 8, 9, 10, 15, 0, 0, 0, // step 6
- 4, 5, 8, 9, 6, 7, 10, 11, // step 7 and 8
- 0, 1, 2, 3, 13, 0, 0, 0, // step 9
- 2, 3, 0, 1, 11, 0, 0, 0, // step 10
- 4, 5, 6, 7, 14, 0, 0, 0, // step 11
- 14, 15, 12, 13, 4, 0, 0, 0, // step 12
- // size of rotations (after step 5)
- 1, 6, 62, 55, 28, 20, 27, 36,
- 3, 45, 10, 15, 25, 8, 39, 41,
- 44, 43, 21, 18, 2, 61, 56, 14,
- // rotation of row elements
- 12, 8, 9, 10, 11, 5, 6, 7,
- 9, 10, 11, 12, 8, 5, 6, 7
+ATTRIBUTE_ALIGNED(64) static const uint64_t avx2_round_consts[] = { // 24 rows; row i is { round_consts_arr[i], 0, round_consts_arr[i], 0 }
+ 0x8000000080008008L, 0x0L, 0x8000000080008008L, 0x0L,
+ 0x0000000080000001L, 0x0L, 0x0000000080000001L, 0x0L,
+ 0x8000000000008080L, 0x0L, 0x8000000000008080L, 0x0L,
+ 0x8000000080008081L, 0x0L, 0x8000000080008081L, 0x0L,
+ 0x800000008000000AL, 0x0L, 0x800000008000000AL, 0x0L,
+ 0x000000000000800AL, 0x0L, 0x000000000000800AL, 0x0L,
+ 0x8000000000000080L, 0x0L, 0x8000000000000080L, 0x0L,
+ 0x8000000000008002L, 0x0L, 0x8000000000008002L, 0x0L,
+ 0x8000000000008003L, 0x0L, 0x8000000000008003L, 0x0L,
+ 0x8000000000008089L, 0x0L, 0x8000000000008089L, 0x0L,
+ 0x800000000000008BL, 0x0L, 0x800000000000008BL, 0x0L,
+ 0x000000008000808BL, 0x0L, 0x000000008000808BL, 0x0L,
+ 0x000000008000000AL, 0x0L, 0x000000008000000AL, 0x0L,
+ 0x0000000080008009L, 0x0L, 0x0000000080008009L, 0x0L,
+ 0x0000000000000088L, 0x0L, 0x0000000000000088L, 0x0L,
+ 0x000000000000008AL, 0x0L, 0x000000000000008AL, 0x0L,
+ 0x8000000000008009L, 0x0L, 0x8000000000008009L, 0x0L,
+ 0x8000000080008081L, 0x0L, 0x8000000080008081L, 0x0L,
+ 0x0000000080000001L, 0x0L, 0x0000000080000001L, 0x0L,
+ 0x000000000000808BL, 0x0L, 0x000000000000808BL, 0x0L,
+ 0x8000000080008000L, 0x0L, 0x8000000080008000L, 0x0L,
+ 0x800000000000808AL, 0x0L, 0x800000000000808AL, 0x0L,
+ 0x0000000000008082L, 0x0L, 0x0000000000008082L, 0x0L,
+ 0x0000000000000001L, 0x0L, 0x0000000000000001L, 0x0L,
+};
+
+ATTRIBUTE_ALIGNED(64) static const uint64_t avx2_rotate_consts[] = { // 48 values r, then the 48 matching values 64-r
+ // X0 X0 X1 X3 X1 X3 X2 X4 X2 X4
+ 1, 28, 1, 28, 62, 27, 62, 27, // A1A3, A2A4
+ 36, 41, 36, 41, 44, 55, 44, 55, 6, 20, 6, 20, // A5A15, A6A8, A7A9
+ 3, 18, 3, 18, 10, 25, 10, 25, 43, 39, 43, 39, // A10A20, A11A13, A12A14
+ 45, 21, 45, 21, 15, 8, 15, 8, // A16A18, A17A19
+ 2, 56, 2, 56, 61, 14, 61, 14, // A21A23, A22A24
+
+ // Second half, at byte offset 384 = 12*4*8: the table above with each value r replaced by 64-r
+ // X0 X0 X1 X3 X1 X3 X2 X4 X2 X4
+ 64- 1, 64-28, 64- 1, 64-28, 64-62, 64-27, 64-62, 64-27,
+ 64-36, 64-41, 64-36, 64-41, 64-44, 64-55, 64-44, 64-55, 64- 6, 64-20, 64- 6, 64-20,
+ 64- 3, 64-18, 64- 3, 64-18, 64-10, 64-25, 64-10, 64-25, 64-43, 64-39, 64-43, 64-39,
+ 64-45, 64-21, 64-45, 64-21, 64-15, 64- 8, 64-15, 64- 8,
+ 64- 2, 64-56, 64- 2, 64-56, 64-61, 64-14, 64-61, 64-14,
};
static address round_constsAddr() {
return (address) round_consts_arr;
}
-static address permsAndRotsAddr() {
- return (address) permsAndRots;
+static address avx2_round_constsAddr() {
+ return (address) avx2_round_consts;
}
-// Arguments:
-//
-// Inputs:
+static address avx2_rotate_constsAddr() {
+ return (address) avx2_rotate_consts;
+}
+
+// Inputs (sha3_implCompress|sha3_implCompressMB):
// c_rarg0 - byte[] source+offset
// c_rarg1 - long[] SHA3.state
// c_rarg2 - int block_size
// c_rarg3 - int offset
// c_rarg4 - int limit
//
-static address generate_sha3_implCompress(StubId stub_id,
+// Inputs (double_keccak):
+// c_rarg0 - long[] SHA3.state1
+// c_rarg1 - long[] SHA3.state2
+//
+// Inputs (quad_keccak):
+// c_rarg0 - long[] SHA3.state1
+// c_rarg1 - long[] SHA3.state2
+// c_rarg2 - long[] SHA3.state3
+// c_rarg3 - long[] SHA3.state4
+//
+// Design notes:
+// With 32 AVX512 registers, we can fit the entire SHA3 state into the first 25
+// registers (using just one element out of each register!). The 'interesting' part
+// of the function is just a translation of the Java code. This is (surprisingly)
+// very efficient, and it also makes double_keccak and quad_keccak support fairly trivial.
+static address generate_sha3_implCompress_avx512(StubId stub_id,
StubGenerator *stubgen,
MacroAssembler *_masm) {
- bool multiBlock;
switch(stub_id) {
case StubId::stubgen_sha3_implCompress_id:
- multiBlock = false;
- break;
case StubId::stubgen_sha3_implCompressMB_id:
- multiBlock = true;
+ case StubId::stubgen_double_keccak_id:
+ case StubId::stubgen_quad_keccak_id:
break;
default:
ShouldNotReachHere();
}
+
int entry_count = StubInfo::entry_count(stub_id);
assert(entry_count == 1, "sanity check");
address start = stubgen->load_archive_data(stub_id);
if (start != nullptr) {
return start;
}
+
__ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
start = __ pc();
-
- const Register buf = c_rarg0;
- const Register state = c_rarg1;
- const Register block_size = c_rarg2;
- const Register ofs = c_rarg3;
-#ifndef _WIN64
- const Register limit = c_rarg4;
-#else
- const Address limit_mem(rbp, 6 * wordSize);
- const Register limit = r12;
-#endif
-
- const Register permsAndRots = r10;
- const Register round_consts = r11;
- const Register constant2use = r13;
- const Register roundsLeft = r14;
-
- Label sha3_loop;
- Label rounds24_loop, block104, block136, block144, block168;
-
__ enter();
- __ push_ppx(r12);
- __ push_ppx(r13);
- __ push_ppx(r14);
+ Register buf, offset, block_size, limit;
+ Register state1, state2, state3, state4, state5, state6, state7, state8;
+ Register roundsLeft = r10;
+ Register round_consts = r11;
+ int vector_len = Assembler::AVX_128bit;
+ bool multiBlock = stub_id == StubId::stubgen_sha3_implCompressMB_id;
+ bool parallelKeccak = true;
-#ifdef _WIN64
- // on win64, fill limit from stack position
- __ movptr(limit, limit_mem);
-#endif
+ switch (stub_id) {
+ case StubId::stubgen_quad_keccak_id:
+ vector_len = Assembler::AVX_256bit;
+ state1 = c_rarg0;
+ state2 = c_rarg1;
+ state3 = c_rarg2;
+ state4 = c_rarg3;
+ break;
+ case StubId::stubgen_double_keccak_id:
+ state1 = c_rarg0;
+ state2 = c_rarg1;
+ break;
+ default:
+ parallelKeccak = false;
+ buf = c_rarg0;
+ state1 = c_rarg1;
+ block_size = c_rarg2;
+ offset = c_rarg3;
+ #ifndef _WIN64
+ limit = c_rarg4;
+ #else
+ limit = rdi;
+ __ push_ppx(rdi);
+ __ movptr(limit, Address(rbp, 6 * wordSize));
+ #endif
+ }
- __ lea(permsAndRots, ExternalAddress(permsAndRotsAddr()));
+ __ movl(rax, 0x1);
+ __ kmovwl(k1, rax);
__ lea(round_consts, ExternalAddress(round_constsAddr()));
- // set up the masks
- __ movl(rax, 0x1F);
- __ kmovwl(k5, rax);
- __ kshiftrwl(k4, k5, 1);
- __ kshiftrwl(k3, k5, 2);
- __ kshiftrwl(k2, k5, 3);
- __ kshiftrwl(k1, k5, 4);
+ XMMRegister A0 = xmm0;
+ XMMRegister A1 = xmm1;
+ XMMRegister A2 = xmm2;
+ XMMRegister A3 = xmm3;
+ XMMRegister A4 = xmm4;
+ XMMRegister A5 = xmm5;
+ XMMRegister A6 = xmm6;
+ XMMRegister A7 = xmm7;
+ XMMRegister A8 = xmm8;
+ XMMRegister A9 = xmm9;
+ XMMRegister A10 = xmm10;
+ XMMRegister A11 = xmm11;
+ XMMRegister A12 = xmm12;
+ XMMRegister A13 = xmm13;
+ XMMRegister A14 = xmm14;
+ XMMRegister A15 = xmm15;
+ XMMRegister A16 = xmm16;
+ XMMRegister A17 = xmm17;
+ XMMRegister A18 = xmm18;
+ XMMRegister A19 = xmm19;
+ XMMRegister A20 = xmm20;
+ XMMRegister A21 = xmm21;
+ XMMRegister A22 = xmm22;
+ XMMRegister A23 = xmm23;
+ XMMRegister A24 = xmm24;
+ XMMRegister C0 = xmm25;
+ XMMRegister C1 = xmm26;
+ XMMRegister C2 = xmm27;
+ XMMRegister C3 = xmm28;
+ XMMRegister C4 = xmm29;
+ XMMRegister T0 = xmm30;
+ XMMRegister T1 = xmm31;
- // load the state
- for (int i = 0; i < 5; i++) {
- __ evmovdquq(xmm(i), k5, Address(state, i * 40), false, Assembler::AVX_512bit);
- }
-
- // load the permutation and rotation constants
- for (int i = 0; i < 15; i++) {
- __ evmovdquq(xmm(i + 17), Address(permsAndRots, i * 64), Assembler::AVX_512bit);
+ auto loadState = [=](XMMRegister X1, XMMRegister X2, int disp){
+ if (stub_id == StubId::stubgen_quad_keccak_id) {
+ __ vmovdqu(T0, Address(state1, disp), Assembler::AVX_128bit);
+ __ vmovdqu(T1, Address(state2, disp), Assembler::AVX_128bit);
+ __ vmovdqu(C0, Address(state3, disp), Assembler::AVX_128bit);
+ __ vmovdqu(C1, Address(state4, disp), Assembler::AVX_128bit);
+ __ vshufpd(X1, T0, T1, 0b00, Assembler::AVX_128bit);
+ __ vshufpd(X2, T0, T1, 0b11, Assembler::AVX_128bit);
+ __ vshufpd(T0, C0, C1, 0b00, Assembler::AVX_128bit);
+ __ vshufpd(T1, C0, C1, 0b11, Assembler::AVX_128bit);
+ __ vinserti128(X1, X1, T0, 1);
+ __ vinserti128(X2, X2, T1, 1);
+ } else if (stub_id == StubId::stubgen_double_keccak_id) {
+ __ vmovdqu(T0, Address(state1, disp), Assembler::AVX_128bit);
+ __ vmovdqu(T1, Address(state2, disp), Assembler::AVX_128bit);
+ __ vshufpd(X1, T0, T1, 0b00, Assembler::AVX_128bit);
+ __ vshufpd(X2, T0, T1, 0b11, Assembler::AVX_128bit);
+ } else {
+ // only care about values in first 64bit columns for non-parallel keccak
+ __ vmovdqu(X1, Address(state1, disp), Assembler::AVX_128bit);
+ __ vshufpd(X2, X1, X1, 0b1, Assembler::AVX_128bit);
+ }
+ };
+
+ loadState( A0, A1, 0 * 8);
+ loadState( A2, A3, 2 * 8);
+ loadState( A4, A5, 4 * 8);
+ loadState( A6, A7, 6 * 8);
+ loadState( A8, A9, 8 * 8);
+ loadState(A10, A11, 10 * 8);
+ loadState(A12, A13, 12 * 8);
+ loadState(A14, A15, 14 * 8);
+ loadState(A16, A17, 16 * 8);
+ loadState(A18, A19, 18 * 8);
+ loadState(A20, A21, 20 * 8);
+ loadState(A22, A23, 22 * 8);
+ __ movq(A24, Address(state1, 24 * 8));
+ if (stub_id == StubId::stubgen_quad_keccak_id) {
+ __ movq(T0, Address(state2, 24 * 8));
+ __ vshufpd(A24, A24, T0, 0b00, Assembler::AVX_128bit);
+
+ __ movq(T0, Address(state3, 24 * 8));
+ __ movq(T1, Address(state4, 24 * 8));
+ __ vshufpd(T0, T0, T1, 0b00, Assembler::AVX_128bit);
+ __ vinserti128(A24, A24, T0, 1);
+ } else if (stub_id == StubId::stubgen_double_keccak_id) {
+ __ movq(T0, Address(state2, 24 * 8));
+ __ vshufpd(A24, A24, T0, 0b00, Assembler::AVX_128bit);
}
+ Label rounds24_loop, multi_loop;
__ align(OptoLoopAlignment);
- __ BIND(sha3_loop);
+ __ BIND(multi_loop);
+ __ movl(roundsLeft, 23);
- // there will be 24 keccak rounds
- __ movl(roundsLeft, 24);
- // load round_constants base
- __ movptr(constant2use, round_consts);
+ if (!parallelKeccak) {
+ __ evpxorq( A0, k1, A0, Address(buf, 0 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A1, k1, A1, Address(buf, 1 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A2, k1, A2, Address(buf, 2 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A3, k1, A3, Address(buf, 3 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A4, k1, A4, Address(buf, 4 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A5, k1, A5, Address(buf, 5 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A6, k1, A6, Address(buf, 6 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A7, k1, A7, Address(buf, 7 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq( A8, k1, A8, Address(buf, 8 * 8), false, Assembler::AVX_128bit);
+ __ cmpl(block_size, 72);
+ __ jcc(Assembler::equal, rounds24_loop);
+ __ evpxorq( A9, k1, A9, Address(buf, 9 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A10, k1, A10, Address(buf, 10 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A11, k1, A11, Address(buf, 11 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A12, k1, A12, Address(buf, 12 * 8), false, Assembler::AVX_128bit);
+ __ cmpl(block_size, 104);
+ __ jcc(Assembler::equal, rounds24_loop);
+ __ evpxorq(A13, k1, A13, Address(buf, 13 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A14, k1, A14, Address(buf, 14 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A15, k1, A15, Address(buf, 15 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A16, k1, A16, Address(buf, 16 * 8), false, Assembler::AVX_128bit);
+ __ cmpl(block_size, 136);
+ __ jcc(Assembler::equal, rounds24_loop);
+ __ evpxorq(A17, k1, A17, Address(buf, 17 * 8), false, Assembler::AVX_128bit);
+ __ cmpl(block_size, 144);
+ __ jcc(Assembler::equal, rounds24_loop);
+ __ evpxorq(A18, k1, A18, Address(buf, 18 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A19, k1, A19, Address(buf, 19 * 8), false, Assembler::AVX_128bit);
+ __ evpxorq(A20, k1, A20, Address(buf, 20 * 8), false, Assembler::AVX_128bit);
+ }
- // load input: 72, 104, 136, 144 or 168 bytes
- // i.e. 5+4, 2*5+3, 3*5+2, 3*5+3 or 4*5+1 longs
- __ evpxorq(xmm0, k5, xmm0, Address(buf, 0), true, Assembler::AVX_512bit);
-
- // if(blockSize == 72) SHA3-512
- __ cmpl(block_size, 72);
- __ jcc(Assembler::notEqual, block104);
- __ evpxorq(xmm1, k4, xmm1, Address(buf, 40), true, Assembler::AVX_512bit);
- __ jmp(rounds24_loop);
-
- // if(blockSize == 104) SHA3-384
- __ BIND(block104);
- __ cmpl(block_size, 104);
- __ jcc(Assembler::notEqual, block136);
- __ evpxorq(xmm1, k5, xmm1, Address(buf, 40), true, Assembler::AVX_512bit);
- __ evpxorq(xmm2, k3, xmm2, Address(buf, 80), true, Assembler::AVX_512bit);
- __ jmp(rounds24_loop);
-
- // if(blockSize == 136) SHA3-256 and SHAKE256
- __ BIND(block136);
- __ cmpl(block_size, 136);
- __ jcc(Assembler::notEqual, block144);
- __ evpxorq(xmm1, k5, xmm1, Address(buf, 40), true, Assembler::AVX_512bit);
- __ evpxorq(xmm2, k5, xmm2, Address(buf, 80), true, Assembler::AVX_512bit);
- __ evpxorq(xmm3, k2, xmm3, Address(buf, 120), true, Assembler::AVX_512bit);
- __ jmp(rounds24_loop);
-
- // if(blockSize == 144) SHA3-224
- __ BIND(block144);
- __ cmpl(block_size, 144);
- __ jcc(Assembler::notEqual, block168);
- __ evpxorq(xmm1, k5, xmm1, Address(buf, 40), true, Assembler::AVX_512bit);
- __ evpxorq(xmm2, k5, xmm2, Address(buf, 80), true, Assembler::AVX_512bit);
- __ evpxorq(xmm3, k3, xmm3, Address(buf, 120), true, Assembler::AVX_512bit);
- __ jmp(rounds24_loop);
-
- // if(blockSize == 168) SHAKE128
- __ BIND(block168);
- __ evpxorq(xmm1, k5, xmm1, Address(buf, 40), true, Assembler::AVX_512bit);
- __ evpxorq(xmm2, k5, xmm2, Address(buf, 80), true, Assembler::AVX_512bit);
- __ evpxorq(xmm3, k5, xmm3, Address(buf, 120), true, Assembler::AVX_512bit);
- __ evpxorq(xmm4, k1, xmm4, Address(buf, 160), true, Assembler::AVX_512bit);
-
- // The 24 rounds of the keccak transformation.
- // The implementation closely follows the Java version, with the state
- // array "rows" in the lowest 5 64-bit slots of zmm0 - zmm4, i.e.
- // each row of the SHA3 specification is located in one zmm register.
__ align(OptoLoopAlignment);
__ BIND(rounds24_loop);
- __ subl(roundsLeft, 1);
- __ evmovdquw(xmm5, xmm0, Assembler::AVX_512bit);
- // vpternlogq(x, 150, y, z) does x = x ^ y ^ z
- __ vpternlogq(xmm5, 150, xmm1, xmm2, Assembler::AVX_512bit);
- __ vpternlogq(xmm5, 150, xmm3, xmm4, Assembler::AVX_512bit);
- // Now the "c row", i.e. c0-c4 are in zmm5.
- // Rotate each element of the c row by one bit to zmm6, call the
- // rotated version c'.
- __ evprolq(xmm6, xmm5, 1, Assembler::AVX_512bit);
- // Rotate elementwise the c row so that c4 becomes c0,
- // c0 becomes c1, etc.
- __ evpermt2q(xmm5, xmm30, xmm5, Assembler::AVX_512bit);
- // rotate elementwise the c' row so that c'0 becomes c'4,
- // c'1 becomes c'0, etc.
- __ evpermt2q(xmm6, xmm31, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm0, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm1, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm2, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm3, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm4, 150, xmm5, xmm6, Assembler::AVX_512bit);
- // Now the theta mapping has been finished.
+ // Step mapping Theta as defined in section 3.2.1.
+ // long c0 = a0^a5^a10^a15^a20;
+ // long c1 = a1^a6^a11^a16^a21;
+ // long c2 = a2^a7^a12^a17^a22;
+ // long c3 = a3^a8^a13^a18^a23;
+ // long c4 = a4^a9^a14^a19^a24;
- // Do the cyclical permutation of the 24 moving state elements
- // and the required rotations within each element (the combined
- // rho and pi steps).
- __ evpermt2q(xmm4, xmm17, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm3, xmm18, xmm2, Assembler::AVX_512bit);
- __ evpermt2q(xmm2, xmm17, xmm1, Assembler::AVX_512bit);
- __ evpermt2q(xmm1, xmm19, xmm0, Assembler::AVX_512bit);
- __ evpermt2q(xmm4, xmm20, xmm2, Assembler::AVX_512bit);
- // The 24 moving elements are now in zmm1, zmm3 and zmm4,
- // do the rotations now.
- __ evprolvq(xmm1, xmm1, xmm27, Assembler::AVX_512bit);
- __ evprolvq(xmm3, xmm3, xmm28, Assembler::AVX_512bit);
- __ evprolvq(xmm4, xmm4, xmm29, Assembler::AVX_512bit);
- __ evmovdquw(xmm2, xmm1, Assembler::AVX_512bit);
- __ evmovdquw(xmm5, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm0, xmm21, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm1, xmm22, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm5, xmm22, xmm2, Assembler::AVX_512bit);
- __ evmovdquw(xmm3, xmm1, Assembler::AVX_512bit);
- __ evmovdquw(xmm2, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm1, xmm23, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm2, xmm24, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm3, xmm25, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm4, xmm26, xmm5, Assembler::AVX_512bit);
- // The combined rho and pi steps are done.
+ __ evmovdquq(C0, A0, vector_len);
+ __ evmovdquq(C1, A1, vector_len);
+ __ evmovdquq(C2, A2, vector_len);
+ __ evmovdquq(C3, A3, vector_len);
+ __ evmovdquq(C4, A4, vector_len);
- // Do the chi step (the same operation on all 5 rows).
- // vpternlogq(x, 180, y, z) does x = x ^ (y & ~z).
- __ evpermt2q(xmm5, xmm31, xmm0, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm0, 180, xmm6, xmm5, Assembler::AVX_512bit);
+ __ vpternlogq(C0, 0x96, A5, A10, vector_len);
+ __ vpternlogq(C1, 0x96, A6, A11, vector_len);
+ __ vpternlogq(C2, 0x96, A7, A12, vector_len);
+ __ vpternlogq(C3, 0x96, A8, A13, vector_len);
+ __ vpternlogq(C4, 0x96, A9, A14, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm1, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm1, 180, xmm6, xmm5, Assembler::AVX_512bit);
+ __ vpternlogq(C0, 0x96, A15, A20, vector_len);
+ __ vpternlogq(C1, 0x96, A16, A21, vector_len);
+ __ vpternlogq(C2, 0x96, A17, A22, vector_len);
+ __ vpternlogq(C3, 0x96, A18, A23, vector_len);
+ __ vpternlogq(C4, 0x96, A19, A24, vector_len);
- // xor the round constant into a0 (the lowest 64 bits of zmm0
- __ evpxorq(xmm0, k1, xmm0, Address(constant2use, 0), true, Assembler::AVX_512bit);
- __ addptr(constant2use, 8);
+ // long d0 = c4 ^ Long.rotateLeft(c1, 1);
+ // long d1 = c0 ^ Long.rotateLeft(c2, 1);
+ // long d2 = c1 ^ Long.rotateLeft(c3, 1);
+ // long d3 = c2 ^ Long.rotateLeft(c4, 1);
+ // long d4 = c3 ^ Long.rotateLeft(c0, 1);
+ // a0 ^= d0; a1 ^= d1; a2 ^= d2; a3 ^= d3; a4 ^= d4;
+ // a5 ^= d0; a6 ^= d1; a7 ^= d2; a8 ^= d3; a9 ^= d4;
+ // a10 ^= d0; a11 ^= d1; a12 ^= d2; a13 ^= d3; a14 ^= d4;
+ // a15 ^= d0; a16 ^= d1; a17 ^= d2; a18 ^= d3; a19 ^= d4;
+ // a20 ^= d0; a21 ^= d1; a22 ^= d2; a23 ^= d3; a24 ^= d4;
- __ evpermt2q(xmm5, xmm31, xmm2, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm2, 180, xmm6, xmm5, Assembler::AVX_512bit);
+ __ evprolq(T0, C1, 1, vector_len);
+ __ vpternlogq(A0 , 0x96, T0, C4, vector_len);
+ __ vpternlogq(A5 , 0x96, T0, C4, vector_len);
+ __ vpternlogq(A10, 0x96, T0, C4, vector_len);
+ __ vpternlogq(A15, 0x96, T0, C4, vector_len);
+ __ vpternlogq(A20, 0x96, T0, C4, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm3, 180, xmm6, xmm5, Assembler::AVX_512bit);
+ __ evprolq(T0, C2, 1, vector_len);
+ __ vpternlogq(A1 , 0x96, T0, C0, vector_len);
+ __ vpternlogq(A6 , 0x96, T0, C0, vector_len);
+ __ vpternlogq(A11, 0x96, T0, C0, vector_len);
+ __ vpternlogq(A16, 0x96, T0, C0, vector_len);
+ __ vpternlogq(A21, 0x96, T0, C0, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm4, 180, xmm6, xmm5, Assembler::AVX_512bit);
- __ cmpl(roundsLeft, 0);
- __ jcc(Assembler::notEqual, rounds24_loop);
+ __ evprolq(T0, C3, 1, vector_len);
+ __ vpternlogq(A2 , 0x96, T0, C1, vector_len);
+ __ vpternlogq(A7 , 0x96, T0, C1, vector_len);
+ __ vpternlogq(A12, 0x96, T0, C1, vector_len);
+ __ vpternlogq(A17, 0x96, T0, C1, vector_len);
+ __ vpternlogq(A22, 0x96, T0, C1, vector_len);
+
+ __ evprolq(T0, C4, 1, vector_len);
+ __ vpternlogq(A3 , 0x96, T0, C2, vector_len);
+ __ vpternlogq(A8 , 0x96, T0, C2, vector_len);
+ __ vpternlogq(A13, 0x96, T0, C2, vector_len);
+ __ vpternlogq(A18, 0x96, T0, C2, vector_len);
+ __ vpternlogq(A23, 0x96, T0, C2, vector_len);
+
+ __ evprolq(T0, C0, 1, vector_len);
+ __ vpternlogq(A4 , 0x96, T0, C3, vector_len);
+ __ vpternlogq(A9 , 0x96, T0, C3, vector_len);
+ __ vpternlogq(A14, 0x96, T0, C3, vector_len);
+ __ vpternlogq(A19, 0x96, T0, C3, vector_len);
+ __ vpternlogq(A24, 0x96, T0, C3, vector_len);
+
+ // Merged Step mapping Rho (section 3.2.2) and Pi (section 3.2.3)
+ // long ay = Long.rotateLeft(a10, 3);
+ // a10 = Long.rotateLeft(a1, 1);
+ // a1 = Long.rotateLeft(a6, 44);
+ // a6 = Long.rotateLeft(a9, 20);
+ // a9 = Long.rotateLeft(a22, 61);
+ // a22 = Long.rotateLeft(a14, 39);
+ // a14 = Long.rotateLeft(a20, 18);
+ // a20 = Long.rotateLeft(a2, 62);
+ // a2 = Long.rotateLeft(a12, 43);
+ // a12 = Long.rotateLeft(a13, 25);
+ // a13 = Long.rotateLeft(a19, 8);
+ // a19 = Long.rotateLeft(a23, 56);
+ // a23 = Long.rotateLeft(a15, 41);
+ // a15 = Long.rotateLeft(a4, 27);
+ // a4 = Long.rotateLeft(a24, 14);
+ // a24 = Long.rotateLeft(a21, 2);
+ // a21 = Long.rotateLeft(a8, 55);
+ // a8 = Long.rotateLeft(a16, 45);
+ // a16 = Long.rotateLeft(a5, 36);
+ // a5 = Long.rotateLeft(a3, 28);
+ // a3 = Long.rotateLeft(a18, 21);
+ // a18 = Long.rotateLeft(a17, 15);
+ // a17 = Long.rotateLeft(a11, 10);
+ // a11 = Long.rotateLeft(a7, 6);
+ // a7 = ay;
+
+ __ evprolq(T0, A10, 3, vector_len);
+ __ evprolq(A10, A1, 1, vector_len);
+ __ evprolq(A1, A6, 44, vector_len);
+ __ evprolq(A6, A9, 20, vector_len);
+ __ evprolq(A9, A22, 61, vector_len);
+ __ evprolq(A22, A14, 39, vector_len);
+ __ evprolq(A14, A20, 18, vector_len);
+ __ evprolq(A20, A2, 62, vector_len);
+ __ evprolq(A2, A12, 43, vector_len);
+ __ evprolq(A12, A13, 25, vector_len);
+ __ evprolq(A13, A19, 8, vector_len);
+ __ evprolq(A19, A23, 56, vector_len);
+ __ evprolq(A23, A15, 41, vector_len);
+ __ evprolq(A15, A4, 27, vector_len);
+ __ evprolq(A4, A24, 14, vector_len);
+ __ evprolq(A24, A21, 2, vector_len);
+ __ evprolq(A21, A8, 55, vector_len);
+ __ evprolq(A8, A16, 45, vector_len);
+ __ evprolq(A16, A5, 36, vector_len);
+ __ evprolq(A5, A3, 28, vector_len);
+ __ evprolq(A3, A18, 21, vector_len);
+ __ evprolq(A18, A17, 15, vector_len);
+ __ evprolq(A17, A11, 10, vector_len);
+ __ evprolq(A11, A7, 6, vector_len);
+ __ evmovdquq(A7, T0, vector_len);
+
+ // Step mapping Chi as defined in section 3.2.4.
+ // long tmp0 = a0;
+ // long tmp1 = a1;
+ // long tmp2 = a2;
+ // long tmp3 = a3;
+ // long tmp4 = a4;
+ // a0 = tmp0 ^ ((~tmp1) & tmp2);
+ // a1 = tmp1 ^ ((~tmp2) & tmp3);
+ // a2 = tmp2 ^ ((~tmp3) & tmp4);
+ // a3 = tmp3 ^ ((~tmp4) & tmp0);
+ // a4 = tmp4 ^ ((~tmp0) & tmp1);
+ __ evmovdquq(T0, A0, vector_len);
+ __ evmovdquq(T1, A1, vector_len);
+ __ vpternlogq(A0 , 0xD2, A1, A2, vector_len);
+ __ vpternlogq(A1 , 0xD2, A2, A3, vector_len);
+ __ vpternlogq(A2 , 0xD2, A3, A4, vector_len);
+ __ vpternlogq(A3 , 0xD2, A4, T0, vector_len);
+ __ vpternlogq(A4 , 0xD2, T0, T1, vector_len);
+
+ // Step mapping Iota as defined in section 3.2.5.
+ // a0 ^= RC_CONSTANTS[ir];
+ __ vpbroadcastq(T0, Address(round_consts, roundsLeft, Address::times_8), vector_len);
+ __ evpxorq(A0, A0, T0, vector_len);
+
+ // tmp0 = a5; tmp1 = a6; tmp2 = a7; tmp3 = a8; tmp4 = a9;
+ // a5 = tmp0 ^ ((~tmp1) & tmp2);
+ // a6 = tmp1 ^ ((~tmp2) & tmp3);
+ // a7 = tmp2 ^ ((~tmp3) & tmp4);
+ // a8 = tmp3 ^ ((~tmp4) & tmp0);
+ // a9 = tmp4 ^ ((~tmp0) & tmp1);
+ __ evmovdquq(T0, A5, vector_len);
+ __ evmovdquq(T1, A6, vector_len);
+ __ vpternlogq(A5 , 0xD2, A6, A7, vector_len);
+ __ vpternlogq(A6 , 0xD2, A7, A8, vector_len);
+ __ vpternlogq(A7 , 0xD2, A8, A9, vector_len);
+ __ vpternlogq(A8 , 0xD2, A9, T0, vector_len);
+ __ vpternlogq(A9 , 0xD2, T0, T1, vector_len);
+
+ // tmp0 = a10; tmp1 = a11; tmp2 = a12; tmp3 = a13; tmp4 = a14;
+ // a10 = tmp0 ^ ((~tmp1) & tmp2);
+ // a11 = tmp1 ^ ((~tmp2) & tmp3);
+ // a12 = tmp2 ^ ((~tmp3) & tmp4);
+ // a13 = tmp3 ^ ((~tmp4) & tmp0);
+ // a14 = tmp4 ^ ((~tmp0) & tmp1);
+ __ evmovdquq(T0, A10, vector_len);
+ __ evmovdquq(T1, A11, vector_len);
+ __ vpternlogq(A10 , 0xD2, A11, A12, vector_len);
+ __ vpternlogq(A11 , 0xD2, A12, A13, vector_len);
+ __ vpternlogq(A12 , 0xD2, A13, A14, vector_len);
+ __ vpternlogq(A13 , 0xD2, A14, T0, vector_len);
+ __ vpternlogq(A14 , 0xD2, T0, T1, vector_len);
+
+ // tmp0 = a15; tmp1 = a16; tmp2 = a17; tmp3 = a18; tmp4 = a19;
+ // a15 = tmp0 ^ ((~tmp1) & tmp2);
+ // a16 = tmp1 ^ ((~tmp2) & tmp3);
+ // a17 = tmp2 ^ ((~tmp3) & tmp4);
+ // a18 = tmp3 ^ ((~tmp4) & tmp0);
+ // a19 = tmp4 ^ ((~tmp0) & tmp1);
+ __ evmovdquq(T0, A15, vector_len);
+ __ evmovdquq(T1, A16, vector_len);
+ __ vpternlogq(A15 , 0xD2, A16, A17, vector_len);
+ __ vpternlogq(A16 , 0xD2, A17, A18, vector_len);
+ __ vpternlogq(A17 , 0xD2, A18, A19, vector_len);
+ __ vpternlogq(A18 , 0xD2, A19, T0, vector_len);
+ __ vpternlogq(A19 , 0xD2, T0, T1, vector_len);
+
+ // tmp0 = a20; tmp1 = a21; tmp2 = a22; tmp3 = a23; tmp4 = a24;
+ // a20 = tmp0 ^ ((~tmp1) & tmp2);
+ // a21 = tmp1 ^ ((~tmp2) & tmp3);
+ // a22 = tmp2 ^ ((~tmp3) & tmp4);
+ // a23 = tmp3 ^ ((~tmp4) & tmp0);
+ // a24 = tmp4 ^ ((~tmp0) & tmp1);
+ __ evmovdquq(T0, A20, vector_len);
+ __ evmovdquq(T1, A21, vector_len);
+ __ vpternlogq(A20 , 0xD2, A21, A22, vector_len);
+ __ vpternlogq(A21 , 0xD2, A22, A23, vector_len);
+ __ vpternlogq(A22 , 0xD2, A23, A24, vector_len);
+ __ vpternlogq(A23 , 0xD2, A24, T0, vector_len);
+ __ vpternlogq(A24 , 0xD2, T0, T1, vector_len);
+
+ __ decrementl(roundsLeft);
+ __ jcc(Assembler::positive, rounds24_loop);
if (multiBlock) {
__ addptr(buf, block_size);
- __ addl(ofs, block_size);
- __ cmpl(ofs, limit);
- __ jcc(Assembler::lessEqual, sha3_loop);
- __ movq(rax, ofs); // return ofs
+ __ addl(offset, block_size);
+ __ cmpl(offset, limit);
+ __ jcc(Assembler::lessEqual, multi_loop);
+ __ movq(rax, offset); // return offset
} else {
__ xorq(rax, rax); // return 0
}
- // store the state
- for (int i = 0; i < 5; i++) {
- __ evmovdquq(Address(state, i * 40), k5, xmm(i), true, Assembler::AVX_512bit);
+ auto storeState = [=](int disp, XMMRegister X1, XMMRegister X2){
+ if (stub_id == StubId::stubgen_quad_keccak_id) {
+ __ vshufpd(T0, X1, X2, 0b0000, Assembler::AVX_256bit);
+ __ vshufpd(T1, X1, X2, 0b1111, Assembler::AVX_256bit);
+ __ vmovdqu(Address(state1, disp), T0, Assembler::AVX_128bit);
+ __ vmovdqu(Address(state2, disp), T1, Assembler::AVX_128bit);
+ __ vextracti128(Address(state3, disp), T0, 1);
+ __ vextracti128(Address(state4, disp), T1, 1);
+ } else if (stub_id == StubId::stubgen_double_keccak_id) {
+ __ vshufpd(T0, X1, X2, 0b00, Assembler::AVX_128bit);
+ __ vshufpd(T1, X1, X2, 0b11, Assembler::AVX_128bit);
+ __ vmovdqu(Address(state1, disp), T0, Assembler::AVX_128bit);
+ __ vmovdqu(Address(state2, disp), T1, Assembler::AVX_128bit);
+ } else {
+ __ pextrq(Address(state1, disp), X1, 0);
+ __ pextrq(Address(state1, disp+8), X2, 0);
+ }
+ };
+
+ storeState( 0 * 8, A0, A1);
+ storeState( 2 * 8, A2, A3);
+ storeState( 4 * 8, A4, A5);
+ storeState( 6 * 8, A6, A7);
+ storeState( 8 * 8, A8, A9);
+ storeState(10 * 8, A10, A11);
+ storeState(12 * 8, A12, A13);
+ storeState(14 * 8, A14, A15);
+ storeState(16 * 8, A16, A17);
+ storeState(18 * 8, A18, A19);
+ storeState(20 * 8, A20, A21);
+ storeState(22 * 8, A22, A23);
+ __ pextrq(Address(state1, 24 * 8), A24, 0);
+ if (stub_id == StubId::stubgen_quad_keccak_id) {
+ __ pextrq(Address(state2, 24 * 8), A24, 1);
+ __ vextracti32x4(A24, A24, 1);
+ __ pextrq(Address(state3, 24 * 8), A24, 0);
+ __ pextrq(Address(state4, 24 * 8), A24, 1);
+ } else if (stub_id == StubId::stubgen_double_keccak_id) {
+ __ pextrq(Address(state2, 24 * 8), A24, 1);
}
- __ pop_ppx(r14);
- __ pop_ppx(r13);
- __ pop_ppx(r12);
+ // Cleanup
+ // Zero out zmm0-zmm31.
+ __ vzeroall();
+ for (XMMRegister rxmm = xmm16; rxmm->is_valid(); rxmm = rxmm->successor()) {
+ __ vpxorq(rxmm, rxmm, rxmm, vector_len);
+ }
+
+ if (!parallelKeccak) {
+#ifdef _WIN64
+ __ pop_ppx(rdi);
+#endif
+ }
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
-
// record the stub entry and end
stubgen->store_archive_data(stub_id, start, __ pc());
return start;
}
-// Inputs:
-// c_rarg0 - long[] state0
-// c_rarg1 - long[] state1
+// Inputs (sha3_implCompress|sha3_implCompressMB):
+// c_rarg0 - byte[] source+offset
+// c_rarg1 - long[] SHA3.state
+// c_rarg2 - int block_size
+// c_rarg3 - int offset
+// c_rarg4 - int limit
//
-// Performs two keccak() computations in parallel. The steps of the
-// two computations are executed interleaved.
-static address generate_double_keccak(StubGenerator *stubgen, MacroAssembler *_masm) {
- StubId stub_id = StubId::stubgen_double_keccak_id;
+// Inputs (double_keccak):
+// c_rarg0 - long[] SHA3.state1
+// c_rarg1 - long[] SHA3.state2
+//
+// Pseudocode:
+// loadStates
+// xor(buf, state, blocksize) IF !parallelKeccak
+// shuffle(state)
+// LBL: {
+// KECCAK()
+// IF multiBlock {
+// if (buflen) break;
+// buf++, buflen--;
+// shuffle(buf)
+// xor(buf, state, blocksize)
+// goto LBL
+// }
+// }
+// storeStates
+//
+// KECCAK AVX2 design notes:
+// (1) - The algorithm was written to fit into a 128-bit lane
+// (hence parallelKeccak takes a full 256-bit register)
+// (2) - A lot of shuffles are inevitable, since there are not enough registers.
+// To save some shuffles, columns 1 and 3, and columns 2 and 4, are placed into
+// the same 128-bit register. Column 0 is also grouped (by rows).
+// This means the SHA3 state fits into 12.5 registers, leaving 3 registers as
+// temporaries. This is mostly sufficient, except for the Theta step, where we
+// have to use two slots on the stack.
+static address generate_sha3_implCompress_avx2(StubId stub_id,
+ StubGenerator *stubgen,
+ MacroAssembler *_masm) {
+ switch(stub_id) {
+ case StubId::stubgen_sha3_implCompress_id:
+ case StubId::stubgen_sha3_implCompressMB_id:
+ case StubId::stubgen_double_keccak_id:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
int entry_count = StubInfo::entry_count(stub_id);
assert(entry_count == 1, "sanity check");
address start = stubgen->load_archive_data(stub_id);
if (start != nullptr) {
return start;
}
+
__ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
start = __ pc();
-
- const Register state0 = c_rarg0;
- const Register state1 = c_rarg1;
-
- const Register permsAndRots = c_rarg2;
- const Register round_consts = c_rarg3;
- const Register constant2use = r10;
- const Register roundsLeft = r11;
-
- Label rounds24_loop;
-
__ enter();
- __ lea(permsAndRots, ExternalAddress(permsAndRotsAddr()));
- __ lea(round_consts, ExternalAddress(round_constsAddr()));
+ bool multiBlock = stub_id == StubId::stubgen_sha3_implCompressMB_id;
+ bool parallelKeccak = stub_id == StubId::stubgen_double_keccak_id;
+ int vector_len, reg_size;
+ Register buf, offset, block_size, limit;
+ Register state1, state2;
+ Register roundsLeft = r10;
+ Register round_consts = r11;
+ Register rotate_consts;
- // set up the masks
- __ movl(rax, 0x1F);
- __ kmovwl(k5, rax);
- __ kshiftrwl(k4, k5, 1);
- __ kshiftrwl(k3, k5, 2);
- __ kshiftrwl(k2, k5, 3);
- __ kshiftrwl(k1, k5, 4);
-
- // load the states
- for (int i = 0; i < 5; i++) {
- __ evmovdquq(xmm(i), k5, Address(state0, i * 40), false, Assembler::AVX_512bit);
+ if (parallelKeccak) {
+ vector_len = Assembler::AVX_256bit;
+ reg_size = 32;
+ state1 = c_rarg0;
+ state2 = c_rarg1;
+ rotate_consts = r9;
+ } else {
+ vector_len = Assembler::AVX_128bit;
+ reg_size = 16;
+ buf = c_rarg0;
+ state1 = c_rarg1;
+ block_size = c_rarg2;
+ offset = c_rarg3;
+ #ifndef _WIN64
+ limit = c_rarg4;
+ #else
+ limit = rdi;
+ __ push_ppx(rdi);
+ __ movptr(limit, Address(rbp, 6 * wordSize));
+ #endif
+ rotate_consts = r12;
+ __ push_ppx(r12);
}
- for (int i = 0; i < 5; i++) {
- __ evmovdquq(xmm(10 + i), k5, Address(state1, i * 40), false, Assembler::AVX_512bit);
+ __ push_ppx(rbp);
+ __ movq(rbp, rsp);
+ __ andq(rsp, -32);
+ __ subptr(rsp, reg_size*2);
+
+ // Registers for memory load
+ // Notice the careful 'misalignment' of pairs.
+ // This helps XOR for all blocksizes
+ XMMRegister a0a1, _a2, a3a4;
+ XMMRegister a5a6, a7a8, _a9;
+ XMMRegister a10a11, _a12, a13a14;
+ XMMRegister a15a16, _a17, a18a19;
+ XMMRegister _a20, a21a22, a23a24;
+
+ // Registers for rounds24
+ XMMRegister A0_, A1A3, A2A4;
+ XMMRegister A5A15, A6A8, A7A9;
+ XMMRegister A10A20, A11A13, A12A14;
+ XMMRegister A16A18, A17A19;
+ XMMRegister A21A23, A22A24;
+ XMMRegister C0_, C1C3, C2C4;
+
+ XMMRegister T0, T1, T2, T3, T4, tmp1;
+
+ // (Very Careful) Register allocation
+ a0a1 = A0_ = xmm0;
+ tmp1 = A1A3 = xmm1;
+ _a2 = A2A4 = xmm2;
+ a3a4 = A5A15 = xmm3;
+ a5a6 = A6A8 = xmm4;
+ a7a8 = A7A9 = xmm5;
+ _a9 = A10A20 = xmm6;
+ a10a11 = A11A13 = xmm7;
+ _a12 = A12A14 = xmm8;
+ a13a14 = A16A18 = xmm9;
+ a15a16 = A17A19 = xmm10;
+ _a17 = A21A23 = T3 = xmm11;
+ a18a19 = A22A24 = T4 = xmm12;
+ _a20 = C0_ = T0 = xmm13;
+ a21a22 = C1C3 = T1 = xmm14;
+ a23a24 = C2C4 = T2 = xmm15;
+
+ __ lea(round_consts, ExternalAddress(avx2_round_constsAddr()));
+ __ lea(rotate_consts, ExternalAddress(avx2_rotate_constsAddr()));
+
+ auto loadState = [=](XMMRegister dst, int disp){
+ __ vmovdqu(dst, Address(state1, disp), Assembler::AVX_128bit);
+ if (parallelKeccak) {
+ __ vinserti128(dst, dst, Address(state2, disp), 1);
+ }
+ };
+ // load the state
+ loadState(a0a1, 0 * 8);
+ loadState(_a2, 1 * 8);
+ loadState(a3a4, 3 * 8);
+ loadState(a5a6, 5 * 8);
+ loadState(a7a8, 7 * 8);
+ loadState(_a9, 8 * 8);
+ loadState(a10a11, 10 * 8);
+ loadState(_a12, 11 * 8);
+ loadState(a13a14, 13 * 8);
+ loadState(a15a16, 15 * 8);
+ loadState(_a17 , 16 * 8);
+ loadState(a18a19, 18 * 8);
+ loadState(_a20 , 19 * 8);
+ loadState(a21a22, 21 * 8);
+ loadState(a23a24, 23 * 8);
+
+ if (!parallelKeccak) {
+ Label buffer_done;
+ // load input from buffer: 72, 104, 136, 144 or 168 bytes
+ // i.e. 5+4, 2*5+3, 3*5+2, 3*5+3 or 4*5+1 longs
+ __ vpxor(a0a1, a0a1, Address(buf, 0 * 8), vector_len);
+ __ vpxor(_a2, _a2, Address(buf, 1 * 8), vector_len);
+ __ vpxor(a3a4, a3a4, Address(buf, 3 * 8), vector_len);
+ __ vpxor(a5a6, a5a6, Address(buf, 5 * 8), vector_len);
+ __ vpxor(a7a8, a7a8, Address(buf, 7 * 8), vector_len);
+ __ cmpl(block_size, 72);
+ __ jcc(Assembler::equal, buffer_done);
+ __ vpxor(_a9, _a9, Address(buf, 8 * 8), vector_len);
+ __ vpxor(a10a11, a10a11, Address(buf, 10 * 8), vector_len);
+ __ vpxor(_a12, _a12, Address(buf, 11 * 8), vector_len);
+ __ cmpl(block_size, 104);
+ __ jcc(Assembler::equal, buffer_done);
+ __ vpxor(a13a14, a13a14, Address(buf, 13 * 8), vector_len);
+ __ vpxor(a15a16, a15a16, Address(buf, 15 * 8), vector_len);
+ __ cmpl(block_size, 136);
+ __ jcc(Assembler::equal, buffer_done);
+ __ vpxor(_a17, _a17, Address(buf, 16 * 8), vector_len);
+ __ cmpl(block_size, 144);
+ __ jcc(Assembler::equal, buffer_done);
+ __ vpxor(a18a19, a18a19, Address(buf, 18 * 8), vector_len);
+ __ vpxor(_a20, _a20, Address(buf, 19 * 8), vector_len);
+ __ BIND(buffer_done);
}
- // load the permutation and rotation constants
-
- for (int i = 0; i < 15; i++) {
- __ evmovdquq(xmm(17 + i), Address(permsAndRots, i * 64), Assembler::AVX_512bit);
- }
+ // Shuffle state registers for the round24 loop
+ __ vshufpd( A1A3, a0a1, a3a4, 0b0101, vector_len);
+ __ vshufpd( A2A4, _a2, a3a4, 0b1111, vector_len);
+ __ vshufpd( A5A15, a5a6, a15a16, 0b0000, vector_len);
+ __ vshufpd( A6A8, a5a6, a7a8, 0b1111, vector_len);
+ __ vshufpd( A7A9, a7a8, _a9, 0b1010, vector_len);
+ __ vshufpd(A10A20, a10a11, _a20, 0b1010, vector_len);
+ __ vshufpd(A11A13, a10a11, a13a14, 0b0101, vector_len);
+ __ vshufpd(A12A14, _a12, a13a14, 0b1111, vector_len);
+ __ vshufpd(A16A18, a15a16, a18a19, 0b0101, vector_len);
+ __ vshufpd(A17A19, _a17, a18a19, 0b1111, vector_len);
+ __ vshufpd(A21A23, a21a22, a23a24, 0b0000, vector_len);
+ __ vshufpd(A22A24, a21a22, a23a24, 0b1111, vector_len);
// there will be 24 keccak rounds
- // The same operations as the ones in generate_sha3_implCompress are
- // performed, but in parallel for two states: one in regs z0-z5, using z6
- // as the scratch register and the other in z10-z15, using z16 as the
- // scratch register.
- // The permutation and rotation constants, that are loaded into z17-z31,
- // are shared between the two computations.
- __ movl(roundsLeft, 24);
- // load round_constants base
- __ movptr(constant2use, round_consts);
-
+ // roundsLeft counts down from 23*4 in steps of 4 and is also used (scaled by 8) as the index into the avx2_round_consts array
+ __ movl(roundsLeft, 23*4);
+ Label rounds24_loop;
__ align(OptoLoopAlignment);
__ BIND(rounds24_loop);
- __ subl( roundsLeft, 1);
- __ evmovdquw(xmm5, xmm0, Assembler::AVX_512bit);
- __ evmovdquw(xmm15, xmm10, Assembler::AVX_512bit);
- __ vpternlogq(xmm5, 150, xmm1, xmm2, Assembler::AVX_512bit);
- __ vpternlogq(xmm15, 150, xmm11, xmm12, Assembler::AVX_512bit);
- __ vpternlogq(xmm5, 150, xmm3, xmm4, Assembler::AVX_512bit);
- __ vpternlogq(xmm15, 150, xmm13, xmm14, Assembler::AVX_512bit);
- __ evprolq(xmm6, xmm5, 1, Assembler::AVX_512bit);
- __ evprolq(xmm16, xmm15, 1, Assembler::AVX_512bit);
- __ evpermt2q(xmm5, xmm30, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm30, xmm15, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm6, Assembler::AVX_512bit);
- __ evpermt2q(xmm16, xmm31, xmm16, Assembler::AVX_512bit);
- __ vpternlogq(xmm0, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm10, 150, xmm15, xmm16, Assembler::AVX_512bit);
- __ vpternlogq(xmm1, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm11, 150, xmm15, xmm16, Assembler::AVX_512bit);
- __ vpternlogq(xmm2, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm12, 150, xmm15, xmm16, Assembler::AVX_512bit);
- __ vpternlogq(xmm3, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm13, 150, xmm15, xmm16, Assembler::AVX_512bit);
- __ vpternlogq(xmm4, 150, xmm5, xmm6, Assembler::AVX_512bit);
- __ vpternlogq(xmm14, 150, xmm15, xmm16, Assembler::AVX_512bit);
- __ evpermt2q(xmm4, xmm17, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm14, xmm17, xmm13, Assembler::AVX_512bit);
- __ evpermt2q(xmm3, xmm18, xmm2, Assembler::AVX_512bit);
- __ evpermt2q(xmm13, xmm18, xmm12, Assembler::AVX_512bit);
- __ evpermt2q(xmm2, xmm17, xmm1, Assembler::AVX_512bit);
- __ evpermt2q(xmm12, xmm17, xmm11, Assembler::AVX_512bit);
- __ evpermt2q(xmm1, xmm19, xmm0, Assembler::AVX_512bit);
- __ evpermt2q(xmm11, xmm19, xmm10, Assembler::AVX_512bit);
- __ evpermt2q(xmm4, xmm20, xmm2, Assembler::AVX_512bit);
- __ evpermt2q(xmm14, xmm20, xmm12, Assembler::AVX_512bit);
- __ evprolvq(xmm1, xmm1, xmm27, Assembler::AVX_512bit);
- __ evprolvq(xmm11, xmm11, xmm27, Assembler::AVX_512bit);
- __ evprolvq(xmm3, xmm3, xmm28, Assembler::AVX_512bit);
- __ evprolvq(xmm13, xmm13, xmm28, Assembler::AVX_512bit);
- __ evprolvq(xmm4, xmm4, xmm29, Assembler::AVX_512bit);
- __ evprolvq(xmm14, xmm14, xmm29, Assembler::AVX_512bit);
- __ evmovdquw(xmm2, xmm1, Assembler::AVX_512bit);
- __ evmovdquw(xmm12, xmm11, Assembler::AVX_512bit);
- __ evmovdquw(xmm5, xmm3, Assembler::AVX_512bit);
- __ evmovdquw(xmm15, xmm13, Assembler::AVX_512bit);
- __ evpermt2q(xmm0, xmm21, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm10, xmm21, xmm14, Assembler::AVX_512bit);
- __ evpermt2q(xmm1, xmm22, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm11, xmm22, xmm13, Assembler::AVX_512bit);
- __ evpermt2q(xmm5, xmm22, xmm2, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm22, xmm12, Assembler::AVX_512bit);
- __ evmovdquw(xmm3, xmm1, Assembler::AVX_512bit);
- __ evmovdquw(xmm13, xmm11, Assembler::AVX_512bit);
- __ evmovdquw(xmm2, xmm5, Assembler::AVX_512bit);
- __ evmovdquw(xmm12, xmm15, Assembler::AVX_512bit);
- __ evpermt2q(xmm1, xmm23, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm11, xmm23, xmm14, Assembler::AVX_512bit);
- __ evpermt2q(xmm2, xmm24, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm12, xmm24, xmm14, Assembler::AVX_512bit);
- __ evpermt2q(xmm3, xmm25, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm13, xmm25, xmm14, Assembler::AVX_512bit);
- __ evpermt2q(xmm4, xmm26, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm14, xmm26, xmm15, Assembler::AVX_512bit);
+ __ vmovdqa(Address(rsp, 0), A21A23, vector_len);
+ __ vmovdqa(Address(rsp, reg_size), A22A24, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm0, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm31, xmm10, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm16, xmm31, xmm15, Assembler::AVX_512bit);
- __ vpternlogq(xmm0, 180, xmm6, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm10, 180, xmm16, xmm15, Assembler::AVX_512bit);
+ // Step mapping Theta as defined in section 3.2.1.
+ // long c0 = a0^a5^a10^a15^a20;
+ // long c1 = a1^a6^a11^a16^a21;
+ // long c2 = a2^a7^a12^a17^a22;
+ // long c3 = a3^a8^a13^a18^a23;
+ // long c4 = a4^a9^a14^a19^a24;
+ __ vpxor( C0_, A5A15, A10A20, vector_len);
+ __ vpxor(A21A23, A21A23, A16A18, vector_len);
+ __ vpxor(A22A24, A22A24, A17A19, vector_len);
+ __ vshufpd(C1C3, C0_, C0_, 0b1111, vector_len);
+ __ vpxor( C0_, C0_, A0_, vector_len);
+ __ vpxor( C0_, C0_, C1C3, vector_len);
+ __ vpxor( C1C3, A1A3, A6A8, vector_len);
+ __ vpxor( C2C4, A2A4, A7A9, vector_len);
+ __ vpxor(A21A23, A21A23, A11A13, vector_len);
+ __ vpxor(A22A24, A22A24, A12A14, vector_len);
+ __ vpxor( C1C3, C1C3, A21A23, vector_len);
+ __ vpxor( C2C4, C2C4, A22A24, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm1, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm31, xmm11, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm16, xmm31, xmm15, Assembler::AVX_512bit);
- __ vpternlogq(xmm1, 180, xmm6, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm11, 180, xmm16, xmm15, Assembler::AVX_512bit);
+ // long d0 = c4 ^ Long.rotateLeft(c1, 1);
+ // long d1 = c0 ^ Long.rotateLeft(c2, 1);
+ // long d2 = c1 ^ Long.rotateLeft(c3, 1);
+ // long d3 = c2 ^ Long.rotateLeft(c4, 1);
+ // long d4 = c3 ^ Long.rotateLeft(c0, 1);
+ // C4_ | C0C2 | C1C3
+ // C1_ | C2C4 | C3C0 (second row rotated left by 1)
+ // -----+--------+------- (xor)
+ // C0C0 | A21A23 | A22A24 (result registers: d0,d0 | d1,d3 | d2,d4)
- __ evpxorq(xmm0, k1, xmm0, Address(constant2use, 0), true, Assembler::AVX_512bit);
- __ evpxorq(xmm10, k1, xmm10, Address(constant2use, 0), true, Assembler::AVX_512bit);
- __ addptr(constant2use, 8);
+ // Even Column: A22A24 (Overloaded with T4)
+ __ vshufpd(T3, C1C3, C0_, 0b0101, vector_len); //C3C0
+ __ vpsrlq(T4, T3, 63, vector_len);
+ __ vpsllq(T3, T3, 1, vector_len);
+ __ vpor(T3, T3, T4, vector_len);
+ __ vpxor(A22A24/*T4*/, T3, C1C3, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm2, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm31, xmm12, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm16, xmm31, xmm15, Assembler::AVX_512bit);
- __ vpternlogq(xmm2, 180, xmm6, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm12, 180, xmm16, xmm15, Assembler::AVX_512bit);
+ // First Column C0C0
+ __ vpsllq(T3, C1C3, 1, vector_len);
+ __ vpsrlq(C1C3, C1C3, 63, vector_len);
+ __ vpor(C1C3, T3, C1C3, vector_len); // C1_
+ __ vshufpd(T3, C2C4, C2C4, 0b1111, vector_len); //C4_
+ __ vpxor(C1C3, T3, C1C3, vector_len);
- __ evpermt2q(xmm5, xmm31, xmm3, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm31, xmm13, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm16, xmm31, xmm15, Assembler::AVX_512bit);
- __ vpternlogq(xmm3, 180, xmm6, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm13, 180, xmm16, xmm15, Assembler::AVX_512bit);
- __ evpermt2q(xmm5, xmm31, xmm4, Assembler::AVX_512bit);
- __ evpermt2q(xmm15, xmm31, xmm14, Assembler::AVX_512bit);
- __ evpermt2q(xmm6, xmm31, xmm5, Assembler::AVX_512bit);
- __ evpermt2q(xmm16, xmm31, xmm15, Assembler::AVX_512bit);
- __ vpternlogq(xmm4, 180, xmm6, xmm5, Assembler::AVX_512bit);
- __ vpternlogq(xmm14, 180, xmm16, xmm15, Assembler::AVX_512bit);
- __ cmpl(roundsLeft, 0);
- __ jcc(Assembler::notEqual, rounds24_loop);
+ // Odd Column: A21A23 (Overloaded with T3)
+ __ vshufpd(C0_, C0_, C2C4, 0b0000, vector_len); //C0C2
+ __ vpsllq(T3, C2C4, 1, vector_len);
+ __ vpsrlq(C2C4, C2C4, 63, vector_len);
+ __ vpor(C2C4, T3, C2C4, vector_len); // C2C4
+ __ vpxor(A21A23/*T3*/, C2C4, C0_, vector_len);
- // store the states
- for (int i = 0; i < 5; i++) {
- __ evmovdquq(Address(state0, i * 40), k5, xmm(i), true, Assembler::AVX_512bit);
+ __ vshufpd(C0_, C1C3, C1C3, 0b0000, vector_len); //C0C0
+
+ // a0 ^= d0; a1 ^= d1; a2 ^= d2; a3 ^= d3; a4 ^= d4;
+ // a5 ^= d0; a6 ^= d1; a7 ^= d2; a8 ^= d3; a9 ^= d4;
+ // a10 ^= d0; a11 ^= d1; a12 ^= d2; a13 ^= d3; a14 ^= d4;
+ // a15 ^= d0; a16 ^= d1; a17 ^= d2; a18 ^= d3; a19 ^= d4;
+ // a20 ^= d0; a21 ^= d1; a22 ^= d2; a23 ^= d3; a24 ^= d4;
+ __ vpxor( A2A4, A2A4, A22A24, vector_len);
+ __ vpxor( A7A9, A7A9, A22A24, vector_len);
+ __ vpxor(A12A14, A12A14, A22A24, vector_len);
+ __ vpxor(A17A19, A17A19, A22A24, vector_len);
+ __ vpxor(A22A24, A22A24, Address(rsp, reg_size), vector_len); // Restore A22A24 from stack
+
+ __ vpxor( A1A3, A1A3, A21A23, vector_len);
+ __ vpxor( A6A8, A6A8, A21A23, vector_len);
+ __ vpxor(A11A13, A11A13, A21A23, vector_len);
+ __ vpxor(A16A18, A16A18, A21A23, vector_len);
+ __ vpxor(A21A23, A21A23, Address(rsp, 0), vector_len); // Restore A21A23 from stack
+
+ __ vpxor( A0_, A0_, C0_, vector_len);
+ __ vpxor( A5A15, A5A15, C0_, vector_len);
+ __ vpxor(A10A20, A10A20, C0_, vector_len);
+
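+ // Rho and Pi steps (legend: REG = aN,r | aM,s means REG ends up holding old lane aN rotated by r and old lane aM rotated by s)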
+ // A0_= a0,0|x A1A3=a6,44|a18,21 A2A4=a12,43|a24,14
+ // A5A15=a3,28|a4,27 A6A8=a9,20|a16,45 A7A9= a10,3|a22,61
+ // A10A20= a1,1|a2,62 A11A13= a7,6|a19,8 A12A14=a13,25|a20,18
+ // A16A18=a5,36|a17,15 A17A19=a11,10|a23,56
+ // A21A23=a8,55|a15,41 A22A24=a14,39|a21,2
+ auto rotate = [=](XMMRegister dst, int disp){ // rotates the lanes of dst using the disp-th 32-byte entry of rotate_consts; clobbers T0
+ __ vpsllvq(T0, dst, Address(rotate_consts, disp*4*8), vector_len); // T0 = dst shifted left by the per-lane counts of entry disp
+ __ vpsrlvq(dst, dst, Address(rotate_consts, 384 + disp*4*8), vector_len); // dst shifted right by the matching entry of the table at +384 (12 entries * 32 bytes); presumably 64 - left count, TODO confirm
+ __ vpor(dst, T0, dst, vector_len); // combine both shifted halves into dst
+ };
+
+ rotate( A1A3, 0);
+ rotate( A2A4, 1);
+ rotate( A5A15, 2);
+ rotate( A6A8, 3);
+ rotate( A7A9, 4);
+ rotate(A10A20, 5);
+ rotate(A11A13, 6);
+ rotate(A12A14, 7);
+ rotate(A16A18, 8);
+ rotate(A17A19, 9);
+ rotate(A21A23, 10);
+ rotate(A22A24, 11);
+
+ __ vmovdqu(T0, A22A24, vector_len);
+ __ vmovdqu(T1, A17A19, vector_len);
+ __ vmovdqu(T2, A1A3, vector_len);
+ __ vshufpd(A0_, A0_, A1A3, 0b0000, vector_len); // A0_ = A0A1
+ __ vshufpd(A22A24, A12A14, A21A23, 0b0101, vector_len); // A22A24 = a14a21
+ __ vshufpd(A17A19, A11A13, A21A23, 0b1010, vector_len); // A17A19 = a11a23
+ __ vshufpd(A1A3, A6A8, A16A18, 0b1010, vector_len); // A1A3 = a6a18
+ __ vshufpd(A21A23, A6A8, A5A15 , 0b1111, vector_len); // A21A23 = a8a15
+ __ vshufpd(A6A8, A7A9, A16A18, 0b0101, vector_len); // A6A8 = a9a16
+ __ vshufpd(A16A18, A5A15, T1 /*A17A19*/, 0b0000, vector_len); // A16A18 = a5a17
+ __ vshufpd(A5A15, T2 /*A1A3*/, A2A4, 0b1111, vector_len); // A5A15 = a3a4
+ __ vmovdqu(T2, A10A20, vector_len);
+ __ vshufpd(A10A20, A0_/*A0A1*/, A2A4, 0b0101, vector_len); // A10A20 = a1a2
+ __ vshufpd(A2A4, A12A14, T0 /*A22A24*/, 0b1010, vector_len); // A2A4 = a12a24
+ __ vshufpd(A12A14, A11A13, T2 /*A10A20*/, 0b1111, vector_len); // A12A14 = a13a20
+ __ vshufpd(A11A13, A7A9, T1 /*A17A19*/, 0b1010, vector_len); // A11A13 = a7a19
+ __ vshufpd(A7A9, T2 /*A10A20*/, T0 /*A22A24*/, 0b0000, vector_len); // A7A9 = a10a22
+
+ // Chi step - First row
+ // ^=A0_ A1A3 A2A4
+ // ~A1_ A2A4 A3A0
+ // &A2_ A3A0 A4A1
+ __ vshufpd(T0/*A3A0*/, A1A3, A0_, 0b0101, vector_len);
+ __ vshufpd(T1/*A4A1*/, A2A4, A1A3, 0b0101, vector_len);
+ __ vpandn( T2, A2A4, T0/*A3A0*/, vector_len);
+ __ vpandn( T1, T0/*A3A0*/, T1/*A4A1*/, vector_len);
+ __ vpandn( T0, A1A3, A2A4, vector_len);
+ __ vpxor(A2A4, A2A4, T1, vector_len);
+ __ vpxor(A1A3, A1A3, T2, vector_len);
+ __ vpxor(A0_, A0_, T0, vector_len);
+
+ // Step mapping Iota as defined in section 3.2.5.
+ // a0 ^= RC_CONSTANTS[ir];
+ __ vpxor(A0_, A0_, Address(round_consts, roundsLeft, Address::times_8), vector_len);
+
+ // Chi step - Second&Fourth, Third&Fifth rows
+ // ^= X5X15 X6X8 X7X9 X16X18 X17X19
+ // ~ X6X16 X7X9 X8X5 X17X19 X18X15
+ // & X7X17 X8X5 X9X6 X18X15 X19X16
+ auto chi_row_pair = [=](XMMRegister X5X15, XMMRegister X6X8, XMMRegister X7X9,
+ XMMRegister X16X18, XMMRegister X17X19){ // Chi (x ^= ~next & next2) on two rows held in five lane-paired registers; clobbers T0, T1, T2
+ // X6X8 && X7X9 (all inputs are gathered from the still-unmodified registers before any vpxor below)
+ __ vshufpd(T0/*X8X5*/, X6X8, X5X15, 0b0101, vector_len); // T0 = high lane of X6X8, low lane of X5X15
+ __ vshufpd(T1/*X9X6*/, X7X9, X6X8, 0b0101, vector_len); // T1 = high lane of X7X9, low lane of X6X8
+ __ vpandn( T2, X7X9, T0/*X8X5*/, vector_len); // T2 = ~X7X9 & X8X5, the update term for X6X8
+ __ vpandn( T1, T0/*X8X5*/, T1/*X9X6*/, vector_len); // T1 = ~X8X5 & X9X6, the update term for X7X9
+ __ vshufpd(T0/*X6X16*/, X6X8, X16X18, 0b0000, vector_len); // capture old X6 and X16 before X6X8 is updated on the next line
+ __ vpxor(X6X8, X6X8, T2, vector_len);
+ __ vshufpd(T2/*X7X17*/, X7X9, X17X19, 0b0000, vector_len); // capture old X7 and X17 before X7X9 is updated on the next line
+ __ vpxor(X7X9, X7X9, T1, vector_len);
+
+ // X5X15 (uses the old X6/X16 and X7/X17 values captured above)
+ __ vpandn( T2, T0/*X6X16*/, T2/*X7X17*/, vector_len); // T2 = ~X6X16 & X7X17, the update term for X5X15
+ __ vshufpd(T0/*X18X15*/, X16X18, X5X15, 0b1111, vector_len); // capture old X15 before X5X15 is updated on the next line
+ __ vpxor(X5X15, X5X15, T2, vector_len);
+
+ // X16X18 && X17X19
+ __ vshufpd(T1/*X19X16*/, X17X19, X16X18, 0b0101, vector_len); // T1 = high lane of X17X19, low lane of X16X18
+ __ vpandn( T2, X17X19, T0/*X18X15*/, vector_len); // T2 = ~X17X19 & X18X15, the update term for X16X18
+ __ vpandn( T1, T0/*X18X15*/, T1/*X19X16*/, vector_len); // T1 = ~X18X15 & X19X16, the update term for X17X19
+ __ vpxor(X16X18, X16X18, T2, vector_len);
+ __ vpxor(X17X19, X17X19, T1, vector_len);
+ };
+
+ chi_row_pair(A5A15, A6A8, A7A9, A16A18, A17A19);
+ chi_row_pair(A10A20, A11A13, A12A14, A21A23, A22A24);
+
+ __ decrementl(roundsLeft, 4);
+ __ jcc(Assembler::positive, rounds24_loop);
+
+ if (multiBlock) {
+ Label multi_done, block104, block136, block144, block168;
+ __ movl(roundsLeft, 23*4);
+ __ addptr(buf, block_size);
+ __ addl(offset, block_size);
+ __ cmpl(offset, limit);
+ __ jcc(Assembler::greater, multi_done);
+
+ auto buf_even_odd = [=](int row, XMMRegister X1X3, XMMRegister X2X4) { // XORs buffer words row*5+1 .. row*5+4 into the lane-paired registers X1X3 and X2X4; clobbers T0, T1, T2
+ __ vmovdqu(T1, Address(buf, (row*5 + 1) * 8), vector_len); // b1b2: buffer words row*5+1 and row*5+2
+ __ vmovdqu(T2, Address(buf, (row*5 + 3) * 8), vector_len); // b3b4: buffer words row*5+3 and row*5+4
+ __ vshufpd(T0, T1, T2, 0b0000, vector_len); // b1b3: low lanes of T1 and T2
+ __ vshufpd(T1, T1, T2, 0b1111, vector_len); // b2b4: high lanes of T1 and T2
+ __ vpxor(X1X3, X1X3, T0, vector_len); // X1X3 ^= b1b3 (A1A3 for row 0)
+ __ vpxor(X2X4, X2X4, T1, vector_len); // X2X4 ^= b2b4 (A2A4 for row 0)
+ };
+ // First Row
+ __ vpxor(A0_, A0_, Address(buf, 0 * 8), vector_len);
+ buf_even_odd(0, A1A3, A2A4);
+
+ {
+ __ cmpl(block_size, 72);
+ __ jcc(Assembler::notEqual, block104);
+ __ vmovdqu(T1, Address(buf, 5 * 8), vector_len); //b5b6 A5A15 A6A8 A7A9
+ __ vmovdqu(T2, Address(buf, 7 * 8), vector_len); //b7b8
+ __ vshufpd(T0, A5A15, A6A8, 0b0000, vector_len); //A5A6
+ __ vpxor(T0, T0, T1, vector_len); //A5A6
+ __ vshufpd(T1, A7A9, A6A8, 0b1010, vector_len); //A7A8
+ __ vpxor(T1, T1, T2, vector_len); //A7A8
+ __ vshufpd(A5A15, T0/*A5A6*/, A5A15, 0b1010, vector_len);
+ __ vshufpd(A6A8, T0/*A5A6*/, T1/*A7A8*/, 0b1111, vector_len);
+ __ vshufpd(A7A9, T1/*A7A8*/, A7A9, 0b1010, vector_len);
+ __ jmp(rounds24_loop);
+ }
+ __ BIND(block104);
+ {
+ __ cmpl(block_size, 104);
+ __ jcc(Assembler::notEqual, block136);
+ __ movq(T0, Address(buf, 5 * 8)); //b5
+ __ movq(T1, Address(buf, 10 * 8)); //b10
+ __ vshufpd(T0, T0, T1, 0b0000, vector_len); //b5b10
+ __ vshufpd(T1, A5A15, A10A20, 0b0000, vector_len); //A5A10
+ __ vpxor(T0, T0, T1, vector_len); //A5A10
+ __ vshufpd(A5A15, T0/*A5A10*/, A5A15, 0b1010, vector_len);
+ __ vshufpd(A10A20, T0/*A5A10*/, A10A20, 0b1111, vector_len);
+
+ __ vshufpd(T0, A11A13, A12A14, 0b0000, vector_len); //A11A12
+ __ vpxor(T0, T0, Address(buf, 11 * 8), vector_len); //b11b12
+ __ vshufpd(A11A13, T0/*A11A12*/, A11A13, 0b1010, vector_len);
+ __ vshufpd(A12A14, T0/*A11A12*/, A12A14, 0b1111, vector_len);
+ buf_even_odd(1, A6A8, A7A9);
+ __ jmp(rounds24_loop);
+ }
+ __ BIND(block136);
+ {
+ __ cmpl(block_size, 136);
+ __ jcc(Assembler::notEqual, block144);
+ __ movq(T0, Address(buf, 5 * 8)); //b5
+ __ movq(T1, Address(buf, 10 * 8)); //b10
+ __ vmovdqu(T2, Address(buf, 15 * 8), vector_len); //b15b16
+ __ vshufpd(T0, T0, T2, 0b0000, vector_len); //b5b15
+ __ vpxor(A5A15, A5A15, T0, vector_len);
+ __ vshufpd(T0, T1, T2, 0b1010, vector_len); //b10b16
+ __ vshufpd(T1, A10A20, A16A18, 0b0000, vector_len); //A10A16
+ __ vpxor(T1, T1, T0, vector_len);
+ __ vshufpd(A10A20, T1/*A10A16*/, A10A20, 0b1010, vector_len);
+ __ vshufpd(A16A18, T1/*A10A16*/, A16A18, 0b1111, vector_len);
+ buf_even_odd(1, A6A8, A7A9);
+ buf_even_odd(2, A11A13, A12A14);
+ __ jmp(rounds24_loop);
+ }
+ __ BIND(block144);
+ {
+ __ cmpl(block_size, 144);
+ __ jcc(Assembler::notEqual, block168);
+ __ movq(T0, Address(buf, 5 * 8)); //b5
+ __ movq(T1, Address(buf, 15 * 8)); //b15
+ __ vshufpd(T0, T0/*b5*/, T1/*b15*/, 0b0000, vector_len); //b5b15
+ __ vpxor(A5A15, A5A15, T0, vector_len);
+ buf_even_odd(1, A6A8, A7A9);
+ buf_even_odd(2, A11A13, A12A14);
+ __ movq(T0, Address(buf, 10 * 8)); //b10
+ __ vpxor(A10A20, T0, A10A20, vector_len);
+
+ __ vshufpd(T0, A16A18, A17A19, 0b0000, vector_len); //A16A17
+ __ vpxor(T0, T0, Address(buf, 16 * 8), vector_len); //b16b17
+ __ vshufpd(A16A18, T0/*A16A17*/, A16A18, 0b1010, vector_len);
+ __ vshufpd(A17A19, T0/*A16A17*/, A17A19, 0b1111, vector_len);
+ __ jmp(rounds24_loop);
+ }
+ __ BIND(block168);
+ {
+ __ movq(T0, Address(buf, 5 * 8)); //b5
+ __ movq(T1, Address(buf, 15 * 8)); //b15
+ __ vshufpd(T0, T0/*b5*/, T1/*b15*/, 0b0000, vector_len); //b5b15
+ __ vpxor(A5A15, A5A15, T0, vector_len);
+ buf_even_odd(1, A6A8, A7A9);
+ buf_even_odd(2, A11A13, A12A14);
+ buf_even_odd(3, A16A18, A17A19);
+ __ movq(T0, Address(buf, 10 * 8)); //b10
+ __ movq(T1, Address(buf, 20 * 8)); //b20
+ __ vshufpd(T0, T0/*b10*/, T1/*b20*/, 0b0000, vector_len); //b10b20
+ __ vpxor(A10A20, A10A20, T0, vector_len);
+ __ jmp(rounds24_loop);
+ }
+ __ BIND(multi_done);
+ __ movq(rax, offset); // return offset
+ } else {
+ __ xorq(rax, rax); // return 0
}
- for (int i = 0; i < 5; i++) {
- __ evmovdquq(Address(state1, i * 40), k5, xmm(10 + i), true, Assembler::AVX_512bit);
+
+ // Unshuffle: write the lane-paired registers back to the linear state array(s)
+ auto extractState = [=](int disp, XMMRegister src) { // stores the two low lanes of src to state words disp and disp+10; overwrites src when parallelKeccak is set
+ int disp1 = disp;
+ int disp2 = disp+10; // the second lane belongs 10 words further on (used for the A5A15 and A10A20 pairs)
+ __ pextrq(Address(state1, disp1 * 8), src, 0); // lane 0 -> state1[disp]
+ __ pextrq(Address(state1, disp2 * 8), src, 1); // lane 1 -> state1[disp+10]
+ if (parallelKeccak) {
+ __ vextracti128(src, src, 1); // move the upper 128 bits of src into its low half (src is destroyed)
+ __ pextrq(Address(state2, disp1 * 8), src, 0); // former lane 2 -> state2[disp]
+ __ pextrq(Address(state2, disp2 * 8), src, 1); // former lane 3 -> state2[disp+10]
+ }
+ };
+ auto storeState = [=](int disp, XMMRegister X1X3, XMMRegister X2X4){ // re-interleaves two lane-paired registers and stores four consecutive state words starting at disp; clobbers T0, T1
+ XMMRegister X1X2 = T0; // scratch register that receives the low lanes
+ XMMRegister X3X4 = T1; // scratch register that receives the high lanes
+ int disp1 = disp;
+ int disp2 = disp+2; // the second pair lands two words after the first
+ __ vpunpcklqdq(X1X2, X1X3, X2X4, Assembler::AVX_256bit); // X1X2 = low lanes of X1X3 and X2X4
+ __ vpunpckhqdq(X3X4, X1X3, X2X4, Assembler::AVX_256bit); // X3X4 = high lanes of X1X3 and X2X4
+ __ vmovdqu(Address(state1, disp1 * 8), X1X2, Assembler::AVX_128bit); // low 128 bits -> state1[disp .. disp+1]
+ __ vmovdqu(Address(state1, disp2 * 8), X3X4, Assembler::AVX_128bit); // low 128 bits -> state1[disp+2 .. disp+3]
+ if (parallelKeccak) {
+ __ vextracti128(Address(state2, disp1 * 8), X1X2, 1); // upper 128 bits -> state2[disp .. disp+1]
+ __ vextracti128(Address(state2, disp2 * 8), X3X4, 1); // upper 128 bits -> state2[disp+2 .. disp+3]
+ }
+ };
+
+ __ pextrq(Address(state1, 0 * 8), A0_, 0);
+ if (parallelKeccak) {
+ __ vextracti128(A0_, A0_, 1);
+ __ pextrq(Address(state2, 0 * 8), A0_, 0);
+ }
+ storeState(1, A1A3, A2A4);
+ extractState(5, A5A15);
+ storeState(6, A6A8, A7A9);
+ extractState(10, A10A20);
+ storeState(11, A11A13, A12A14);
+ storeState(16, A16A18, A17A19);
+ storeState(21, A21A23, A22A24);
+
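+ // Cleanup: overwrite the two stack slots that held A21A23/A22A24 during the rounds,
+ // then zero out vector registers 0-15 with vzeroall.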
+ __ vpxor(xmm0, xmm0, xmm0, vector_len);
+ __ vmovdqa(Address(rsp, 0), xmm0, vector_len);
+ __ vmovdqa(Address(rsp, reg_size), xmm0, vector_len);
+ __ vzeroall();
+
+ __ movq(rsp, rbp);
+ __ pop_ppx(rbp);
+ if (!parallelKeccak) {
+ __ pop_ppx(r12);
+ #ifdef _WIN64
+ __ pop_ppx(rdi);
+ #endif
}
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -516,13 +1152,25 @@ static address generate_double_keccak(StubGenerator *stubgen, MacroAssembler *_m
}
void StubGenerator::generate_sha3_stubs() {
+ bool avx512Available = VM_Version::supports_evex() && VM_Version::supports_avx512vlbw(); // both checks must pass to select the *_avx512 generators below
if (UseSHA3Intrinsics) {
- StubRoutines::_sha3_implCompress =
- generate_sha3_implCompress(StubId::stubgen_sha3_implCompress_id, this, _masm);
- StubRoutines::_double_keccak =
- generate_double_keccak(this, _masm);
- StubRoutines::_sha3_implCompressMB =
- generate_sha3_implCompress(StubId::stubgen_sha3_implCompressMB_id, this, _masm);
+ if (avx512Available) { // AVX-512 path: all four stubs are generated, including _quad_keccak
+ StubRoutines::_sha3_implCompress =
+ generate_sha3_implCompress_avx512(StubId::stubgen_sha3_implCompress_id, this, _masm);
+ StubRoutines::_sha3_implCompressMB =
+ generate_sha3_implCompress_avx512(StubId::stubgen_sha3_implCompressMB_id, this, _masm);
+ StubRoutines::_double_keccak =
+ generate_sha3_implCompress_avx512(StubId::stubgen_double_keccak_id, this, _masm);
+ StubRoutines::_quad_keccak =
+ generate_sha3_implCompress_avx512(StubId::stubgen_quad_keccak_id, this, _masm);
+ } else { // AVX2 path: three stubs only; _quad_keccak is not assigned on this branch
+ StubRoutines::_sha3_implCompress =
+ generate_sha3_implCompress_avx2(StubId::stubgen_sha3_implCompress_id, this, _masm);
+ StubRoutines::_double_keccak =
+ generate_sha3_implCompress_avx2(StubId::stubgen_double_keccak_id, this, _masm);
+ StubRoutines::_sha3_implCompressMB =
+ generate_sha3_implCompress_avx2(StubId::stubgen_sha3_implCompressMB_id, this, _masm);
+ }
}
}
@@ -532,7 +1180,8 @@ void StubGenerator::generate_sha3_stubs() {
void StubGenerator::init_AOTAddressTable_sha3(GrowableArray& external_addresses) {
#define ADD(addr) external_addresses.append((address)(addr));
ADD(round_constsAddr());
- ADD(permsAndRotsAddr());
+ ADD(avx2_round_constsAddr()); // NOTE(review): permsAndRotsAddr() is no longer registered; confirm the AVX-512 stubs do not still reference that table
+ ADD(avx2_rotate_constsAddr());
#undef ADD
}
#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
index 47ef0aef2bb..2edd9706272 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -63,7 +63,7 @@
// if too small.
// Run with +PrintInterpreter to get the VM to print out the size.
// Max size with JVMTI
-int TemplateInterpreter::InterpreterCodeSize = JVMCI_ONLY(268) NOT_JVMCI(256) * 1024;
+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024;
// Global Register Names
static const Register rbcp = r13;
@@ -224,32 +224,6 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i
__ restore_bcp();
__ restore_locals();
const Register thread = r15_thread;
-#if INCLUDE_JVMCI
- // Check if we need to take lock at entry of synchronized method. This can
- // only occur on method entry so emit it only for vtos with step 0.
- if (EnableJVMCI && state == vtos && step == 0) {
- Label L;
- __ cmpb(Address(thread, JavaThread::pending_monitorenter_offset()), 0);
- __ jcc(Assembler::zero, L);
- // Clear flag.
- __ movb(Address(thread, JavaThread::pending_monitorenter_offset()), 0);
- // Satisfy calling convention for lock_method().
- __ get_method(rbx);
- // Take lock.
- lock_method();
- __ bind(L);
- } else {
-#ifdef ASSERT
- if (EnableJVMCI) {
- Label L;
- __ cmpb(Address(r15_thread, JavaThread::pending_monitorenter_offset()), 0);
- __ jcc(Assembler::zero, L);
- __ stop("unexpected pending monitor in deopt entry");
- __ bind(L);
- }
-#endif
- }
-#endif
// handle exceptions
{
Label L;
diff --git a/src/hotspot/cpu/x86/vmStructs_x86.hpp b/src/hotspot/cpu/x86/vmStructs_x86.hpp
index e0fcc7d375a..33b1f3c5e57 100644
--- a/src/hotspot/cpu/x86/vmStructs_x86.hpp
+++ b/src/hotspot/cpu/x86/vmStructs_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,8 +32,7 @@
#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field) \
volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \
static_field(VM_Version, _features, VM_Version::VM_Features) \
- nonstatic_field(VM_Version::VM_Features, _features_bitmap[0], uint64_t) \
- static_field(VM_Version::VM_Features, _features_bitmap_size, int)
+ nonstatic_field(VM_Version::VM_Features, _features_bitmap[0], uint64_t)
#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type) \
declare_toplevel_type(VM_Version::VM_Features)
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 80d88f2ecb8..4cdcb1770bb 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -33,6 +33,7 @@
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
+#include "runtime/icache.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
@@ -64,8 +65,6 @@ address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
static BufferBlob* stub_blob;
static const int stub_size = 2550;
-int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;
-
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;
@@ -80,20 +79,6 @@ static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
-bool VM_Version::supports_clflush() {
- // clflush should always be available on x86_64
- // if not we are in real trouble because we rely on it
- // to flush the code cache.
- // Unfortunately, Assembler::clflush is currently called as part
- // of generation of the code cache flush routine. This happens
- // under Universe::init before the processor features are set
- // up. Assembler::flush calls this routine to check that clflush
- // is allowed. So, we give the caller a free pass if Universe init
- // is still in progress.
- assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
- return true;
-}
-
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
@@ -511,7 +496,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// and check upper YMM/ZMM bits after it.
//
int saved_useavx = UseAVX;
- int saved_usesse = UseSSE;
// If UseAVX is uninitialized or is set by the user to include EVEX
if (use_evex) {
@@ -542,7 +526,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// EVEX setup: run in lowest evex mode
VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 3;
- UseSSE = 2;
#ifdef _WINDOWS
// xmm5-xmm15 are not preserved by caller on windows
// https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
@@ -569,7 +552,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// AVX setup
VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 1;
- UseSSE = 2;
#ifdef _WINDOWS
__ subptr(rsp, 32);
__ vmovdqu(Address(rsp, 0), xmm7);
@@ -623,7 +605,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// EVEX check: run in lowest evex mode
VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 3;
- UseSSE = 2;
__ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
__ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
@@ -641,7 +622,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
generate_vzeroupper(wrapup);
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
- UseSSE = saved_usesse;
__ jmp(wrapup);
}
@@ -649,7 +629,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// AVX check
VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
UseAVX = 1;
- UseSSE = 2;
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
__ vmovdqu(Address(rsi, 0), xmm0);
__ vmovdqu(Address(rsi, 32), xmm7);
@@ -668,7 +647,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
generate_vzeroupper(wrapup);
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
- UseSSE = saved_usesse;
__ bind(wrapup);
__ popf();
@@ -905,25 +883,6 @@ void VM_Version::get_processor_features() {
_supports_atomic_getset8 = true;
_supports_atomic_getadd8 = true;
- // OS should support SSE for x64 and hardware should support at least SSE2.
- if (!VM_Version::supports_sse2()) {
- vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
- }
- // in 64 bit the use of SSE2 is the minimum
- if (UseSSE < 2) UseSSE = 2;
-
- // flush_icache_stub have to be generated first.
- // That is why Icache line size is hard coded in ICache class,
- // see icache_x86.hpp. It is also the reason why we can't use
- // clflush instruction in 32-bit VM since it could be running
- // on CPU which does not support it.
- //
- // The only thing we can do is to verify that flushed
- // ICache::line_size has correct value.
- guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
- // clflush_size is size in quadwords (8 bytes).
- guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
-
// assigning this field effectively enables Unsafe.writebackMemory()
// by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
// that is only implemented on x86_64 and only if the OS plays ball
@@ -952,12 +911,6 @@ void VM_Version::get_processor_features() {
clear_feature(CPU_SSE4A);
}
- if (UseSSE < 2)
- clear_feature(CPU_SSE2);
-
- if (UseSSE < 1)
- clear_feature(CPU_SSE);
-
// ZX cpus specific settings
if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
if (cpu_family() == 7) {
@@ -972,21 +925,13 @@ void VM_Version::get_processor_features() {
}
// UseSSE is set to the smaller of what hardware supports and what
- // the command line requires. I.e., you cannot set UseSSE to 2 on
- // older Pentiums which do not support it.
- int use_sse_limit = 0;
- if (UseSSE > 0) {
- if (UseSSE > 3 && supports_sse4_1()) {
- use_sse_limit = 4;
- } else if (UseSSE > 2 && supports_sse3()) {
- use_sse_limit = 3;
- } else if (UseSSE > 1 && supports_sse2()) {
- use_sse_limit = 2;
- } else if (UseSSE > 0 && supports_sse()) {
- use_sse_limit = 1;
- } else {
- use_sse_limit = 0;
- }
+ // the command line requires. i.e., you cannot set UseSSE to 4 on
+ // older systems which do not support it.
+ int use_sse_limit = 2;
+ if (UseSSE > 3 && supports_sse4_1()) {
+ use_sse_limit = 4;
+ } else if (UseSSE > 2 && supports_sse3()) {
+ use_sse_limit = 3;
}
if (FLAG_IS_DEFAULT(UseSSE)) {
FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
@@ -1150,7 +1095,6 @@ void VM_Version::get_processor_features() {
_has_intel_jcc_erratum = IntelJccErratumMitigation;
}
- assert(supports_clflush(), "Always present");
if (X86ICacheSync == -1) {
// Auto-detect, choosing the best performant one that still flushes
// the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
@@ -1379,7 +1323,8 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
- if (UseSHA && supports_evex() && supports_avx512bw()) {
+ if (UseSHA && ((supports_evex() && supports_avx512vlbw()) ||
+ (EnableX86ECoreOpts && !supports_hybrid()))) {
if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
}
@@ -1390,7 +1335,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
int max_vector_size = 0;
if (UseAVX == 0 || !os_supports_avx_vectors()) {
// 16 byte vectors (in XMM) are supported with SSE2+
@@ -1423,7 +1368,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
}
-#if defined(COMPILER2) && defined(ASSERT)
+#ifdef ASSERT
if (MaxVectorSize > 0) {
if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
tty->print_cr("State of YMM registers after signal handle:");
@@ -1438,7 +1383,7 @@ void VM_Version::get_processor_features() {
}
}
}
-#endif // COMPILER2 && ASSERT
+#endif // ASSERT
if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
@@ -1477,7 +1422,7 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
UseMontgomerySquareIntrinsic = true;
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
// On new cpus instructions which update whole XMM register should be used
// to prevent partial register stall due to dependencies on high half.
@@ -1535,7 +1480,7 @@ void VM_Version::get_processor_features() {
}
if (is_amd_family()) { // AMD cpus specific settings
- if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
+ if (FLAG_IS_DEFAULT(UseAddressNop)) {
// Use it on new AMD cpus starting from Opteron.
UseAddressNop = true;
}
@@ -1578,7 +1523,7 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
- if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+ if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
}
@@ -1594,7 +1539,7 @@ void VM_Version::get_processor_features() {
if (cpu_family() >= 0x17) {
// On family >=17h processors use XMM and UnalignedLoadStores
// for Array Copy
- if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+ if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
#ifdef COMPILER2
@@ -1796,8 +1741,6 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
- } else if (!supports_sse() && supports_3dnow_prefetch()) {
- FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
}
@@ -1927,14 +1870,17 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
}
- // CopyAVX3Threshold is the threshold at which 64-byte instructions are used
- // for implementing the array copy and clear operations.
- // The Intel platforms that supports the serialize instruction
- // have improved implementation of 64-byte load/stores and so the default
- // threshold is set to 0 for these platforms.
+ // CopyAVX3Threshold is the threshold at which 64-byte vector instructions
+ // are used for implementing the array copy, fill and clear operations.
+ // The Intel platforms that support the serialize instruction and the AMD
+ // platforms with native 512-bit datapath have improved implementation of
+ // 64-byte load/stores and so the default threshold is set to 0 for these
+ // platforms.
if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
if (is_intel() && is_intel_server_family() && supports_serialize()) {
FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
+ } else if (is_amd() && is_amd_avx512_datapath_server_family()) {
+ FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
} else {
FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
}
@@ -2889,29 +2835,27 @@ int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
VM_Features vm_features;
+
+ // check the features that must be present
+ guarantee(std_cpuid1_edx.bits.sse2 != 0, "sse2 is not supported");
+ guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
+ // clflush_size is size in quadwords (8 bytes).
+ guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == ICache::line_size/8, "clflush size is not supported");
+
+ // sse and sse2 are guaranteed to be present
+ vm_features.set_feature(CPU_SSE);
+ vm_features.set_feature(CPU_SSE2);
+
if (std_cpuid1_edx.bits.cmpxchg8 != 0)
vm_features.set_feature(CPU_CX8);
if (std_cpuid1_edx.bits.cmov != 0)
vm_features.set_feature(CPU_CMOV);
- if (std_cpuid1_edx.bits.clflush != 0)
- vm_features.set_feature(CPU_FLUSH);
- // clflush should always be available on x86_64
- // if not we are in real trouble because we rely on it
- // to flush the code cache.
- assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
ext_cpuid1_edx.bits.fxsr != 0))
vm_features.set_feature(CPU_FXSR);
// HT flag is set for multi-core processors also.
if (threads_per_core() > 1)
vm_features.set_feature(CPU_HT);
- if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
- ext_cpuid1_edx.bits.mmx != 0))
- vm_features.set_feature(CPU_MMX);
- if (std_cpuid1_edx.bits.sse != 0)
- vm_features.set_feature(CPU_SSE);
- if (std_cpuid1_edx.bits.sse2 != 0)
- vm_features.set_feature(CPU_SSE2);
if (std_cpuid1_ecx.bits.sse3 != 0)
vm_features.set_feature(CPU_SSE3);
if (std_cpuid1_ecx.bits.ssse3 != 0)
@@ -3243,17 +3187,9 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
// It will be used only when AllocatePrefetchStyle > 0
if (is_amd_family()) { // AMD | Hygon
- if (supports_sse2()) {
- return 256; // Opteron
- } else {
- return 128; // Athlon
- }
+ return 256; // Opteron
} else if (is_zx()) {
- if (supports_sse2()) {
- return 256;
- } else {
- return 128;
- }
+ return 256;
} else { // Intel
if (supports_sse3() && is_intel_server_family()) {
if (is_intel_modern_cpu()) { // Nehalem based cpus
@@ -3262,14 +3198,10 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
return 384;
}
}
- if (supports_sse2()) {
- if (is_intel_server_family()) {
- return 256; // Pentium M, Core, Core2
- } else {
- return 512; // Pentium 4
- }
+ if (is_intel_server_family()) {
+ return 256; // Pentium M, Core, Core2
} else {
- return 128; // Pentium 3 (and all other old CPUs)
+ return 512; // Pentium 4
}
}
}
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index fe6d424f50c..2fb1af71a10 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -34,7 +34,6 @@ class stringStream;
class VM_Version : public Abstract_VM_Version {
friend class VMStructs;
- friend class JVMCIVMStructs;
public:
// cpuid result register layouts. These are all unions of a uint32_t
@@ -373,7 +372,6 @@ protected:
/*
* Update following files when declaring new flags:
* test/lib-test/jdk/test/whitebox/CPUInfoTest.java
- * src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java
*/
enum Feature_Flag {
#define CPU_FEATURE_FLAGS(decl) \
@@ -381,58 +379,45 @@ protected:
decl(CMOV, cmov ) \
decl(FXSR, fxsr ) \
decl(HT, ht ) \
- \
- decl(MMX, mmx ) \
decl(3DNOW_PREFETCH, 3dnowpref ) /* Processor supports 3dnow prefetch and prefetchw instructions */ \
/* may not necessarily support other 3dnow instructions */ \
decl(SSE, sse ) \
decl(SSE2, sse2 ) \
- \
decl(SSE3, sse3 ) /* SSE3 comes from cpuid 1 (ECX) */ \
decl(SSSE3, ssse3 ) \
decl(SSE4A, sse4a ) \
decl(SSE4_1, sse4.1 ) \
- \
decl(SSE4_2, sse4.2 ) \
decl(POPCNT, popcnt ) \
decl(LZCNT, lzcnt ) \
decl(TSC, tsc ) \
- \
decl(TSCINV_BIT, tscinvbit ) \
decl(TSCINV, tscinv ) \
decl(AVX, avx ) \
decl(AVX2, avx2 ) \
- \
decl(AES, aes ) \
decl(ERMS, erms ) /* enhanced 'rep movsb/stosb' instructions */ \
decl(CLMUL, clmul ) /* carryless multiply for CRC */ \
decl(BMI1, bmi1 ) \
- \
decl(BMI2, bmi2 ) \
decl(RTM, rtm ) /* Restricted Transactional Memory instructions */ \
decl(ADX, adx ) \
decl(AVX512F, avx512f ) /* AVX 512bit foundation instructions */ \
- \
decl(AVX512DQ, avx512dq ) \
decl(AVX512PF, avx512pf ) \
decl(AVX512ER, avx512er ) \
decl(AVX512CD, avx512cd ) \
- \
decl(AVX512BW, avx512bw ) /* Byte and word vector instructions */ \
decl(AVX512VL, avx512vl ) /* EVEX instructions with smaller vector length */ \
decl(SHA, sha ) /* SHA instructions */ \
decl(FMA, fma ) /* FMA instructions */ \
- \
decl(VZEROUPPER, vzeroupper ) /* Vzeroupper instruction */ \
decl(AVX512_VPOPCNTDQ, avx512_vpopcntdq ) /* Vector popcount */ \
decl(AVX512_VPCLMULQDQ, avx512_vpclmulqdq ) /* Vector carryless multiplication */ \
decl(AVX512_VAES, avx512_vaes ) /* Vector AES instruction */ \
- \
decl(AVX512_VNNI, avx512_vnni ) /* Vector Neural Network Instructions */ \
- decl(FLUSH, clflush ) /* flush instruction */ \
decl(FLUSHOPT, clflushopt ) /* flusopth instruction */ \
decl(CLWB, clwb ) /* clwb instruction */ \
- \
decl(AVX512_VBMI2, avx512_vbmi2 ) /* VBMI2 shift left double instructions */ \
decl(AVX512_VBMI, avx512_vbmi ) /* Vector BMI instructions */ \
decl(HV, hv ) /* Hypervisor instructions */ \
@@ -464,7 +449,6 @@ protected:
class VM_Features {
friend class VMStructs;
- friend class JVMCIVMStructs;
private:
uint64_t _features_bitmap[(MAX_CPU_FEATURES / BitsPerLong) + 1];
@@ -494,7 +478,6 @@ protected:
return (1ULL << (feature & features_bitmap_element_mask()));
}
- static int _features_bitmap_size; // for JVMCI purposes
public:
VM_Features() {
for (int i = 0; i < features_bitmap_element_count(); i++) {
@@ -790,16 +773,12 @@ public:
VM_Version::clear_cpu_features();
}
static void set_avx_cpuFeatures() {
- _features.set_feature(CPU_SSE);
- _features.set_feature(CPU_SSE2);
_features.set_feature(CPU_AVX);
_features.set_feature(CPU_VZEROUPPER);
}
static void set_evex_cpuFeatures() {
_features.set_feature(CPU_AVX10_1);
_features.set_feature(CPU_AVX512F);
- _features.set_feature(CPU_SSE);
- _features.set_feature(CPU_SSE2);
_features.set_feature(CPU_VZEROUPPER);
}
static void set_apx_cpuFeatures() {
@@ -841,6 +820,7 @@ public:
static bool is_P6() { return cpu_family() >= 6; }
static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 18 || cpu_family() == 19; }
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
+ static bool is_amd_avx512_datapath_server_family() { return cpu_family() >= 0x1a; }
static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
static bool is_amd_family() { return is_amd() || is_hygon(); }
static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
@@ -869,9 +849,6 @@ public:
static bool supports_cmov() { return _features.supports_feature(CPU_CMOV); }
static bool supports_fxsr() { return _features.supports_feature(CPU_FXSR); }
static bool supports_ht() { return _features.supports_feature(CPU_HT); }
- static bool supports_mmx() { return _features.supports_feature(CPU_MMX); }
- static bool supports_sse() { return _features.supports_feature(CPU_SSE); }
- static bool supports_sse2() { return _features.supports_feature(CPU_SSE2); }
static bool supports_sse3() { return _features.supports_feature(CPU_SSE3); }
static bool supports_ssse3() { return _features.supports_feature(CPU_SSSE3); }
static bool supports_sse4_1() { return _features.supports_feature(CPU_SSE4_1); }
@@ -1010,10 +987,10 @@ public:
static int allocate_prefetch_distance(bool use_watermark_prefetch);
- // SSE2 and later processors implement a 'pause' instruction
- // that can be used for efficient implementation of
- // the intrinsic for java.lang.Thread.onSpinWait()
- static bool supports_on_spin_wait() { return supports_sse2(); }
+ // All currently supported processors support the PAUSE instruction,
+ // which can be used for an efficient implementation of the intrinsic for
+ // java.lang.Thread.onSpinWait().
+ static bool supports_on_spin_wait() { return true; }
// x86_64 supports fast class initialization checks
static bool supports_fast_class_init_checks() {
@@ -1046,7 +1023,6 @@ public:
// pending in-cache changes.
//
// 64 bit cpus always support clflush which writes back and evicts
- // on 32 bit cpus support is recorded via a feature flag
//
// clflushopt is optional and acts like clflush except it does
// not synchronize with other memory ops. it needs a preceding
@@ -1057,8 +1033,6 @@ public:
// synchronize with other memory ops. so, it needs preceding
// and trailing StoreStore fences.
- static bool supports_clflush(); // Can't inline due to header file conflict
-
// Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit
static bool supports_clflushopt() { return (_features.supports_feature(CPU_FLUSHOPT)); }
static bool supports_clwb() { return (_features.supports_feature(CPU_CLWB)); }
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index f99d1ea9d48..ab39692b44b 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1742,14 +1742,10 @@ static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
// ja -> b # a
// jp -> NaN # NaN
// jb -> a # b
-// je #
-// |-jz -> a | b # a & b
-// | -> a #
+// je -> a | b # a & b
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
- XMMRegister a, XMMRegister b,
- XMMRegister xmmt, Register rt,
+ XMMRegister a, XMMRegister b, Register rt,
bool min, enum FP_PREC pt) {
-
Label nan, zero, below, above, done;
emit_fp_ucom(masm, pt, a, b);
@@ -1759,31 +1755,26 @@ static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
} else {
__ jccb(Assembler::above, done);
}
-
__ jccb(Assembler::parity, nan); // PF=1
__ jccb(Assembler::below, below); // CF=1
// equal
- __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
- emit_fp_ucom(masm, pt, a, xmmt);
-
- __ jccb(Assembler::equal, zero);
- movfp(masm, pt, dst, a, rt);
-
- __ jmp(done);
-
- __ bind(zero);
+ // Using bitwise operations is a low cost way to compute the correct result
+ // for zero and non-zero inputs in this scenario except for NaN, which is
+ // handled separately. The mantissa and exponent are valid with either
+ // bitwise operation. For zero inputs, the sign bit is chosen according to
+ // whether a minimum or maximum value is required.
if (min) {
+ // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
__ vpor(dst, a, b, Assembler::AVX_128bit);
} else {
+ // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
__ vpand(dst, a, b, Assembler::AVX_128bit);
}
-
__ jmp(done);
__ bind(above);
movfp(masm, pt, dst, min ? b : a, rt);
-
__ jmp(done);
__ bind(nan);
@@ -4950,7 +4941,7 @@ operand immN0() %{
operand immP31()
%{
- predicate(n->as_Type()->type()->reloc() == relocInfo::none
+ predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
&& (n->get_ptr() >> 31) == 0);
match(ConP);
@@ -7376,18 +7367,18 @@ instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
ins_pipe( pipe_slow );
%}
-instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
+instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
%{
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
- effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
+ effect(USE a, USE b, TEMP rtmp, KILL cr);
- format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinF) ? true : false;
- emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
+ emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
@@ -7412,18 +7403,18 @@ instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atm
ins_pipe( pipe_slow );
%}
-instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
+instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
- effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
+ effect(USE a, USE b, TEMP rtmp, KILL cr);
- format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
+ format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinF) ? true : false;
- emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
+ emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
@@ -7445,18 +7436,18 @@ instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
ins_pipe( pipe_slow );
%}
-instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
+instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
%{
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
- effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
+ effect(USE a, USE b, TEMP rtmp, KILL cr);
- format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinD) ? true : false;
- emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
+ emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );
@@ -7481,18 +7472,18 @@ instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atm
ins_pipe( pipe_slow );
%}
-instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
+instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
- effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
+ effect(USE a, USE b, TEMP rtmp, KILL cr);
- format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinD) ? true : false;
- emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
+ emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );
@@ -16337,7 +16328,7 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
- predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
+ predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
n->in(2)->as_Load()->barrier_data() == 0);
match(Set cr (CmpP op1 (LoadP op2)));
diff --git a/src/hotspot/os/bsd/osThread_bsd.cpp b/src/hotspot/os/bsd/osThread_bsd.cpp
index db476e529ac..5ef0423af13 100644
--- a/src/hotspot/os/bsd/osThread_bsd.cpp
+++ b/src/hotspot/os/bsd/osThread_bsd.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,13 +29,7 @@
#include
OSThread::OSThread()
- : _thread_id(
-#ifdef __APPLE__
- 0
-#else
- nullptr
-#endif
- ),
+ : _thread_id(0),
_pthread_id(nullptr),
_unique_thread_id(0),
_caller_sigmask(),
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index b46cc644393..2d1540ea641 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -102,6 +102,14 @@
#include
#endif
+#ifdef __FreeBSD__
+ #include <pthread_np.h>
+#endif
+
+#ifdef __NetBSD__
+#include <lwp.h>
+#endif
+
#ifdef __APPLE__
#include
#include
@@ -873,23 +881,20 @@ pid_t os::Bsd::gettid() {
mach_port_deallocate(mach_task_self(), port);
return (pid_t)port;
+#elif defined(__FreeBSD__)
+ return ::pthread_getthreadid_np();
+#elif defined(__OpenBSD__)
+ retval = getthrid();
+#elif defined(__NetBSD__)
+ retval = (pid_t) _lwp_self();
#else
- #ifdef __FreeBSD__
- retval = syscall(SYS_thr_self);
- #else
- #ifdef __OpenBSD__
- retval = syscall(SYS_getthrid);
- #else
- #ifdef __NetBSD__
- retval = (pid_t) syscall(SYS__lwp_self);
- #endif
- #endif
- #endif
+#error "unsupported OS"
#endif
if (retval == -1) {
return getpid();
}
+ return retval;
}
// Returns the uid of a process or -1 on error.
@@ -1123,7 +1128,7 @@ bool os::dll_address_to_library_name(address addr, char* buf,
// in case of error it checks if .dll/.so was built for the
// same architecture as Hotspot is running on
-void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebuflen) {
+static void *dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
bool ieee_handling = IEEE_subnormal_handling_OK();
if (!ieee_handling) {
Events::log_dll_message(nullptr, "IEEE subnormal handling check failed before loading %s", filename);
@@ -1207,7 +1212,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
log_info(os)("attempting shared library load of %s", filename);
- return os::Bsd::dlopen_helper(filename, RTLD_LAZY, ebuf, ebuflen);
+ return dlopen_helper(filename, ebuf, ebuflen);
}
#else
void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
@@ -1218,7 +1223,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
log_info(os)("attempting shared library load of %s", filename);
void* result;
- result = os::Bsd::dlopen_helper(filename, RTLD_LAZY, ebuf, ebuflen);
+ result = dlopen_helper(filename, ebuf, ebuflen);
if (result != nullptr) {
return result;
}
diff --git a/src/hotspot/os/bsd/os_bsd.hpp b/src/hotspot/os/bsd/os_bsd.hpp
index e87a680b2d2..91fcb090f50 100644
--- a/src/hotspot/os/bsd/os_bsd.hpp
+++ b/src/hotspot/os/bsd/os_bsd.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -76,8 +76,6 @@ class os::Bsd {
// Real-time clock functions
static void clock_init(void);
- static void *dlopen_helper(const char *path, int mode, char *ebuf, int ebuflen);
-
// Stack repair handling
// none present
@@ -105,6 +103,7 @@ class os::Bsd {
static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
+
public:
static int sched_getcpu() { return _sched_getcpu != nullptr ? _sched_getcpu() : -1; }
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
diff --git a/src/hotspot/os/linux/gc/z/zNUMA_linux.cpp b/src/hotspot/os/linux/gc/z/zNUMA_linux.cpp
index ab7498b313c..3245d07caf9 100644
--- a/src/hotspot/os/linux/gc/z/zNUMA_linux.cpp
+++ b/src/hotspot/os/linux/gc/z/zNUMA_linux.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,41 +29,126 @@
#include "os_linux.hpp"
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
+#include "runtime/java.hpp"
#include "runtime/os.hpp"
#include "utilities/debug.hpp"
+#include "utilities/integerCast.hpp"
-static uint* z_numa_id_to_node = nullptr;
-static uint32_t* z_node_to_numa_id = nullptr;
+// Converts between ZGC NUMA ids and Linux NUMA node ids.
+//
+// A ZGC NUMA id is a dense zero-based index over the NUMA nodes that ZGC can
+// allocate from. For example, with two available NUMA nodes, ids 0 and 1 are
+// tracked.
+//
+// A Linux NUMA node id is the number used by native Linux NUMA APIs. These node
+// ids usually reflect the hardware configuration, can be sparse, and do not
+// have to start at 0.
+class ZNUMAConverter {
+private:
+  // Set by initialize() once both mapping tables have been populated.
+  bool _initialized = false;
+
+  // ZGC NUMA id -> Linux NUMA node id; _id_to_node_size entries are valid.
+  uint* _id_to_node = nullptr;
+  uint32_t _id_to_node_size = 0;
+
+  // Linux NUMA node id -> ZGC NUMA id. Indexed by node id, so it holds
+  // numa_max_node() + 1 entries; nodes without a ZGC id map to (uint32_t)-1.
+  uint32_t* _node_to_id = nullptr;
+  size_t _node_to_id_size = 0;
+
+  // Builds the id -> node table from os::numa_get_leaf_groups(). Exits the VM
+  // during initialization if the number of configured nodes is not positive.
+  void populate_id_mappings() {
+    const int configured_nodes_limit = os::Linux::numa_num_configured_nodes();
+    assert(configured_nodes_limit > 0, "Invalid number of configured NUMA nodes: %d", configured_nodes_limit);
+
+    if (configured_nodes_limit <= 0) {
+      vm_exit_during_initialization("Cannot determine number of available NUMA nodes. Run without NUMA using -XX:-UseNUMA");
+    }
+
+    // Allocate and populate mapping array (id -> node)
+    _id_to_node = NEW_C_HEAP_ARRAY(uint, (size_t)configured_nodes_limit, mtGC);
+    const size_t available_nodes = os::numa_get_leaf_groups(_id_to_node, (size_t)configured_nodes_limit);
+
+    assert(available_nodes <= (size_t)configured_nodes_limit,
+           "Too many NUMA nodes: %zu <= %d", available_nodes, configured_nodes_limit);
+
+    // Clamp so the recorded size never exceeds the allocation above, even
+    // when the assert is not compiled in.
+    _id_to_node_size = integer_cast<uint32_t>(MIN2(available_nodes, (size_t)configured_nodes_limit));
+  }
+
+  // Builds the node -> id table as the inverse of the id -> node table.
+  // Exits the VM during initialization if the highest node id is negative.
+  void populate_node_mappings() {
+    assert(_id_to_node != nullptr, "id-to-node mapping must be populated first");
+
+    const int max_node = os::Linux::numa_max_node();
+    assert(max_node >= 0, "Invalid highest NUMA node: %d", max_node);
+
+    if (max_node < 0) {
+      vm_exit_during_initialization("Cannot determine the NUMA max node. Run without NUMA using -XX:-UseNUMA");
+    }
+
+    _node_to_id_size = (size_t)max_node + 1;
+
+    // Allocate mapping array (node -> id)
+    _node_to_id = NEW_C_HEAP_ARRAY(uint32_t, _node_to_id_size, mtGC);
+
+    // Fill the array with invalid ids
+    for (size_t i = 0; i < _node_to_id_size; i++) {
+      _node_to_id[i] = (uint32_t)-1;
+    }
+
+    // Fill the reverse mappings
+    for (uint32_t i = 0; i < _id_to_node_size; i++) {
+      const uint node = _id_to_node[i];
+      assert(node < _node_to_id_size, "NUMA node is out of bounds node=%u, max=%zu", node, _node_to_id_size);
+      _node_to_id[node] = i;
+    }
+  }
+
+public:
+  // Populates both tables. Must be called once, and only when UseNUMA is set.
+  void initialize() {
+    precond(!_initialized);
+    precond(UseNUMA);
+
+    populate_id_mappings();
+    populate_node_mappings();
+
+    _initialized = true;
+  }
+
+  // Number of ZGC NUMA ids, i.e. valid entries in the id -> node table.
+  uint32_t count() const {
+    precond(_initialized);
+    return _id_to_node_size;
+  }
+
+  // Returns the ZGC NUMA id of the given Linux NUMA node, or (uint32_t)-1 if
+  // the node is out of range or has no ZGC id.
+  uint32_t node_to_id(int node) const {
+    precond(_initialized);
+    assert(node >= 0, "Invalid NUMA node: %d", node);
+    assert((size_t)node < _node_to_id_size, "NUMA node is out of bounds node=%d, max=%zu", node, _node_to_id_size);
+
+    if (node < 0 || (size_t)node >= _node_to_id_size) {
+      return (uint32_t)-1;
+    }
+
+    return _node_to_id[node];
+  }
+
+  // Returns the Linux NUMA node of the given ZGC NUMA id; id must be < count().
+  int id_to_node(uint32_t id) const {
+    precond(_initialized);
+    assert(id < count(), "NUMA id out of range 0 <= %u < %u", id, count());
+
+    return (int)_id_to_node[id];
+  }
+};
+
+static ZNUMAConverter z_numa_converter;
void ZNUMA::pd_initialize() {
_enabled = UseNUMA;
- size_t configured_nodes = 0;
-
if (UseNUMA) {
- const size_t max_nodes = os::Linux::numa_num_configured_nodes();
- z_numa_id_to_node = NEW_C_HEAP_ARRAY(uint, max_nodes, mtGC);
- configured_nodes = os::numa_get_leaf_groups(z_numa_id_to_node, 0);
-
- z_node_to_numa_id = NEW_C_HEAP_ARRAY(uint32_t, max_nodes, mtGC);
-
- // Fill the array with invalid NUMA ids
- for (uint32_t i = 0; i < max_nodes; i++) {
- z_node_to_numa_id[i] = (uint32_t)-1;
- }
-
- // Fill the reverse mappings
- for (uint32_t i = 0; i < configured_nodes; i++) {
- z_node_to_numa_id[z_numa_id_to_node[i]] = i;
- }
+ z_numa_converter.initialize();
+ _count = z_numa_converter.count();
+ } else {
+ // UseNUMA and is_faked() are mutually excluded in zArguments.cpp.
+ _count = !FLAG_IS_DEFAULT(ZFakeNUMA)
+ ? ZFakeNUMA
+ : 1; // No NUMA nodes
}
-
- // UseNUMA and is_faked() are mutually excluded in zArguments.cpp.
- _count = UseNUMA
- ? configured_nodes
- : !FLAG_IS_DEFAULT(ZFakeNUMA)
- ? ZFakeNUMA
- : 1; // No NUMA nodes
}
uint32_t ZNUMA::id() {
@@ -77,7 +162,9 @@ uint32_t ZNUMA::id() {
return 0;
}
- return z_node_to_numa_id[os::Linux::get_node_by_cpu(ZCPU::id())];
+ const uint32_t id = z_numa_converter.node_to_id(os::Linux::get_node_by_cpu(ZCPU::id()));
+ assert(id != (uint32_t)-1, "Unknown NUMA node");
+ return id;
}
uint32_t ZNUMA::memory_id(uintptr_t addr) {
@@ -93,14 +180,9 @@ uint32_t ZNUMA::memory_id(uintptr_t addr) {
fatal("Failed to get NUMA id for memory at " PTR_FORMAT " (%s)", addr, err.to_string());
}
- DEBUG_ONLY(const int max_nodes = os::Linux::numa_num_configured_nodes();)
- assert(node < max_nodes, "NUMA node is out of bounds node=%d, max=%d", node, max_nodes);
-
- return z_node_to_numa_id[node];
+ return z_numa_converter.node_to_id(node);
}
int ZNUMA::numa_id_to_node(uint32_t numa_id) {
- assert(numa_id < _count, "NUMA id out of range 0 <= %ud <= %ud", numa_id, _count);
-
- return (int)z_numa_id_to_node[numa_id];
+ return z_numa_converter.id_to_node(numa_id);
}
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index 6927f5108ac..ba48f2b4efc 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -3310,7 +3310,10 @@ size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
void os::Linux::build_numa_affinity_masks() {
// We only build the affinity masks if running libnuma v2 (_numa_node_to_cpus_v2
// is available) and we have the affinity mask of the process when it started.
- if (_numa_node_to_cpus_v2 == nullptr || _numa_all_cpus_ptr == nullptr) {
+ if (_numa_node_to_cpus_v2 == nullptr ||
+ _numa_all_cpus_ptr == nullptr ||
+ _numa_allocate_cpumask == nullptr ||
+ nindex_to_node() == nullptr) {
return;
}
@@ -3320,16 +3323,24 @@ void os::Linux::build_numa_affinity_masks() {
// the following NUMA setup:
// NUMA 0: CPUs 0-3, NUMA 1: CPUs 4-7
// We expect to get the following affinity masks:
- // Affinity masks: idx 0 = (0, 1), idx 1 = (4, 5)
+ // Affinity masks: node 0 = (0, 1), node 1 = (4, 5)
+ //
+ // The array is indexed by OS NUMA node id because node ids can be sparse;
+ // slots for node ids that are not in use stay nullptr.
+
+ const int highest_node_number = Linux::numa_max_node();
+ if (highest_node_number < 0) {
+ return;
+ }
- const int num_nodes = get_existing_num_nodes();
const unsigned num_cpus = (unsigned)os::processor_count();
- for (int i = 0; i < num_nodes; i++) {
+ _numa_affinity_masks->at_grow(highest_node_number, nullptr);
+
+ for (int node : *nindex_to_node()) {
struct bitmask* affinity_mask = _numa_allocate_cpumask();
- // Fill the affinity mask with all CPUs belonging to NUMA node i
- _numa_node_to_cpus_v2(i, affinity_mask);
+ // Fill the affinity mask with all CPUs belonging to the OS NUMA node id.
+ _numa_node_to_cpus_v2(node, affinity_mask);
// Clear the bits of all CPUs that the process is not allowed to
// execute tasks on
@@ -3339,7 +3350,7 @@ void os::Linux::build_numa_affinity_masks() {
}
}
- _numa_affinity_masks->push(affinity_mask);
+ _numa_affinity_masks->at_put(node, affinity_mask);
}
}
@@ -3463,6 +3474,7 @@ void os::Linux::numa_set_thread_affinity(pid_t tid, int node) {
// is available) and we have all affinity mask
if (_numa_sched_setaffinity == nullptr ||
_numa_all_cpus_ptr == nullptr ||
+ _numa_affinity_masks == nullptr ||
_numa_affinity_masks->is_empty()) {
return;
}
@@ -3472,8 +3484,13 @@ void os::Linux::numa_set_thread_affinity(pid_t tid, int node) {
// of the thread when the VM was started
_numa_sched_setaffinity(tid, _numa_all_cpus_ptr);
} else {
- // Normal case, set the affinity to the corresponding affinity mask
- _numa_sched_setaffinity(tid, _numa_affinity_masks->at(node));
+ // Normal case, set the affinity to the corresponding OS NUMA node id mask.
+ if (node >= 0 && node < _numa_affinity_masks->length()) {
+ struct bitmask* const affinity_mask = _numa_affinity_masks->at(node);
+ if (affinity_mask != nullptr) {
+ _numa_sched_setaffinity(tid, affinity_mask);
+ }
+ }
}
}
diff --git a/src/hotspot/os/windows/os_perf_windows.cpp b/src/hotspot/os/windows/os_perf_windows.cpp
index 59ea83b9148..d083c72c2e0 100644
--- a/src/hotspot/os/windows/os_perf_windows.cpp
+++ b/src/hotspot/os/windows/os_perf_windows.cpp
@@ -779,6 +779,114 @@ static OSReturn allocate_pdh_constants() {
return OS_OK;
}
+// Look up the PDH index by reading the English (locale 009) counter name
+// registry. See KB Q287159: Using PDH APIs Correctly in a Localized Language
+// for details.
+static OSReturn lookup_perf_index_by_english_name(const char* english_name,
+ DWORD* result) {
+ ResourceMark rm;
+
+ DWORD type = 0;
+ DWORD size = 0;
+
+ // Determine the required buffer size
+ if (RegQueryValueEx(HKEY_PERFORMANCE_DATA, "Counter 009",
+ nullptr, &type, nullptr, &size) != ERROR_SUCCESS) {
+ return OS_ERR;
+ }
+
+ // Since registry entries in `HKEY_PERFORMANCE_DATA` are generated on the fly,
+ // they could change between calls, so we can't rely just on the size returned
+ // by the first call. Instead, Microsoft's documentation suggests running
+ // these calls in a loop until the return code is no longer `ERROR_MORE_DATA`.
+
+ char* buffer;
+ do {
+ if (size == 0) {
+ return OS_ERR;
+ }
+
+ // When `RegQueryValueEx()` returns `ERROR_MORE_DATA`, the value in the
+ // callback argument is undefined, so we need to create a new variable whose
+ // address is passed as the callback size argument.
+ buffer = NEW_RESOURCE_ARRAY(char, size);
+
+ DWORD cb_size = size;
+ LSTATUS status = RegQueryValueEx(HKEY_PERFORMANCE_DATA, "Counter 009",
+ nullptr, &type, (LPBYTE)buffer,
+ &cb_size);
+ if (status == ERROR_MORE_DATA) {
+ // We need to increase the buffer size. Since we don't know _how much_ to
+ // increase it by, we use an estimate (4096) for the increment.
+ DWORD increment = 4096;
+ if (size > MAXDWORD - increment) {
+ return OS_ERR;
+ }
+ size += increment;
+ } else if (status == ERROR_SUCCESS) {
+ break;
+ } else {
+ // If there was some other problem fetching this registry entry, tell the
+ // caller that we couldn't lookup the index.
+ return OS_ERR;
+ }
+ } while (true);
+
+ if (type != REG_MULTI_SZ) {
+ return OS_ERR;
+ }
+
+ // The buffer contains indices and names in the form (<index>\0<name>\0)*, so
+ // iterate character by character to parse the name and if it matches the
+ // English name, then we return the integer value of the index.
+ for (const char* p = buffer; *p != '\0'; ) {
+ const char* idx_str = p;
+ p += strlen(p) + 1;
+ if (*p == '\0') {
+ break;
+ }
+
+ const char* name = p;
+ p += strlen(p) + 1;
+ if (strcmp(name, english_name) == 0) {
+ errno = 0;
+ char* end = nullptr;
+ unsigned long value = strtoul(idx_str, &end, 10);
+ if (errno == 0 && end != idx_str && value <= MAXDWORD) {
+ *result = (DWORD)value;
+ return OS_OK;
+ }
+ }
+ }
+
+ return OS_ERR;
+}
+
+// Return the counter index of the 'Processor Information' counter, if
+// available, or else the 'Processor' counter. The former is aware of the
+// possibility of multiple processor groups and thus provides a more accurate
+// processor count whereas the latter serves as fallback.
+static DWORD get_proc_counter() {
+ static DWORD pdh_idx = 0;
+ if (pdh_idx != 0) {
+ return pdh_idx;
+ }
+
+ // Some APIs accept English counter names whereas others accept counter names
+ // in the specific user's locale. We determine the locale-specific name using
+ // the counter index, but to find the counter index, we use the English name
+ // of the counter and look for it in a specific registry key.
+ DWORD info_idx;
+ if (lookup_perf_index_by_english_name("Processor Information",
+ &info_idx) != OS_OK) {
+ info_idx = PDH_PROCESSOR_IDX;
+ }
+
+ // Assign to the static variable so that the value persists across calls.
+ pdh_idx = info_idx;
+ return pdh_idx;
+}
+
/*
* Enuerate the Processor PDH object and returns a buffer containing the enumerated instances.
* Caller needs ResourceMark;
@@ -786,8 +894,11 @@ static OSReturn allocate_pdh_constants() {
* @return buffer if successful, null on failure.
*/
static const char* enumerate_cpu_instances() {
- char* processor; //'Processor' == PDH_PROCESSOR_IDX
- if (lookup_name_by_index(PDH_PROCESSOR_IDX, &processor) != OS_OK) {
+ // The `PdhEnumObjectItems()` function accepts a localized name of the perf
+ // counter. To obtain the name that is specific to the user's locale, we
+ // perform a reverse lookup from counter index to counter name.
+ char* processor;
+ if (lookup_name_by_index(get_proc_counter(), &processor) != OS_OK) {
return nullptr;
}
DWORD c_size = 0;
@@ -821,13 +932,17 @@ static const char* enumerate_cpu_instances() {
static int count_logical_cpus(const char* instances) {
assert(instances != nullptr, "invariant");
- // count logical instances.
- DWORD count;
- char* tmp;
- for (count = 0, tmp = const_cast(instances); *tmp != '\0'; tmp = &tmp[strlen(tmp) + 1], count++);
- // PDH reports an instance for each logical processor plus an instance for the total (_Total)
- assert(count == os::processor_count() + 1, "invalid enumeration!");
- return count - 1;
+ DWORD count = 0;
+ for (const char* tmp = instances; *tmp != '\0'; tmp += strlen(tmp) + 1) {
+ // In both the 'Processor' counter and the 'Processor Information' counter,
+ // the output contains totals for the processor group(s). We filter those
+ // out by looking for the `_Total` substring.
+ if (strstr(tmp, "_Total") == nullptr) {
+ count++;
+ }
+ }
+ assert(count >= 1, "invalid enumeration!");
+ return count;
}
static int number_of_logical_cpus() {
@@ -847,7 +962,16 @@ static double cpu_factor() {
static double cpuFactor = .0;
if (numCpus == 0) {
numCpus = number_of_logical_cpus();
- assert(os::processor_count() <= (int)numCpus, "invariant");
+
+ // If we are using the legacy 'Processor' counter, which counts processors
+ // only in the first processor group, then `numCpus` can undercount, in
+ // which case `numCpus` will likely be smaller than `os::processor_count()`.
+ // However, when we use the 'Processor Information' counter, we expect
+ // `numCpus` and `os::processor_count()` to be identical. In both cases, we
+ // expect to see at least one CPU.
+ assert(numCpus >= 1 && numCpus <= (DWORD)os::processor_count(),
+ "unexpected cpu count");
+
cpuFactor = numCpus * 100;
}
return cpuFactor;
@@ -861,8 +985,8 @@ static void log_error_message_on_no_PDH_artifact(const char* counter_path) {
static int initialize_cpu_query_counters(MultiCounterQueryP query, DWORD pdh_counter_idx) {
assert(query != nullptr, "invariant");
assert(query->counters != nullptr, "invariant");
- char* processor; //'Processor' == PDH_PROCESSOR_IDX
- if (lookup_name_by_index(PDH_PROCESSOR_IDX, &processor) != OS_OK) {
+ char* processor;
+ if (lookup_name_by_index(get_proc_counter(), &processor) != OS_OK) {
return OS_ERR;
}
char* counter_name = nullptr;
@@ -880,7 +1004,11 @@ static int initialize_cpu_query_counters(MultiCounterQueryP query, DWORD pdh_cou
counter_len += OBJECT_WITH_INSTANCES_COUNTER_FMT_LEN; // "\\%s(%s)\\%s"
const char* instances = enumerate_cpu_instances();
DWORD index = 0;
- for (char* tmp = const_cast(instances); *tmp != '\0'; tmp = &tmp[strlen(tmp) + 1], index++) {
+ for (char* tmp = const_cast(instances); *tmp != '\0'; tmp = &tmp[strlen(tmp) + 1]) {
+ // Skip totals for each processor group.
+ if (strstr(tmp, ",_Total") != nullptr) {
+ continue;
+ }
const size_t tmp_len = strlen(tmp);
char* counter_path = NEW_RESOURCE_ARRAY(char, counter_len + tmp_len + 1);
const size_t jio_snprintf_result = jio_snprintf(counter_path,
@@ -896,6 +1024,7 @@ static int initialize_cpu_query_counters(MultiCounterQueryP query, DWORD pdh_cou
// return OS_OK to have the system continue to run without the missing counter
return OS_OK;
}
+ index++;
}
// Query once to initialize the counters which require at least two samples
// (like the % CPU usage) to calculate correctly.
diff --git a/src/hotspot/os/windows/safefetch_static_windows.cpp b/src/hotspot/os/windows/safefetch_static_windows.cpp
index f037459f1fb..6320cb962f7 100644
--- a/src/hotspot/os/windows/safefetch_static_windows.cpp
+++ b/src/hotspot/os/windows/safefetch_static_windows.cpp
@@ -39,25 +39,21 @@
extern "C" char _SafeFetch32_continuation[];
extern "C" char _SafeFetch32_fault[];
-#ifdef _LP64
extern "C" char _SafeFetchN_continuation[];
extern "C" char _SafeFetchN_fault[];
-#endif // _LP64
bool handle_safefetch(int exception_code, address pc, void* context) {
CONTEXT* ctx = (CONTEXT*)context;
- if ((exception_code == EXCEPTION_ACCESS_VIOLATION ||
- exception_code == EXCEPTION_GUARD_PAGE) && ctx != nullptr) {
+ if (ctx != nullptr) {
if (pc == (address)_SafeFetch32_fault) {
os::win32::context_set_pc(ctx, (address)_SafeFetch32_continuation);
return true;
}
-#ifdef _LP64
+
if (pc == (address)_SafeFetchN_fault) {
os::win32::context_set_pc(ctx, (address)_SafeFetchN_continuation);
return true;
}
-#endif
}
return false;
}
diff --git a/src/hotspot/os_cpu/bsd_aarch64/javaThread_bsd_aarch64.cpp b/src/hotspot/os_cpu/bsd_aarch64/javaThread_bsd_aarch64.cpp
index 982605bbed4..c0634a379b8 100644
--- a/src/hotspot/os_cpu/bsd_aarch64/javaThread_bsd_aarch64.cpp
+++ b/src/hotspot/os_cpu/bsd_aarch64/javaThread_bsd_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -72,8 +72,8 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
frame ret_frame(ret_sp, ret_fp, addr);
if (!ret_frame.safe_for_sender(this)) {
-#if COMPILER2_OR_JVMCI
- // C2 and JVMCI use ebp as a general register see if null fp helps
+#ifdef COMPILER2
+ // C2 uses ebp as a general register see if null fp helps
frame ret_frame2(ret_sp, nullptr, addr);
if (!ret_frame2.safe_for_sender(this)) {
// nothing else to try if the frame isn't good
@@ -83,7 +83,7 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
#else
// nothing else to try if the frame isn't good
return false;
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
*fr_addr = ret_frame;
return true;
diff --git a/src/hotspot/os_cpu/bsd_x86/javaThread_bsd_x86.cpp b/src/hotspot/os_cpu/bsd_x86/javaThread_bsd_x86.cpp
index 0b5e5b6e7bd..08288f940fa 100644
--- a/src/hotspot/os_cpu/bsd_x86/javaThread_bsd_x86.cpp
+++ b/src/hotspot/os_cpu/bsd_x86/javaThread_bsd_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -67,8 +67,8 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
frame ret_frame(ret_sp, ret_fp, addr);
if (!ret_frame.safe_for_sender(this)) {
-#if COMPILER2_OR_JVMCI
- // C2 and JVMCI use ebp as a general register see if null fp helps
+#ifdef COMPILER2
+ // C2 uses ebp as a general register see if null fp helps
frame ret_frame2(ret_sp, nullptr, addr);
if (!ret_frame2.safe_for_sender(this)) {
// nothing else to try if the frame isn't good
@@ -78,7 +78,7 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
#else
// nothing else to try if the frame isn't good
return false;
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
*fr_addr = ret_frame;
return true;
diff --git a/src/hotspot/os_cpu/linux_x86/javaThread_linux_x86.cpp b/src/hotspot/os_cpu/linux_x86/javaThread_linux_x86.cpp
index 6f0cd49951d..71498ce2e62 100644
--- a/src/hotspot/os_cpu/linux_x86/javaThread_linux_x86.cpp
+++ b/src/hotspot/os_cpu/linux_x86/javaThread_linux_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -68,8 +68,8 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
frame ret_frame(ret_sp, ret_fp, addr);
if (!ret_frame.safe_for_sender(this)) {
-#if COMPILER2_OR_JVMCI
- // C2 and JVMCI use ebp as a general register see if null fp helps
+#ifdef COMPILER2
+ // C2 uses ebp as a general register see if null fp helps
frame ret_frame2(ret_sp, nullptr, addr);
if (!ret_frame2.safe_for_sender(this)) {
// nothing else to try if the frame isn't good
@@ -79,7 +79,7 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
#else
// nothing else to try if the frame isn't good
return false;
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
*fr_addr = ret_frame;
return true;
diff --git a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
index ee08738c678..6750b71476b 100644
--- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
+++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
@@ -81,7 +81,7 @@
#define SPELL_REG_SP "rsp"
#define SPELL_REG_FP "rbp"
-address os::current_stack_pointer() {
+NOINLINE address os::current_stack_pointer() {
return (address)__builtin_frame_address(0);
}
diff --git a/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.cpp b/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.cpp
index 8f6f1ccd38a..97b6d6812e5 100644
--- a/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.cpp
+++ b/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.cpp
@@ -26,6 +26,12 @@
#include "runtime/frame.inline.hpp"
#include "runtime/javaThread.hpp"
+// CRT-provided TLS slot for this module (jvm.dll), set by the OS loader.
+extern "C" unsigned long _tls_index;
+
+// TLS offset read by the assembly code in `aarch64_get_thread_helper()`.
+extern "C" ptrdiff_t _jvm_thr_current_tls_offset = JavaThread::get_thr_tls_offset();
+
frame JavaThread::pd_last_frame() {
assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
vmassert(_anchor.last_Java_pc() != nullptr, "not walkable");
@@ -66,8 +72,8 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
}
if (!ret_frame.safe_for_sender(this)) {
-#if COMPILER2_OR_JVMCI
- // C2 and JVMCI use ebp as a general register see if null fp helps
+#ifdef COMPILER2
+ // C2 uses ebp as a general register see if null fp helps
frame ret_frame2(ret_frame.sp(), nullptr, ret_frame.pc());
if (!ret_frame2.safe_for_sender(this)) {
// nothing else to try if the frame isn't good
@@ -77,7 +83,7 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
#else
// nothing else to try if the frame isn't good
return false;
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
*fr_addr = ret_frame;
return true;
@@ -87,3 +93,25 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
}
void JavaThread::cache_global_variables() { }
+
+ptrdiff_t JavaThread::get_thr_tls_offset() {
+ char* tebPointer = (char*)NtCurrentTeb();
+
+ // 0x58 is the offset of ThreadLocalStoragePointer within the TEB. This is
+ // a stable Windows ABI constant but is not exposed in the SDK's minimal
+ // _TEB struct.
+ void** tls_array = *(void***)(tebPointer + 0x58);
+ char* curr_ptr = (char*)&Thread::_thr_current;
+ char* tls_block = (char*)tls_array[_tls_index];
+
+ // Compute the offset of Thread::_thr_current within this module's TLS
+ // block. Unlike ELF, which provides `tlsdesc` relocations that let
+ // assembly code resolve TLS variables symbolically at link/load time,
+ // Windows PE/COFF has no equivalent mechanism for armasm64. So we compute
+ // the offset here in C++ (where the compiler knows how to access
+ // __declspec(thread) variables) and store it in a plain global that the
+ // assembly can load directly. In subsequent calls to
+ // `aarch64_get_thread_helper()`, the assembly will read the TEB to find the
+ // TLS block and then add this offset to find `Thread::_thr_current`.
+ return curr_ptr - tls_block;
+}
diff --git a/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.hpp b/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.hpp
index 7d6ed16e629..34984607814 100644
--- a/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.hpp
+++ b/src/hotspot/os_cpu/windows_aarch64/javaThread_windows_aarch64.hpp
@@ -46,8 +46,11 @@ private:
bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);
public:
- static Thread *aarch64_get_thread_helper() {
- return Thread::current();
- }
+ static Thread *aarch64_get_thread_helper();
+
+ // Compute the offset of `Thread::_thr_current` in the thread-local storage.
+ // This offset is then used by the assembly code implementation of
+ // `aarch64_get_thread_helper()`.
+ static ptrdiff_t get_thr_tls_offset();
#endif // OS_CPU_WINDOWS_AARCH64_JAVATHREAD_WINDOWS_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/windows_aarch64/threadLS_windows_aarch64.S b/src/hotspot/os_cpu/windows_aarch64/threadLS_windows_aarch64.S
new file mode 100644
index 00000000000..81749b9a372
--- /dev/null
+++ b/src/hotspot/os_cpu/windows_aarch64/threadLS_windows_aarch64.S
@@ -0,0 +1,64 @@
+;
+; Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+;
+; This code is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License version 2 only, as
+; published by the Free Software Foundation.
+;
+; This code is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; version 2 for more details (a copy is included in the LICENSE file that
+; accompanied this code).
+;
+; You should have received a copy of the GNU General Public License version
+; 2 along with this work; if not, write to the Free Software Foundation,
+; Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+;
+; Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+; or visit www.oracle.com if you need additional information or have any
+; questions.
+;
+
+ ; JavaThread::aarch64_get_thread_helper()
+ ;
+ ; Optimized TLS access to `Thread::_thr_current` on Windows AArch64.
+ ; Returns the current thread pointer in x0, clobbers x1, while all other
+ ; registers are preserved.
+
+ IMPORT _tls_index
+ IMPORT _jvm_thr_current_tls_offset
+
+ AREA threadls_text, CODE, READONLY
+ ALIGN 4
+
+ ; MSVC-decorated name for: static Thread* JavaThread::aarch64_get_thread_helper()
+ EXPORT |?aarch64_get_thread_helper@JavaThread@@SAPEAVThread@@XZ|
+
+|?aarch64_get_thread_helper@JavaThread@@SAPEAVThread@@XZ| PROC
+
+ ; x18 holds the TEB, 0x58 is a well-defined offset into the TEB on 64-bit
+ ; systems, so the following line loads the thread-local storage pointer
+ ; inside the TEB
+ ldr x1, [x18, #0x58]
+
+ ; Load `_tls_index` and zero-extend it to 64 bits to occupy x0
+ adrp x0, _tls_index
+ ldr w0, [x0, _tls_index]
+
+ ; `x0` holds the index, `x1` holds the array base address (each entry is 64
+ ; bits long), so in the following line, x1 = array_base[_tls_index]
+ ldr x1, [x1, x0, lsl #3]
+
+ ; Load cached TLS offset of `Thread::_thr_current`
+ adrp x0, _jvm_thr_current_tls_offset
+ ldr x0, [x0, _jvm_thr_current_tls_offset]
+
+ ; Load `Thread::_thr_current` value
+ ldr x0, [x1, x0]
+
+ ret
+
+ ENDP
+ END
diff --git a/src/hotspot/os_cpu/windows_x86/javaThread_windows_x86.cpp b/src/hotspot/os_cpu/windows_x86/javaThread_windows_x86.cpp
index 63edd9d8eda..40f2fb3b64c 100644
--- a/src/hotspot/os_cpu/windows_x86/javaThread_windows_x86.cpp
+++ b/src/hotspot/os_cpu/windows_x86/javaThread_windows_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -63,8 +63,8 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
}
if (!ret_frame.safe_for_sender(this)) {
-#if COMPILER2_OR_JVMCI
- // C2 and JVMCI use ebp as a general register see if null fp helps
+#ifdef COMPILER2
+ // C2 uses ebp as a general register see if null fp helps
frame ret_frame2(ret_frame.sp(), nullptr, ret_frame.pc());
if (!ret_frame2.safe_for_sender(this)) {
// nothing else to try if the frame isn't good
@@ -74,7 +74,7 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
#else
// nothing else to try if the frame isn't good
return false;
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
*fr_addr = ret_frame;
return true;
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index 79779782c2a..c80c1ac379c 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -3597,9 +3597,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"CompareAndSwapB", "CompareAndSwapS", "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
"WeakCompareAndSwapB", "WeakCompareAndSwapS", "WeakCompareAndSwapI", "WeakCompareAndSwapL", "WeakCompareAndSwapP", "WeakCompareAndSwapN",
"CompareAndExchangeB", "CompareAndExchangeS", "CompareAndExchangeI", "CompareAndExchangeL", "CompareAndExchangeP", "CompareAndExchangeN",
-#if INCLUDE_SHENANDOAHGC
- "ShenandoahCompareAndSwapN", "ShenandoahCompareAndSwapP", "ShenandoahWeakCompareAndSwapP", "ShenandoahWeakCompareAndSwapN", "ShenandoahCompareAndExchangeP", "ShenandoahCompareAndExchangeN",
-#endif
"GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP",
"GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN",
"ClearArray"
@@ -3911,7 +3908,7 @@ void MatchNode::count_commutative_op(int& count) {
static const char *commut_vector_op_list[] = {
"AddVB", "AddVS", "AddVI", "AddVL", "AddVHF", "AddVF", "AddVD",
"MulVB", "MulVS", "MulVI", "MulVL", "MulVHF", "MulVF", "MulVD",
- "AndV", "OrV", "XorV",
+ "AndV", "OrV", "XorV", "AndVMask", "OrVMask", "XorVMask",
"MaxVHF", "MinVHF", "MaxV", "MinV", "UMax","UMin"
};
diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp
index a9870676786..ec764a72846 100644
--- a/src/hotspot/share/adlc/output_c.cpp
+++ b/src/hotspot/share/adlc/output_c.cpp
@@ -1584,9 +1584,6 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
fprintf(fp, " ((MachIfNode*)n%d)->_fcnt = _fcnt;\n", cnt);
}
- // Fill in the bottom_type
- fprintf(fp, " n%d->_bottom_type = bottom_type();\n", cnt);
-
const char *resultOper = new_inst->reduce_result();
fprintf(fp," n%d->set_opnd_array(0, state->MachOperGenerator(%s));\n",
cnt, machOperEnum(resultOper));
@@ -1850,6 +1847,8 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
fprintf(fp, "\n");
if (node->expands()) {
+ // Fill in the bottom_type, only for result
+ fprintf(fp, " result->_bottom_type = bottom_type();\n");
fprintf(fp, " return result;\n");
} else {
fprintf(fp, " return this;\n");
diff --git a/src/hotspot/share/adlc/output_h.cpp b/src/hotspot/share/adlc/output_h.cpp
index f7389b5a1b1..8aa1762f902 100644
--- a/src/hotspot/share/adlc/output_h.cpp
+++ b/src/hotspot/share/adlc/output_h.cpp
@@ -1841,6 +1841,39 @@ void ArchDesc::declareClasses(FILE *fp) {
fprintf(fp," virtual const Pipeline *pipeline() const;\n");
}
+ // Use a more precise type for constants, this is useful for nodes that are expanded after
+ // matching
+ if (data_type != Form::none) {
+ // A constant's bottom_type returns a Type containing its constant value
+ fprintf(fp, " virtual const class Type *bottom_type() const {\n");
+ switch (data_type) {
+ case Form::idealI:
+ fprintf(fp, " return TypeInt::make(opnd_array(1)->constant());\n");
+ break;
+ case Form::idealP:
+ case Form::idealN:
+ case Form::idealNKlass:
+ fprintf(fp, " return opnd_array(1)->type();\n");
+ break;
+ case Form::idealD:
+ fprintf(fp, " return TypeD::make(opnd_array(1)->constantD());\n");
+ break;
+ case Form::idealH:
+ fprintf(fp, " return TypeH::make(opnd_array(1)->constantH());\n");
+ break;
+ case Form::idealF:
+ fprintf(fp, " return TypeF::make(opnd_array(1)->constantF());\n");
+ break;
+ case Form::idealL:
+ fprintf(fp, " return TypeLong::make(opnd_array(1)->constantL());\n");
+ break;
+ default:
+ assert(false, "Unimplemented()");
+ break;
+ }
+ fprintf(fp, " };\n");
+ }
+
// Check where 'ideal_type' must be customized
/*
if ( instr->_matrule && instr->_matrule->_rChild &&
diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp
index c6475050592..55ba27fcbe1 100644
--- a/src/hotspot/share/asm/codeBuffer.cpp
+++ b/src/hotspot/share/asm/codeBuffer.cpp
@@ -937,8 +937,8 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) {
// Move all the code and relocations to the new blob:
relocate_code_to(&cb);
- // some internal addresses, _last_insn _last_label, are used during code emission,
- // adjust them in expansion
+ // some internal addresses, _last_merge_candidate and _last_label, are used during
+ // code emission, adjust them in expansion
adjust_internal_address(insts_begin(), cb.insts_begin());
// Copy the temporary code buffer into the current code buffer.
@@ -966,8 +966,8 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) {
}
void CodeBuffer::adjust_internal_address(address from, address to) {
- if (_last_insn != nullptr) {
- _last_insn += to - from;
+ if (_last_merge_candidate != nullptr) {
+ _last_merge_candidate += to - from;
}
if (_last_label != nullptr) {
_last_label += to - from;
diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp
index 38e151273da..ea0237f1401 100644
--- a/src/hotspot/share/asm/codeBuffer.hpp
+++ b/src/hotspot/share/asm/codeBuffer.hpp
@@ -561,11 +561,11 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
OopRecorder* _oop_recorder;
- OopRecorder _default_oop_recorder; // override with initialize_oop_recorder
+ OopRecorder _default_oop_recorder; // override with initialize_oop_recorder
Arena* _overflow_arena;
- address _last_insn; // used to merge consecutive memory barriers, loads or stores.
- address _last_label; // record last bind label address, it's also the start of current bb.
+ address _last_label; // record last bind label address, it's also the start of current bb.
+ address _last_merge_candidate; // used to merge consecutive memory barriers, loads or stores.
SharedStubToInterpRequests* _shared_stub_to_interp_requests; // used to collect requests for shared iterpreter stubs
SharedTrampolineRequests* _shared_trampoline_requests; // used to collect requests for shared trampolines
@@ -591,11 +591,11 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
_total_size = 0;
_oop_recorder = nullptr;
_overflow_arena = nullptr;
- _last_insn = nullptr;
_last_label = nullptr;
- _finalize_stubs = false;
+ _last_merge_candidate = nullptr;
_shared_stub_to_interp_requests = nullptr;
_shared_trampoline_requests = nullptr;
+ _finalize_stubs = false;
_consts.initialize_outer(this, SECT_CONSTS);
_insts.initialize_outer(this, SECT_INSTS);
@@ -812,9 +812,9 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
OopRecorder* oop_recorder() const { return _oop_recorder; }
- address last_insn() const { return _last_insn; }
- void set_last_insn(address a) { _last_insn = a; }
- void clear_last_insn() { set_last_insn(nullptr); }
+ address last_merge_candidate() const { return _last_merge_candidate; }
+ void set_last_merge_candidate(address a) { _last_merge_candidate = a; }
+ void clear_last_merge_candidate() { set_last_merge_candidate(nullptr); }
address last_label() const { return _last_label; }
void set_last_label(address a) { _last_label = a; }
diff --git a/src/hotspot/share/c1/c1_Compilation.hpp b/src/hotspot/share/c1/c1_Compilation.hpp
index 5125e0bbe0a..5de201592f9 100644
--- a/src/hotspot/share/c1/c1_Compilation.hpp
+++ b/src/hotspot/share/c1/c1_Compilation.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -256,7 +256,7 @@ class Compilation: public StackObj {
// will compilation make optimistic assumptions that might lead to
// deoptimization and that the runtime will account for?
bool is_optimistic() {
- return CompilerConfig::is_c1_only_no_jvmci() && !is_profiling() &&
+ return CompilerConfig::is_c1_only() && !is_profiling() &&
(RangeCheckElimination || UseLoopInvariantCodeMotion) &&
method()->method_data()->trap_count(Deoptimization::Reason_none) == 0;
}
diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp
index f910ecadc16..db55b8c5fa8 100644
--- a/src/hotspot/share/c1/c1_GraphBuilder.cpp
+++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3558,7 +3558,7 @@ const char* GraphBuilder::check_can_parse(ciMethod* callee) const {
// negative filter: should callee NOT be inlined? returns null, ok to inline, or rejection msg
const char* GraphBuilder::should_not_inline(ciMethod* callee) const {
- if ( compilation()->directive()->should_not_inline(callee)) return "disallowed by CompileCommand";
+ if ( compilation()->directive()->should_not_inline(callee, compilation()->env()->comp_level())) return "disallowed by CompileCommand";
if ( callee->dont_inline()) return "don't inline by annotation";
return nullptr;
}
diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp
index f6807abcd7a..de3b08e08e8 100644
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp
+++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -478,7 +478,7 @@ void LIRGenerator::klass2reg_with_patching(LIR_Opr r, ciMetadata* obj, CodeEmitI
/* C2 relies on constant pool entries being resolved (ciTypeFlow), so if tiered compilation
* is active and the class hasn't yet been resolved we need to emit a patch that resolves
* the class. */
- if ((!CompilerConfig::is_c1_only_no_jvmci() && need_resolve) || !obj->is_loaded() || PatchALot) {
+ if ((!CompilerConfig::is_c1_only() && need_resolve) || !obj->is_loaded() || PatchALot) {
assert(info != nullptr, "info must be set if class is not loaded");
__ klass2reg_patch(nullptr, r, info);
} else {
@@ -644,7 +644,7 @@ void LIRGenerator::monitor_exit(LIR_Opr object, LIR_Opr lock, LIR_Opr new_hdr, L
void LIRGenerator::print_if_not_loaded(const NewInstance* new_instance) {
if (PrintNotLoaded && !new_instance->klass()->is_loaded()) {
tty->print_cr(" ###class not loaded at new bci %d", new_instance->printable_bci());
- } else if (PrintNotLoaded && (!CompilerConfig::is_c1_only_no_jvmci() && new_instance->is_unresolved())) {
+ } else if (PrintNotLoaded && (!CompilerConfig::is_c1_only() && new_instance->is_unresolved())) {
tty->print_cr(" ###class not resolved at new bci %d", new_instance->printable_bci());
}
}
diff --git a/src/hotspot/share/cds/aotArtifactFinder.cpp b/src/hotspot/share/cds/aotArtifactFinder.cpp
index bd69b18a1aa..1c274e958bf 100644
--- a/src/hotspot/share/cds/aotArtifactFinder.cpp
+++ b/src/hotspot/share/cds/aotArtifactFinder.cpp
@@ -115,10 +115,18 @@ void AOTArtifactFinder::find_artifacts() {
// Add all the InstanceKlasses (and their array classes) that are always included.
SystemDictionaryShared::dumptime_table()->iterate_all_live_classes([&] (InstanceKlass* ik, DumpTimeClassInfo& info) {
- // Skip "AOT tooling classes" in this block. They will be included in the AOT cache only if
- // - One of their subtypes is included
- // - One of their instances is found by HeapShared.
- if (!info.is_excluded() && !info.is_aot_tooling_class()) {
+ bool skip = info.is_excluded();
+ if (!(ik->is_initialized() && ik->has_aot_safe_initializer())) {
+ if (info.is_aot_tooling_class()) {
+ // This class is loaded only by AOT tooling (not as part of the app's training run).
+ // Skip this class for now, but it might be added later if
+ // - One of its subtypes is included
+ // - One of its instances is found by HeapShared.
+ skip = true;
+ }
+ }
+
+ if (!skip) {
bool add = false;
if (!ik->is_hidden()) {
// All non-hidden classes are always included into the AOT cache
diff --git a/src/hotspot/share/cds/aotMappedHeapWriter.cpp b/src/hotspot/share/cds/aotMappedHeapWriter.cpp
index 8f810ef5244..272f548d731 100644
--- a/src/hotspot/share/cds/aotMappedHeapWriter.cpp
+++ b/src/hotspot/share/cds/aotMappedHeapWriter.cpp
@@ -892,9 +892,7 @@ void AOTMappedHeapWriter::compute_ptrmap(AOTMappedHeapInfo* heap_info) {
Metadata* native_ptr = *buffered_field_addr;
guarantee(native_ptr != nullptr, "sanity");
- if (RegeneratedClasses::has_been_regenerated(native_ptr)) {
- native_ptr = RegeneratedClasses::get_regenerated_object(native_ptr);
- }
+ native_ptr = RegeneratedClasses::maybe_get_regenerated_object(native_ptr);
if (!ArchiveBuilder::current()->has_been_archived((address)native_ptr)) {
ResourceMark rm;
diff --git a/src/hotspot/share/cds/aotMetaspace.cpp b/src/hotspot/share/cds/aotMetaspace.cpp
index 4c23ede9cb8..76634fc3fba 100644
--- a/src/hotspot/share/cds/aotMetaspace.cpp
+++ b/src/hotspot/share/cds/aotMetaspace.cpp
@@ -999,7 +999,23 @@ void AOTMetaspace::dump_static_archive(TRAPS) {
}
#if INCLUDE_CDS_JAVA_HEAP && defined(_LP64)
-void AOTMetaspace::adjust_heap_sizes_for_dumping() {
+void AOTMetaspace::init_heap_settings() {
+ if (UseCompressedOops) {
+ if (!AOTCodeCache::is_caching_enabled()) {
+ // We don't need it -- always disable for better jitted code.
+ FLAG_SET_ERGO(AOTCompatibleOopCompression, false);
+ } else if (CDSConfig::is_dumping_final_static_archive()) {
+ // Obey the command-line switch. Do not override
+ } else if (CDSConfig::is_using_archive()) {
+ precond(FileMapInfo::current_info() == nullptr);
+ FileMapInfo* static_mapinfo = open_static_archive();
+ if (static_mapinfo != nullptr && static_mapinfo->header()->compatible_oop_compression()) {
+ // Use the same setting as recorded in the archive.
+ FLAG_SET_ERGO(AOTCompatibleOopCompression, true);
+ }
+ }
+ }
+
if (!CDSConfig::is_dumping_heap() || UseCompressedOops) {
return;
}
@@ -1502,7 +1518,10 @@ void AOTMetaspace::initialize_runtime_shared_and_meta_spaces() {
assert(CDSConfig::is_using_archive(), "Must be called when UseSharedSpaces is enabled");
MapArchiveResult result = MAP_ARCHIVE_OTHER_FAILURE;
- FileMapInfo* static_mapinfo = open_static_archive();
+ FileMapInfo* static_mapinfo = FileMapInfo::current_info(); // may have been opened by init_heap_settings()
+ if (static_mapinfo == nullptr) {
+ static_mapinfo = open_static_archive();
+ }
FileMapInfo* dynamic_mapinfo = nullptr;
if (static_mapinfo != nullptr) {
diff --git a/src/hotspot/share/cds/aotMetaspace.hpp b/src/hotspot/share/cds/aotMetaspace.hpp
index 2236bae91f3..975b6be76d7 100644
--- a/src/hotspot/share/cds/aotMetaspace.hpp
+++ b/src/hotspot/share/cds/aotMetaspace.hpp
@@ -77,7 +77,7 @@ class AOTMetaspace : AllStatic {
static void dump_static_archive(TRAPS) NOT_CDS_RETURN;
#ifdef _LP64
- static void adjust_heap_sizes_for_dumping() NOT_CDS_JAVA_HEAP_RETURN;
+ static void init_heap_settings() NOT_CDS_JAVA_HEAP_RETURN;
#endif
private:
diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp
index cf51897c2f1..bd0d070b212 100644
--- a/src/hotspot/share/cds/archiveBuilder.cpp
+++ b/src/hotspot/share/cds/archiveBuilder.cpp
@@ -564,9 +564,8 @@ ArchiveBuilder::FollowMode ArchiveBuilder::get_follow_mode(MetaspaceClosure::Ref
if (ref->type() == MetaspaceClosureType::ClassType) {
Klass* klass = (Klass*)ref->obj();
assert(klass->is_klass(), "must be");
- if (RegeneratedClasses::has_been_regenerated(klass)) {
- klass = RegeneratedClasses::get_regenerated_object(klass);
- }
+ klass = RegeneratedClasses::maybe_get_regenerated_object(klass);
+
if (is_excluded(klass)) {
ResourceMark rm;
aot_log_trace(aot)("pointer set to null: class (excluded): %s", klass->external_name());
@@ -710,9 +709,8 @@ bool ArchiveBuilder::has_been_archived(address src_addr) const {
// This is a class/method that belongs to one of the "original" classes that
// have been regenerated by lambdaFormInvokers.cpp. We must have archived
// the "regenerated" version of it.
- if (RegeneratedClasses::has_been_regenerated(src_addr)) {
- address regen_obj = RegeneratedClasses::get_regenerated_object(src_addr);
- precond(regen_obj != nullptr && regen_obj != src_addr);
+ address regen_obj = RegeneratedClasses::maybe_get_regenerated_object(src_addr);
+ if (regen_obj != src_addr) {
assert(has_been_archived(regen_obj), "must be");
assert(get_buffered_addr(src_addr) == get_buffered_addr(regen_obj), "must be");
}});
diff --git a/src/hotspot/share/cds/archiveUtils.cpp b/src/hotspot/share/cds/archiveUtils.cpp
index 7985c62d67b..bfaa1d6644c 100644
--- a/src/hotspot/share/cds/archiveUtils.cpp
+++ b/src/hotspot/share/cds/archiveUtils.cpp
@@ -303,7 +303,8 @@ public:
AllocGapNode* node = allocate_node(gap, Empty{});
insert(gap, node);
- log_trace(aot, alloc)("adding a gap of %zu bytes @ %p (total = %zu) in %zu blocks", gap_bytes, gap_bottom, _total_gap_bytes, size());
+ log_trace(aot, alloc)("adding a gap of %zu bytes @ %p (total = %zu, used = %zu) in %zu blocks",
+ gap_bytes, gap_bottom, _total_gap_bytes, _total_gap_bytes_used, size());
return gap_bytes;
}
@@ -325,29 +326,25 @@ public:
remove(node);
- precond(_total_gap_bytes >= num_bytes);
- _total_gap_bytes -= num_bytes;
_total_gap_bytes_used += num_bytes;
_total_gap_allocs++;
DEBUG_ONLY(node = nullptr); // Don't use it anymore!
precond(gap_bytes >= num_bytes);
if (gap_bytes > num_bytes) {
- gap_bytes -= num_bytes;
- gap_bottom += num_bytes;
-
- AllocGap gap(gap_bytes, gap_bottom); // constructor checks alignment
+ AllocGap gap(gap_bytes - num_bytes, gap_bottom + num_bytes); // constructor checks alignment
AllocGapNode* new_node = allocate_node(gap, Empty{});
insert(gap, new_node);
}
+ size_t unfilled_bytes = _total_gap_bytes - _total_gap_bytes_used;
log_trace(aot, alloc)("%zu bytes @ %p in a gap of %zu bytes (used gaps %zu times, remain gap = %zu bytes in %zu blocks)",
- num_bytes, result, gap_bytes, _total_gap_allocs, _total_gap_bytes, size());
+ num_bytes, result, gap_bytes, _total_gap_allocs, unfilled_bytes, size());
return result;
}
};
-size_t DumpRegion::_total_gap_bytes = 0;
-size_t DumpRegion::_total_gap_bytes_used = 0;
+size_t DumpRegion::_total_gap_bytes = 0; // All the gaps that have ever been created
+size_t DumpRegion::_total_gap_bytes_used = 0; // All the gaps that have been used
size_t DumpRegion::_total_gap_allocs = 0;
DumpRegion::AllocGapTree DumpRegion::_gap_tree;
@@ -418,20 +415,21 @@ void DumpRegion::report_gaps(DumpAllocStats* stats) {
});
double unfilled_percent = 0.0;
+ size_t unfilled_bytes = _total_gap_bytes - _total_gap_bytes_used;
if (_gap_tree.size() > 0) {
- unfilled_percent = percent_of(_total_gap_bytes, _total_gap_allocs);
+ unfilled_percent = percent_of(unfilled_bytes, _total_gap_bytes);
if (unfilled_percent > 5.0) {
// We have a limited number of small objects, so some small gaps may remain
// unfilled. If more than 5% of the gaps are unfilled, this likely indicates
// a systematic error that should be investigated. Otherwise, do not warn to
// avoid noise.
- log_warning(aot)("Unexpected %zu gaps (%zu bytes) for Klass alignment",
- _gap_tree.size(), _total_gap_bytes);
+ log_warning(aot)("Unexpected %zu gaps (%zu bytes, %.2f%%) for Klass alignment",
+ _gap_tree.size(), _total_gap_bytes, unfilled_percent);
}
}
if (_total_gap_allocs > 0) {
log_info(aot)("Allocated %zu objects of %zu bytes in gaps (remain = %zu bytes, %.2f%%)",
- _total_gap_allocs, _total_gap_bytes_used, _total_gap_bytes, unfilled_percent);
+ _total_gap_allocs, _total_gap_bytes_used, unfilled_bytes, unfilled_percent);
}
}
diff --git a/src/hotspot/share/cds/cdsConfig.cpp b/src/hotspot/share/cds/cdsConfig.cpp
index 21066f76932..2dd1d9d0824 100644
--- a/src/hotspot/share/cds/cdsConfig.cpp
+++ b/src/hotspot/share/cds/cdsConfig.cpp
@@ -110,6 +110,19 @@ void CDSConfig::ergo_initialize() {
AOTMapLogger::ergo_initialize();
setup_compiler_args();
+
+ if (is_dumping_full_module_graph()) {
+ precond(allow_only_single_java_thread());
+
+ // The AttachListenerThread may execute Java code or load new classes. It might see
+ // unexpected results after HeapShared::prepare_for_archiving().
+ //
+ // We disable all new incoming attach requests, so you can't use jcmd, etc, on this JVM.
+ // Since we are not running any application code in this JVM and only executed a very
+ // limited set of Java code (for module system init, class loading, indy resolution,
+ // etc), there is usually no need to attach to this JVM.
+ FLAG_SET_ERGO(DisableAttachMechanism, true);
+ }
}
const char* CDSConfig::default_archive_path() {
@@ -139,10 +152,10 @@ const char* CDSConfig::default_archive_path() {
if (!UseCompressedOops) {
tmp.print_raw("_nocoops");
}
- if (UseCompactObjectHeaders) {
- // Note that generation of xxx_coh.jsa variants require
- // --enable-cds-archive-coh at build time
- tmp.print_raw("_coh");
+ if (!UseCompactObjectHeaders) {
+ // Note that generation of xxx_nocoh.jsa variants requires
+ // --enable-cds-archive-nocoh at build time
+ tmp.print_raw("_nocoh");
}
#endif
tmp.print_raw(".jsa");
@@ -462,7 +475,7 @@ void CDSConfig::check_aot_flags() {
// At least one AOT flag has been used
_new_aot_flags_used = true;
- if (FLAG_IS_DEFAULT(AOTMode) || strcmp(AOTMode, "auto") == 0 || strcmp(AOTMode, "on") == 0) {
+ if (FLAG_IS_DEFAULT(AOTMode) || strcmp(AOTMode, "auto") == 0 || strcmp(AOTMode, "on") == 0 || strcmp(AOTMode, "required") == 0) {
check_aotmode_auto_or_on();
} else if (strcmp(AOTMode, "off") == 0) {
check_aotmode_off();
@@ -489,7 +502,7 @@ void CDSConfig::check_aotmode_auto_or_on() {
if (FLAG_IS_DEFAULT(AOTMode) || (strcmp(AOTMode, "auto") == 0)) {
RequireSharedSpaces = false;
} else {
- assert(strcmp(AOTMode, "on") == 0, "already checked");
+ assert(strcmp(AOTMode, "on") == 0 || strcmp(AOTMode, "required") == 0 , "already checked");
RequireSharedSpaces = true;
}
}
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index 1ed9979ff85..c2b321f3c0a 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -225,11 +225,11 @@ void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment,
}
#endif
_compressed_oops = UseCompressedOops;
+ _compatible_oop_compression = AOTCompatibleOopCompression;
_narrow_klass_pointer_bits = CompressedKlassPointers::narrow_klass_pointer_bits();
_narrow_klass_shift = ArchiveBuilder::precomputed_narrow_klass_shift();
// Which JIT compier is used
- _compiler_type = (u1)CompilerConfig::compiler_type();
_type_profile_level = TypeProfileLevel;
_type_profile_args_limit = TypeProfileArgsLimit;
_type_profile_parms_limit = TypeProfileParmsLimit;
@@ -1339,6 +1339,10 @@ bool FileMapInfo::map_aot_code_region(ReservedSpace rs) {
FileMapRegion* r = region_at(AOTMetaspace::ac);
assert(r->used() > 0 && r->used_aligned() == rs.size(), "must be");
+ if (UseCompressedOops) {
+ precond(header()->compatible_oop_compression() == AOTCompatibleOopCompression);
+ }
+
char* requested_base = rs.base();
assert(requested_base != nullptr, "should be inside code cache");
@@ -1592,6 +1596,7 @@ bool FileMapInfo::can_use_heap_region() {
if (UseCompressedOops) {
aot_log_info(aot)(" narrow_oop_mode = %d, narrow_oop_base = " PTR_FORMAT ", narrow_oop_shift = %d",
narrow_oop_mode(), p2i(narrow_oop_base()), narrow_oop_shift());
+ aot_log_info(aot)(" AOTCompatibleOopCompression = %s", header()->compatible_oop_compression() ? "true" : "false");
}
aot_log_info(aot)("The current max heap size = %zuM, G1HeapRegion::GrainBytes = %zu",
MaxHeapSize/M, G1HeapRegion::GrainBytes);
@@ -1600,6 +1605,7 @@ bool FileMapInfo::can_use_heap_region() {
if (UseCompressedOops) {
aot_log_info(aot)(" narrow_oop_mode = %d, narrow_oop_base = " PTR_FORMAT ", narrow_oop_shift = %d",
CompressedOops::mode(), p2i(CompressedOops::base()), CompressedOops::shift());
+ aot_log_info(aot)(" AOTCompatibleOopCompression = %s", AOTCompatibleOopCompression ? "true" : "false");
}
if (!object_streaming_mode()) {
aot_log_info(aot)(" heap range = [" PTR_FORMAT " - " PTR_FORMAT "]",
@@ -1812,23 +1818,6 @@ bool FileMapHeader::validate() {
CompactStrings ? "enabled" : "disabled");
return false;
}
- bool jvmci_compiler_is_enabled = CompilerConfig::is_jvmci_compiler_enabled();
- CompilerType compiler_type = CompilerConfig::compiler_type();
- CompilerType archive_compiler_type = CompilerType(_compiler_type);
- // JVMCI compiler does different type profiling settigns and generate
- // different code. We can't use archive which was produced
- // without it and reverse.
- // Only allow mix when JIT compilation is disabled.
- // Interpreter is used by default when dumping archive.
- bool intepreter_is_used = (archive_compiler_type == CompilerType::compiler_none) ||
- (compiler_type == CompilerType::compiler_none);
- if (!intepreter_is_used &&
- jvmci_compiler_is_enabled != (archive_compiler_type == CompilerType::compiler_jvmci)) {
- AOTMetaspace::report_loading_error("The %s's JIT compiler setting (%s)"
- " does not equal the current setting (%s).", file_type,
- compilertype2name(archive_compiler_type), compilertype2name(compiler_type));
- return false;
- }
if (TrainingData::have_data()) {
if (_type_profile_level != TypeProfileLevel) {
AOTMetaspace::report_loading_error("The %s's TypeProfileLevel setting (%d)"
diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp
index bae08bd5bc7..f5f6ee235a4 100644
--- a/src/hotspot/share/cds/filemap.hpp
+++ b/src/hotspot/share/cds/filemap.hpp
@@ -120,6 +120,7 @@ private:
CompressedOops::Mode _narrow_oop_mode; // compressed oop encoding mode
bool _object_streaming_mode; // dump was created for object streaming
bool _compressed_oops; // save the flag UseCompressedOops
+ bool _compatible_oop_compression; // value of AOTCompatibleOopCompression at dump time
int _narrow_klass_pointer_bits; // save number of bits in narrowKlass
int _narrow_klass_shift; // save shift width used to pre-compute narrowKlass IDs in archived heap objects
narrowPtr _cloned_vtables; // The address of the first cloned vtable
@@ -150,7 +151,6 @@ private:
AOTStreamedHeapHeader _streamed_heap_header;
// The following are parameters that affect MethodData layout.
- u1 _compiler_type;
uint _type_profile_level;
int _type_profile_args_limit;
int _type_profile_parms_limit;
@@ -199,6 +199,7 @@ public:
bool has_platform_or_app_classes() const { return _has_platform_or_app_classes; }
bool has_aot_linked_classes() const { return _has_aot_linked_classes; }
bool compressed_oops() const { return _compressed_oops; }
+ bool compatible_oop_compression() const { return _compatible_oop_compression; }
int narrow_klass_pointer_bits() const { return _narrow_klass_pointer_bits; }
int narrow_klass_shift() const { return _narrow_klass_shift; }
bool has_full_module_graph() const { return _has_full_module_graph; }
diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp
index d75816656b0..428a7dd7148 100644
--- a/src/hotspot/share/cds/heapShared.cpp
+++ b/src/hotspot/share/cds/heapShared.cpp
@@ -630,9 +630,7 @@ bool HeapShared::archive_object(oop obj, oop referrer, KlassSubGraphInfo* subgra
} else if (java_lang_invoke_ResolvedMethodName::is_instance(obj)) {
Method* m = java_lang_invoke_ResolvedMethodName::vmtarget(obj);
if (m != nullptr) {
- if (RegeneratedClasses::has_been_regenerated(m)) {
- m = RegeneratedClasses::get_regenerated_object(m);
- }
+ m = RegeneratedClasses::maybe_get_regenerated_object(m);
InstanceKlass* method_holder = m->method_holder();
AOTArtifactFinder::add_cached_class(method_holder);
}
@@ -1753,10 +1751,7 @@ bool HeapShared::walk_one_object(PendingOopStack* stack, int level, KlassSubGrap
}
if (java_lang_Class::is_instance(orig_obj)) {
- Klass* k = java_lang_Class::as_Klass(orig_obj);
- if (RegeneratedClasses::has_been_regenerated(k)) {
- orig_obj = RegeneratedClasses::get_regenerated_object(k)->java_mirror();
- }
+ orig_obj = RegeneratedClasses::maybe_get_regenerated_mirror(orig_obj);
}
if (CDSConfig::is_dumping_aot_linked_classes()) {
@@ -1961,11 +1956,7 @@ void HeapShared::verify_subgraph_from(oop orig_obj) {
void HeapShared::verify_reachable_objects_from(oop obj) {
_num_total_verifications ++;
if (java_lang_Class::is_instance(obj)) {
- Klass* k = java_lang_Class::as_Klass(obj);
- if (RegeneratedClasses::has_been_regenerated(k)) {
- k = RegeneratedClasses::get_regenerated_object(k);
- obj = k->java_mirror();
- }
+ obj = RegeneratedClasses::maybe_get_regenerated_mirror(obj);
obj = scratch_java_mirror(obj);
assert(obj != nullptr, "must be");
}
@@ -2460,9 +2451,7 @@ void HeapShared::remap_dumped_metadata(oop src_obj, address archived_object) {
return;
}
- if (RegeneratedClasses::has_been_regenerated(native_ptr)) {
- native_ptr = RegeneratedClasses::get_regenerated_object(native_ptr);
- }
+ native_ptr = RegeneratedClasses::maybe_get_regenerated_object(native_ptr);
address buffered_native_ptr = ArchiveBuilder::current()->get_buffered_addr((address)native_ptr);
address requested_native_ptr = ArchiveBuilder::current()->to_requested(buffered_native_ptr);
diff --git a/src/hotspot/share/cds/heapShared.inline.hpp b/src/hotspot/share/cds/heapShared.inline.hpp
index 8b323b067d7..52ec5182ec4 100644
--- a/src/hotspot/share/cds/heapShared.inline.hpp
+++ b/src/hotspot/share/cds/heapShared.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -95,10 +95,7 @@ inline oop HeapShared::maybe_remap_referent(bool is_java_lang_ref, size_t field_
}
if (java_lang_Class::is_instance(referent)) {
- Klass* k = java_lang_Class::as_Klass(referent);
- if (RegeneratedClasses::has_been_regenerated(k)) {
- referent = RegeneratedClasses::get_regenerated_object(k)->java_mirror();
- }
+ referent = RegeneratedClasses::maybe_get_regenerated_mirror(referent);
// When the source object points to a "real" mirror, the buffered object should point
// to the "scratch" mirror, which has all unarchivable fields scrubbed (to be reinstated
// at run time).
diff --git a/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp b/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp
index 4d212dbf2c2..3c28bf06adf 100644
--- a/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp
+++ b/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp
@@ -92,6 +92,7 @@ void RunTimeLambdaProxyClassInfo::init(LambdaProxyClassKey& key, DumpTimeLambdaP
}
DumpTimeLambdaProxyClassDictionary* LambdaProxyClassDictionary::_dumptime_table = nullptr;
+LambdaProxyClassDictionary LambdaProxyClassDictionary::_runtime_table_for_dumping;
LambdaProxyClassDictionary LambdaProxyClassDictionary::_runtime_static_table; // for static CDS archive
LambdaProxyClassDictionary LambdaProxyClassDictionary::_runtime_dynamic_table; // for dynamic CDS archive
@@ -425,7 +426,7 @@ public:
};
void LambdaProxyClassDictionary::write_dictionary(bool is_static_archive) {
- LambdaProxyClassDictionary* dictionary = is_static_archive ? &_runtime_static_table : &_runtime_dynamic_table;
+ LambdaProxyClassDictionary* dictionary = &_runtime_table_for_dumping;
CompactHashtableStats stats;
dictionary->reset();
CompactHashtableWriter writer(_dumptime_table->_count, &stats);
diff --git a/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp b/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp
index b20e998bba6..db9019731bd 100644
--- a/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp
+++ b/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp
@@ -269,6 +269,7 @@ class LambdaProxyClassDictionary : public OffsetCompactHashtable<
private:
class CleanupDumpTimeLambdaProxyClassTable;
static DumpTimeLambdaProxyClassDictionary* _dumptime_table;
+ static LambdaProxyClassDictionary _runtime_table_for_dumping;
static LambdaProxyClassDictionary _runtime_static_table; // for static CDS archive
static LambdaProxyClassDictionary _runtime_dynamic_table; // for dynamic CDS archive
@@ -319,7 +320,9 @@ public:
}
static void serialize(SerializeClosure* soc, bool is_static_archive) {
- if (is_static_archive) {
+ if (soc->writing()) {
+ _runtime_table_for_dumping.serialize_header(soc);
+ } else if (is_static_archive) {
_runtime_static_table.serialize_header(soc);
} else {
_runtime_dynamic_table.serialize_header(soc);
diff --git a/src/hotspot/share/cds/regeneratedClasses.cpp b/src/hotspot/share/cds/regeneratedClasses.cpp
index ae14866cea5..724d5b0cad4 100644
--- a/src/hotspot/share/cds/regeneratedClasses.cpp
+++ b/src/hotspot/share/cds/regeneratedClasses.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
#include "cds/archiveBuilder.hpp"
#include "cds/regeneratedClasses.hpp"
+#include "classfile/javaClasses.hpp"
#include "classfile/vmSymbols.hpp"
#include "memory/universe.hpp"
#include "oops/instanceKlass.hpp"
@@ -84,11 +85,34 @@ bool RegeneratedClasses::has_been_regenerated(address orig_obj) {
}
}
-address RegeneratedClasses::get_regenerated_object(address orig_obj) {
- assert(_regenerated_objs != nullptr, "must be");
- address* p =_regenerated_objs->get(orig_obj);
- assert(p != nullptr, "must be");
- return *p;
+// If the metadata pointed to by orig_obj has been regenerated, return the
+// regenerated version; otherwise return orig_obj.
+address RegeneratedClasses::maybe_get_regenerated_object(address orig_obj) {
+ precond(orig_obj != nullptr);
+ if (_regenerated_objs != nullptr) {
+ address* p = _regenerated_objs->get(orig_obj);
+ if (p != nullptr) {
+ precond(*p != nullptr);
+ return *p;
+ }
+ }
+ return orig_obj;
+}
+
+// If the Klass for orig_java_mirror has been regenerated, return the mirror of
+// the regenerated version; otherwise return orig_java_mirror.
+oop RegeneratedClasses::maybe_get_regenerated_mirror(oop orig_java_mirror) {
+ precond(java_lang_Class::is_instance(orig_java_mirror));
+ Klass* k = java_lang_Class::as_Klass(orig_java_mirror);
+ // Note: the primitive mirrors do not have an injected klass pointer, as primitive
+ // types such as "int" do not have a C++ Klass representation.
+ if (k != nullptr) {
+ Klass* regenerated_k = maybe_get_regenerated_object(k);
+ if (k != regenerated_k) {
+ return regenerated_k->java_mirror();
+ }
+ }
+ return orig_java_mirror;
}
bool RegeneratedClasses::is_regenerated_object(address regen_obj) {
@@ -127,5 +151,6 @@ void RegeneratedClasses::cleanup() {
}
if (_regenerated_objs != nullptr) {
delete _regenerated_objs;
+ _regenerated_objs = nullptr;
}
}
diff --git a/src/hotspot/share/cds/regeneratedClasses.hpp b/src/hotspot/share/cds/regeneratedClasses.hpp
index 080d84a2154..ae64660685d 100644
--- a/src/hotspot/share/cds/regeneratedClasses.hpp
+++ b/src/hotspot/share/cds/regeneratedClasses.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -43,15 +43,16 @@ class RegeneratedClasses : public AllStatic {
static void add_class(InstanceKlass* orig_klass, InstanceKlass* regen_klass);
static void cleanup();
static bool has_been_regenerated(address orig_obj);
- static address get_regenerated_object(address orig_obj); // orig_obj -> regen_obj
+ static address maybe_get_regenerated_object(address orig_obj);
+ static oop maybe_get_regenerated_mirror(oop orig_java_mirror);
static void record_regenerated_objects();
// Handy functions to avoid type casts
template static bool has_been_regenerated(T orig_obj) {
return has_been_regenerated((address)orig_obj);
}
- template static T get_regenerated_object(T orig_obj) {
- return (T)get_regenerated_object((address)orig_obj);
+ template static T maybe_get_regenerated_object(T orig_obj) {
+ return (T)maybe_get_regenerated_object((address)orig_obj);
}
static bool is_regenerated_object(address regen_obj);
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index 92bacc4c2c3..50c7bc59ccf 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -1069,16 +1069,13 @@ void ciEnv::register_method(ciMethod* target,
debug_info(), dependencies(), code_buffer,
frame_words, oop_map_set,
handler_table, inc_table,
- compiler, CompLevel(task()->comp_level()));
+ compiler, CompLevel(task()->comp_level()),
+ nmethod::Flags(has_unsafe_access, has_wide_vectors, has_monitors, has_scoped_access));
// Free codeBlobs
code_buffer->free_blob();
if (nm != nullptr) {
- nm->set_has_unsafe_access(has_unsafe_access);
- nm->set_has_wide_vectors(has_wide_vectors);
- nm->set_has_monitors(has_monitors);
- nm->set_has_scoped_access(has_scoped_access);
assert(!method->is_synchronized() || nm->has_monitors(), "");
if (entry_bci == InvocationEntryBci) {
diff --git a/src/hotspot/share/ci/ciEnv.hpp b/src/hotspot/share/ci/ciEnv.hpp
index b384ff47a89..8167697e84b 100644
--- a/src/hotspot/share/ci/ciEnv.hpp
+++ b/src/hotspot/share/ci/ciEnv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -236,6 +236,9 @@ private:
ciInstanceKlass* declared_holder = get_instance_klass_for_declared_method_holder(holder);
return _factory->get_unloaded_method(declared_holder, name, signature, accessor);
}
+ InstanceKlass::ClassState get_cached_init_state(uint id) {
+ return (InstanceKlass::ClassState)_factory->cached_init_state(id);
+ }
// Get a ciKlass representing an unloaded klass.
// Ensures uniqueness of the result.
diff --git a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp
index 6243258acd9..9a2a6dcd8f2 100644
--- a/src/hotspot/share/ci/ciInstanceKlass.cpp
+++ b/src/hotspot/share/ci/ciInstanceKlass.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -136,12 +136,14 @@ ciInstanceKlass::ciInstanceKlass(ciSymbol* name,
// ------------------------------------------------------------------
-// ciInstanceKlass::compute_shared_is_initialized
-void ciInstanceKlass::compute_shared_init_state() {
- GUARDED_VM_ENTRY(
- InstanceKlass* ik = get_instanceKlass();
- _init_state = ik->init_state();
- )
+InstanceKlass::ClassState ciInstanceKlass::compute_init_state() {
+ if (_is_shared && is_loaded()) {
+ // Return cached init state of shared klass
+ ciEnv* env = CURRENT_ENV;
+ assert(env->task() != nullptr, "only calls from compilation are expected here");
+ return env->get_cached_init_state(ident());
+ }
+ return _init_state;
}
// ------------------------------------------------------------------
@@ -319,11 +321,11 @@ void ciInstanceKlass::print_impl(outputStream* st) {
bool_to_str(has_subklass()),
layout_helper());
- _flags.print_klass_flags();
+ _flags.print_klass_flags(st);
if (_super) {
st->print(" super=");
- _super->print_name();
+ _super->print_name_on(st);
}
if (_java_mirror) {
st->print(" mirror=PRESENT");
@@ -432,6 +434,55 @@ ciField* ciInstanceKlass::get_field_by_name(ciSymbol* name, ciSymbol* signature,
return field;
}
+#ifdef ASSERT
+static void assert_injected_field(InternalFieldStream& fs) {
+ assert(!fs.done(), "invarinat");
+ fieldDescriptor fd = fs.field_descriptor();
+ assert(fd.is_injected(), "invariant");
+}
+#endif
+
+// ------------------------------------------------------------------
+// ciInstanceKlass::get_injected_instance_field_by_name
+//
+// Also does the work of compute_injected_fields(): sets _has_injected_fields if it is still unset (-1).
+//
+ciField* ciInstanceKlass::get_injected_instance_field_by_name(ciSymbol* name, ciSymbol* signature) {
+ VM_ENTRY_MARK;
+ InstanceKlass* const k = get_instanceKlass();
+ const Symbol* const name_symbol = name->get_symbol();
+ assert(name_symbol != nullptr, "invariant");
+ const Symbol* const sig_sym = signature->get_symbol();
+ assert(sig_sym != nullptr, "invariant");
+
+ if (_has_injected_fields == -1) {
+ if (super() != nullptr && super()->has_injected_fields()) {
+ _has_injected_fields = 1;
+ }
+ }
+
+ ciField* injected = nullptr;
+ for (InternalFieldStream fs(k); !fs.done(); fs.next()) {
+ if (fs.access_flags().is_static()) continue;
+ DEBUG_ONLY(assert_injected_field(fs);)
+ if (_has_injected_fields == -1) {
+ _has_injected_fields = 1;
+ }
+ if (fs.name() == name_symbol && fs.signature() == sig_sym) {
+ fieldDescriptor fd = fs.field_descriptor();
+ assert(fd.is_injected(), "invariant");
+ injected = new (CURRENT_THREAD_ENV->arena()) ciField(&fd);
+ break;
+ }
+ }
+
+ if (_has_injected_fields == -1) {
+ _has_injected_fields = 0;
+ }
+
+ return injected;
+}
+
// This is essentially a shortcut for:
// get_field_by_offset(field_offset, is_static)->layout_type()
// except this does not require allocating memory for a new ciField
diff --git a/src/hotspot/share/ci/ciInstanceKlass.hpp b/src/hotspot/share/ci/ciInstanceKlass.hpp
index a84c63981c9..6e696668638 100644
--- a/src/hotspot/share/ci/ciInstanceKlass.hpp
+++ b/src/hotspot/share/ci/ciInstanceKlass.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -106,43 +106,36 @@ protected:
bool is_shared() { return _is_shared; }
- void compute_shared_init_state();
+ InstanceKlass::ClassState compute_init_state();
bool compute_shared_has_subklass();
int compute_nonstatic_fields();
GrowableArray* compute_nonstatic_fields_impl(GrowableArray* super_fields);
bool compute_has_trusted_loader();
- // Update the init_state for shared klasses
- void update_if_shared(InstanceKlass::ClassState expected) {
- if (_is_shared && _init_state != expected) {
- if (is_loaded()) compute_shared_init_state();
- }
- }
-
public:
// Has this klass been initialized?
bool is_initialized() {
- update_if_shared(InstanceKlass::fully_initialized);
- return _init_state == InstanceKlass::fully_initialized;
+ InstanceKlass::ClassState state = compute_init_state();
+ return state == InstanceKlass::fully_initialized;
}
bool is_not_initialized() {
- update_if_shared(InstanceKlass::fully_initialized);
- return _init_state < InstanceKlass::being_initialized;
+ InstanceKlass::ClassState state = compute_init_state();
+ return state < InstanceKlass::being_initialized;
}
// Is this klass being initialized?
bool is_being_initialized() {
- update_if_shared(InstanceKlass::being_initialized);
- return _init_state == InstanceKlass::being_initialized;
+ InstanceKlass::ClassState state = compute_init_state();
+ return state == InstanceKlass::being_initialized;
}
// Has this klass been linked?
bool is_linked() {
- update_if_shared(InstanceKlass::linked);
- return _init_state >= InstanceKlass::linked;
+ InstanceKlass::ClassState state = compute_init_state();
+ return state >= InstanceKlass::linked;
}
// Is this klass in error state?
bool is_in_error_state() {
- update_if_shared(InstanceKlass::initialization_error);
- return _init_state == InstanceKlass::initialization_error;
+ InstanceKlass::ClassState state = compute_init_state();
+ return state == InstanceKlass::initialization_error;
}
// General klass information.
@@ -215,6 +208,7 @@ public:
ciInstanceKlass* get_canonical_holder(int offset);
ciField* get_field_by_offset(int field_offset, bool is_static);
ciField* get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static);
+ ciField* get_injected_instance_field_by_name(ciSymbol* name, ciSymbol* signature);
BasicType get_field_type_by_offset(int field_offset, bool is_static);
// total number of nonstatic fields (including inherited):
diff --git a/src/hotspot/share/ci/ciObjectFactory.cpp b/src/hotspot/share/ci/ciObjectFactory.cpp
index 2af5d812922..d3bef01f852 100644
--- a/src/hotspot/share/ci/ciObjectFactory.cpp
+++ b/src/hotspot/share/ci/ciObjectFactory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,7 @@
#include "gc/shared/collectedHeap.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/universe.hpp"
+#include "oops/instanceKlass.hpp"
#include "oops/oop.inline.hpp"
#include "oops/trainingData.hpp"
#include "runtime/handles.inline.hpp"
@@ -83,6 +84,7 @@ ciObjectFactory::ciObjectFactory(Arena* arena,
int expected_size)
: _arena(arena),
_ci_metadata(arena, expected_size, 0, nullptr),
+ _cached_init_state(arena, _shared_ident_limit, 0, (u1)0),
_unloaded_methods(arena, 4, 0, nullptr),
_unloaded_klasses(arena, 8, 0, nullptr),
_unloaded_instances(arena, 4, 0, nullptr),
@@ -97,6 +99,28 @@ ciObjectFactory::ciObjectFactory(Arena* arena,
// If the shared ci objects exist append them to this factory's objects
if (_shared_ci_metadata != nullptr) {
_ci_metadata.appendAll(_shared_ci_metadata);
+ // The ciInstanceKlass for a well-known class is shared by all
+ // compiler threads and can be updated concurrently by other
+ // compiler threads during a compilation.
+ // Make a local copy of the class state so that it does not
+ // change during the current compilation.
+ int len = _ci_metadata.length();
+ for (int i = 0; i < len; i++) {
+ ciMetadata* obj = _ci_metadata.at(i);
+ if (obj->is_loaded() && obj->is_instance_klass()) {
+ ciInstanceKlass* cik = obj->as_instance_klass();
+ precond(cik->is_shared());
+ InstanceKlass::ClassState current_state = cik->_init_state;
+ InstanceKlass::ClassState state = InstanceKlass::fully_initialized;
+ if (current_state != state) {
+ GUARDED_VM_ENTRY( state = cik->get_instanceKlass()->init_state(); )
+ // Update state of shared ciInstanceKlass
+ cik->_init_state = state;
+ }
+ // Cache state for current compilation
+ _cached_init_state.at_put_grow(cik->ident(), (u1)state, 0);
+ }
+ }
}
}
diff --git a/src/hotspot/share/ci/ciObjectFactory.hpp b/src/hotspot/share/ci/ciObjectFactory.hpp
index fd7ca6bb801..c578aecb564 100644
--- a/src/hotspot/share/ci/ciObjectFactory.hpp
+++ b/src/hotspot/share/ci/ciObjectFactory.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@ private:
Arena* _arena;
GrowableArray _ci_metadata;
+  // Local copy of shared ciInstanceKlass init state for the current compilation.
+  // Indexed by ciInstanceKlass::ident(); each element is an
+  // InstanceKlass::ClassState value narrowed to u1.
+  GrowableArray<u1> _cached_init_state;
GrowableArray _unloaded_methods;
GrowableArray _unloaded_klasses;
GrowableArray _unloaded_instances;
@@ -103,6 +105,11 @@ public:
ciMetadata* cached_metadata(Metadata* key);
ciSymbol* get_symbol(Symbol* key);
+ // Get the init state of a shared ciInstanceKlass as cached for the current
+ // compilation. 'id' is the klass's ident(); the result is the
+ // InstanceKlass::ClassState value (stored as u1) that the constructor
+ // recorded for loaded shared instance klasses.
+ u1 cached_init_state(uint id) {
+ return _cached_init_state.at(id);
+ }
+
// Get the ciSymbol corresponding to one of the vmSymbols.
static ciSymbol* vm_symbol_at(vmSymbolID index);
diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp
index 6d25e460688..221875e29d0 100644
--- a/src/hotspot/share/classfile/classLoader.cpp
+++ b/src/hotspot/share/classfile/classLoader.cpp
@@ -96,9 +96,18 @@ static JImageClose_t JImageClose = nullptr;
static JImageFindResource_t JImageFindResource = nullptr;
static JImageGetResource_t JImageGetResource = nullptr;
-// JimageFile pointer, or null if exploded JDK build.
+// JImageFile pointer, or null if exploded JDK build.
static JImageFile* JImage_file = nullptr;
+// PreviewMode status to control preview behaviour. JImage_file is unusable
+// for normal lookup until (Preview_mode != PREVIEW_MODE_UNINITIALIZED).
+enum PreviewMode {
+ PREVIEW_MODE_UNINITIALIZED = 0, // initial value, before ClassLoader::set_preview_mode() is called
+ PREVIEW_MODE_DEFAULT = 1, // result of set_preview_mode(false)
+ PREVIEW_MODE_ENABLE_PREVIEW = 2 // result of set_preview_mode(true)
+};
+// Current mode; assigned by ClassLoader::set_preview_mode(), which asserts it was still uninitialized.
+static PreviewMode Preview_mode = PREVIEW_MODE_UNINITIALIZED;
+
// Globals
PerfCounter* ClassLoader::_perf_accumulated_time = nullptr;
@@ -154,7 +163,7 @@ void ClassLoader::print_counters(outputStream *st) {
GrowableArray* ClassLoader::_patch_mod_entries = nullptr;
GrowableArray* ClassLoader::_exploded_entries = nullptr;
-ClassPathEntry* ClassLoader::_jrt_entry = nullptr;
+ClassPathImageEntry* ClassLoader::_jrt_entry = nullptr;
ClassPathEntry* volatile ClassLoader::_first_append_entry_list = nullptr;
ClassPathEntry* volatile ClassLoader::_last_append_entry = nullptr;
@@ -171,15 +180,6 @@ static bool string_starts_with(const char* str, const char* str_to_find) {
}
#endif
-static const char* get_jimage_version_string() {
- static char version_string[10] = "";
- if (version_string[0] == '\0') {
- jio_snprintf(version_string, sizeof(version_string), "%d.%d",
- VM_Version::vm_major_version(), VM_Version::vm_minor_version());
- }
- return (const char*)version_string;
-}
-
bool ClassLoader::string_ends_with(const char* str, const char* str_to_find) {
size_t str_len = strlen(str);
size_t str_to_find_len = strlen(str_to_find);
@@ -234,6 +234,69 @@ Symbol* ClassLoader::package_from_class_name(const Symbol* name, bool* bad_class
return SymbolTable::new_symbol(name, pointer_delta_as_int(start, base), pointer_delta_as_int(end, base));
}
+// --------------------------------
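+// The following file-local static functions (jimage_xxx and is_preview_enabled) encapsulate JImage_file
+// access and reads of Preview_mode (Preview_mode itself is set by ClassLoader::set_preview_mode()).
+// This is done to make it easy to reason about the JImage file state (exists vs initialized etc.).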
+
+// Opens the JImage file at 'modules_path' and stores the result in JImage_file.
+// If Arguments::has_jimage() is true but the open failed, the VM exits during
+// initialization. Returns true if the file is now open (see also jimage_is_open()).
+static bool jimage_open(const char* modules_path) {
+  // Nothing useful is currently reported through this out-parameter,
+  // so its value is never inspected.
+  jint ignored_error;
+  JImageFile* const opened = (*JImageOpen)(modules_path, &ignored_error);
+  JImage_file = opened;
+  if (Arguments::has_jimage()) {
+    if (opened == nullptr) {
+      // The modules file exists but is unreadable or corrupt
+      vm_exit_during_initialization(err_msg("Unable to load %s", modules_path));
+    }
+  }
+  return opened != nullptr;
+}
+
+// Closes the JImage file, if one is open, and resets the reference to null
+// (this will only be called during shutdown).
+static void jimage_close() {
+  if (JImage_file == nullptr) {
+    // Nothing is open, so there is nothing to close.
+    return;
+  }
+  (*JImageClose)(JImage_file);
+  JImage_file = nullptr;
+}
+
+// Returns whether a JImage file was opened, i.e. JImage_file is non-null. This says
+// nothing about whether it was initialized yet (see jimage_is_initialized()).
+static bool jimage_is_open() {
+ return JImage_file != nullptr;
+}
+
+// Returns the JImage file reference (which may or may not be initialized).
+// Asserts that the file is open, so this must not be used for exploded builds
+// or before jimage_open() has succeeded.
+static JImageFile* jimage_non_null() {
+ assert(jimage_is_open(), "should have been opened by ClassLoader::lookup_vm_options "
+ "and remains open throughout normal JVM lifetime");
+ return JImage_file;
+}
+
+// Returns true once the JImage file is open and ClassLoader::set_preview_mode() has been
+// called (Preview_mode is no longer PREVIEW_MODE_UNINITIALIZED). Once the JImage file is
+// initialized, is_preview_enabled() can be called to correctly determine the access mode.
+static bool jimage_is_initialized() {
+ return jimage_is_open() && Preview_mode != PREVIEW_MODE_UNINITIALIZED;
+}
+
+// Returns the access mode for an initialized JImage file (reflects --enable-preview).
+// True only when Preview_mode is PREVIEW_MODE_ENABLE_PREVIEW; false in default mode
+// and also while Preview_mode is still PREVIEW_MODE_UNINITIALIZED.
+static bool is_preview_enabled() {
+ return Preview_mode == PREVIEW_MODE_ENABLE_PREVIEW;
+}
+
+// Looks up the location of the resource 'file_name' in module 'module_name' of the
+// open JImage file and stores its size through 'size'. This is the "raw" lookup: the
+// caller chooses the preview mode explicitly via 'is_preview', so it must not be
+// accessible outside this file. ClassPathImageEntry manages all calls for resources
+// after startup is complete.
+static JImageLocationRef jimage_find_resource(const char* module_name,
+                                              const char* file_name,
+                                              bool is_preview,
+                                              jlong* size) {
+  // jimage_non_null() asserts that the JImage file has been opened.
+  JImageFile* const image = jimage_non_null();
+  return (*JImageFindResource)(image, module_name, file_name, is_preview, size);
+}
+// --------------------------------
+
// Given a fully qualified package name, find its defining package in the class loader's
// package entry table.
PackageEntry* ClassLoader::get_package_entry(Symbol* pkg_name, ClassLoaderData* loader_data) {
@@ -372,28 +435,15 @@ ClassFileStream* ClassPathZipEntry::open_stream(JavaThread* current, const char*
DEBUG_ONLY(ClassPathImageEntry* ClassPathImageEntry::_singleton = nullptr;)
-JImageFile* ClassPathImageEntry::jimage() const {
- return JImage_file;
-}
-
-JImageFile* ClassPathImageEntry::jimage_non_null() const {
- assert(ClassLoader::has_jrt_entry(), "must be");
- assert(jimage() != nullptr, "should have been opened by ClassLoader::lookup_vm_options "
- "and remained throughout normal JVM lifetime");
- return jimage();
-}
-
void ClassPathImageEntry::close_jimage() {
- if (jimage() != nullptr) {
- (*JImageClose)(jimage());
- JImage_file = nullptr;
- }
+ jimage_close();
}
-ClassPathImageEntry::ClassPathImageEntry(JImageFile* jimage, const char* name) :
+ClassPathImageEntry::ClassPathImageEntry(const char* name) :
ClassPathEntry() {
- guarantee(jimage != nullptr, "jimage file is null");
+ guarantee(jimage_is_initialized(), "jimage is not initialized");
guarantee(name != nullptr, "jimage file name is null");
+
assert(_singleton == nullptr, "VM supports only one jimage");
DEBUG_ONLY(_singleton = this);
size_t len = strlen(name) + 1;
@@ -412,6 +462,8 @@ ClassFileStream* ClassPathImageEntry::open_stream(JavaThread* current, const cha
// 2. A package is in at most one module in the jimage file.
//
ClassFileStream* ClassPathImageEntry::open_stream_for_loader(JavaThread* current, const char* name, ClassLoaderData* loader_data) {
+ const bool is_preview = is_preview_enabled();
+
jlong size;
JImageLocationRef location = 0;
@@ -420,7 +472,7 @@ ClassFileStream* ClassPathImageEntry::open_stream_for_loader(JavaThread* current
if (pkg_name != nullptr) {
if (!Universe::is_module_initialized()) {
- location = (*JImageFindResource)(jimage_non_null(), JAVA_BASE_NAME, get_jimage_version_string(), name, &size);
+ location = jimage_find_resource(JAVA_BASE_NAME, name, is_preview, &size);
} else {
PackageEntry* package_entry = ClassLoader::get_package_entry(pkg_name, loader_data);
if (package_entry != nullptr) {
@@ -431,7 +483,7 @@ ClassFileStream* ClassPathImageEntry::open_stream_for_loader(JavaThread* current
assert(module->is_named(), "Boot classLoader package is in unnamed module");
const char* module_name = module->name()->as_C_string();
if (module_name != nullptr) {
- location = (*JImageFindResource)(jimage_non_null(), module_name, get_jimage_version_string(), name, &size);
+ location = jimage_find_resource(module_name, name, is_preview, &size);
}
}
}
@@ -444,7 +496,7 @@ ClassFileStream* ClassPathImageEntry::open_stream_for_loader(JavaThread* current
char* data = NEW_RESOURCE_ARRAY(char, size);
(*JImageGetResource)(jimage_non_null(), location, data, size);
// Resource allocated
- assert(this == (ClassPathImageEntry*)ClassLoader::get_jrt_entry(), "must be");
+ assert(this == ClassLoader::get_jrt_entry(), "must be");
return new ClassFileStream((u1*)data,
checked_cast(size),
_name,
@@ -454,16 +506,9 @@ ClassFileStream* ClassPathImageEntry::open_stream_for_loader(JavaThread* current
return nullptr;
}
-JImageLocationRef ClassLoader::jimage_find_resource(JImageFile* jf,
- const char* module_name,
- const char* file_name,
- jlong &size) {
- return ((*JImageFindResource)(jf, module_name, get_jimage_version_string(), file_name, &size));
-}
-
bool ClassPathImageEntry::is_modules_image() const {
assert(this == _singleton, "VM supports a single jimage");
- assert(this == (ClassPathImageEntry*)ClassLoader::get_jrt_entry(), "must be used for jrt entry");
+ assert(this == ClassLoader::get_jrt_entry(), "must be used for jrt entry");
return true;
}
@@ -618,14 +663,15 @@ void ClassLoader::setup_bootstrap_search_path_impl(JavaThread* current, const ch
struct stat st;
if (os::stat(path, &st) == 0) {
// Directory found
- if (JImage_file != nullptr) {
+ if (jimage_is_open()) {
assert(Arguments::has_jimage(), "sanity check");
const char* canonical_path = get_canonical_path(path, current);
assert(canonical_path != nullptr, "canonical_path issue");
- _jrt_entry = new ClassPathImageEntry(JImage_file, canonical_path);
+ // Hand over lifecycle control of the JImage file to the _jrt_entry singleton
+ // (see ClassPathImageEntry::close_jimage). The image must be initialized by now.
+ _jrt_entry = new ClassPathImageEntry(canonical_path);
assert(_jrt_entry != nullptr && _jrt_entry->is_modules_image(), "No java runtime image present");
- assert(_jrt_entry->jimage() != nullptr, "No java runtime image");
} // else it's an exploded build.
} else {
// If path does not exist, exit
@@ -645,7 +691,7 @@ void ClassLoader::setup_bootstrap_search_path_impl(JavaThread* current, const ch
static const char* get_exploded_module_path(const char* module_name, bool c_heap) {
const char *home = Arguments::get_java_home();
const char file_sep = os::file_separator()[0];
- // 10 represents the length of "modules" + 2 file separators + \0
+ // 10 represents the length of "modules" (7) + 2 file separators + \0
size_t len = strlen(home) + strlen(module_name) + 10;
char *path = c_heap ? NEW_C_HEAP_ARRAY(char, len, mtModule) : NEW_RESOURCE_ARRAY(char, len);
jio_snprintf(path, len, "%s%cmodules%c%s", home, file_sep, file_sep, module_name);
@@ -1398,20 +1444,8 @@ void ClassLoader::initialize(TRAPS) {
setup_bootstrap_search_path(THREAD);
}
-static char* lookup_vm_resource(JImageFile *jimage, const char *jimage_version, const char *path) {
- jlong size;
- JImageLocationRef location = (*JImageFindResource)(jimage, "java.base", jimage_version, path, &size);
- if (location == 0)
- return nullptr;
- char *val = NEW_C_HEAP_ARRAY(char, size+1, mtClass);
- (*JImageGetResource)(jimage, location, val, size);
- val[size] = '\0';
- return val;
-}
-
// Lookup VM options embedded in the modules jimage file
char* ClassLoader::lookup_vm_options() {
- jint error;
char modules_path[JVM_MAXPATHLEN];
const char* fileSep = os::file_separator();
@@ -1419,32 +1453,27 @@ char* ClassLoader::lookup_vm_options() {
load_jimage_library();
jio_snprintf(modules_path, JVM_MAXPATHLEN, "%s%slib%smodules", Arguments::get_java_home(), fileSep, fileSep);
- JImage_file =(*JImageOpen)(modules_path, &error);
- if (JImage_file == nullptr) {
- if (Arguments::has_jimage()) {
- // The modules file exists but is unreadable or corrupt
- vm_exit_during_initialization(err_msg("Unable to load %s", modules_path));
+ if (jimage_open(modules_path)) {
+ // Special case where we lookup the options string *before* set_preview_mode() is called.
+ // Since VM arguments have not been parsed, and the ClassPathImageEntry singleton
+ // has not been created yet, we access the JImage file directly in non-preview mode.
+ jlong size;
+ JImageLocationRef location =
+ jimage_find_resource(JAVA_BASE_NAME, "jdk/internal/vm/options", /* is_preview */ false, &size);
+ if (location != 0) {
+ char* options = NEW_C_HEAP_ARRAY(char, size+1, mtClass);
+ (*JImageGetResource)(jimage_non_null(), location, options, size);
+ options[size] = '\0';
+ return options;
}
- return nullptr;
}
-
- const char *jimage_version = get_jimage_version_string();
- char *options = lookup_vm_resource(JImage_file, jimage_version, "jdk/internal/vm/options");
- return options;
+ return nullptr;
}
-bool ClassLoader::is_module_observable(const char* module_name) {
- assert(JImageOpen != nullptr, "jimage library should have been opened");
- if (JImage_file == nullptr) {
- struct stat st;
- const char *path = get_exploded_module_path(module_name, true);
- bool res = os::stat(path, &st) == 0;
- FREE_C_HEAP_ARRAY(path);
- return res;
- }
- jlong size;
- const char *jimage_version = get_jimage_version_string();
- return (*JImageFindResource)(JImage_file, module_name, jimage_version, "module-info.class", &size) != 0;
+// Finishes initializing the JImageFile (if present) by recording the access mode.
+// Asserts that the mode has not been set before, so it may be called only once.
+void ClassLoader::set_preview_mode(bool enable_preview) {
+  assert(Preview_mode == PREVIEW_MODE_UNINITIALIZED, "set_preview_mode must not be called twice");
+  if (enable_preview) {
+    Preview_mode = PREVIEW_MODE_ENABLE_PREVIEW;
+  } else {
+    Preview_mode = PREVIEW_MODE_DEFAULT;
+  }
+}
jlong ClassLoader::classloader_time_ms() {
diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp
index a935d3027ac..b750a2daf68 100644
--- a/src/hotspot/share/classfile/classLoader.hpp
+++ b/src/hotspot/share/classfile/classLoader.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -99,7 +99,8 @@ class ClassPathZipEntry: public ClassPathEntry {
};
-// For java image files
+// A singleton path entry which takes ownership of the initialized JImageFile
+// reference. Not used for exploded builds.
class ClassPathImageEntry: public ClassPathEntry {
private:
const char* _name;
@@ -107,11 +108,12 @@ private:
public:
bool is_modules_image() const;
const char* name() const { return _name == nullptr ? "" : _name; }
- JImageFile* jimage() const;
- JImageFile* jimage_non_null() const;
+ // Called to close the JImage during os::abort (normally not called).
void close_jimage();
- ClassPathImageEntry(JImageFile* jimage, const char* name);
+ // Takes effective ownership of the static JImageFile pointer.
+ ClassPathImageEntry(const char* name);
virtual ~ClassPathImageEntry() { ShouldNotReachHere(); }
+
ClassFileStream* open_stream(JavaThread* current, const char* name);
ClassFileStream* open_stream_for_loader(JavaThread* current, const char* name, ClassLoaderData* loader_data);
};
@@ -201,10 +203,10 @@ class ClassLoader: AllStatic {
static GrowableArray* _patch_mod_entries;
// 2. the base piece
- // Contains the ClassPathEntry of the modular java runtime image.
+ // Contains the ClassPathImageEntry of the modular java runtime image.
// If no java runtime image is present, this indicates a
// build with exploded modules is being used instead.
- static ClassPathEntry* _jrt_entry;
+ static ClassPathImageEntry* _jrt_entry;
static GrowableArray* _exploded_entries;
enum { EXPLODED_ENTRY_SIZE = 80 }; // Initial number of exploded modules
@@ -354,14 +356,15 @@ class ClassLoader: AllStatic {
static void append_boot_classpath(ClassPathEntry* new_entry);
#endif
+ // Retrieves additional VM options prior to flags processing. Options held
+ // in the JImage file are retrieved without fully initializing it (this is
+ // the only JImage lookup which can succeed before set_preview_mode() is called).
static char* lookup_vm_options();
- // Determines if the named module is present in the
- // modules jimage file or in the exploded modules directory.
- static bool is_module_observable(const char* module_name);
-
- static JImageLocationRef jimage_find_resource(JImageFile* jf, const char* module_name,
- const char* file_name, jlong &size);
+ // Called once, after all flags are processed, to finish initializing the
+ // JImage file. Until this is called, the ClassPathImageEntry for the JImage
+ // cannot be created, so normal JImage resource lookups are unavailable.
+ static void set_preview_mode(bool enable_preview);
static void trace_class_path(const char* msg, const char* name = nullptr);
diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp
index ef1eeec14dd..c659018dacb 100644
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@@ -89,9 +89,6 @@
#include "utilities/growableArray.hpp"
#include "utilities/preserveException.hpp"
#include "utilities/utf8.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciJavaClasses.hpp"
-#endif
#define DECLARE_INJECTED_FIELD(klass, name, signature, may_be_java) \
{ VM_CLASS_ID(klass), VM_SYMBOL_ENUM_NAME(name##_name), VM_SYMBOL_ENUM_NAME(signature), may_be_java },
@@ -3154,23 +3151,6 @@ void java_lang_StackTraceElement::decode_file_and_line(Handle java_class,
line_number = Backtrace::get_line_number(method(), bci);
}
-#if INCLUDE_JVMCI
-void java_lang_StackTraceElement::decode(const methodHandle& method, int bci,
- Symbol*& filename, int& line_number, TRAPS) {
- ResourceMark rm(THREAD);
- HandleMark hm(THREAD);
-
- filename = nullptr;
- line_number = -1;
-
- oop source_file;
- int version = method->constants()->version();
- InstanceKlass* holder = method->method_holder();
- Handle java_class(THREAD, holder->java_mirror());
- decode_file_and_line(java_class, holder, version, method, bci, filename, source_file, line_number, CHECK);
-}
-#endif // INCLUDE_JVMCI
-
// java_lang_ClassFrameInfo
int java_lang_ClassFrameInfo::_classOrMemberName_offset;
@@ -3588,6 +3568,7 @@ int java_lang_reflect_Field::_modifiers_offset;
int java_lang_reflect_Field::_trusted_final_offset;
int java_lang_reflect_Field::_signature_offset;
int java_lang_reflect_Field::_annotations_offset;
+JFR_ONLY(int java_lang_reflect_Field::_jfr_epoch_offset;)
#define FIELD_FIELDS_DO(macro) \
macro(_clazz_offset, k, vmSymbols::clazz_name(), class_signature, false); \
@@ -3602,11 +3583,13 @@ int java_lang_reflect_Field::_annotations_offset;
void java_lang_reflect_Field::compute_offsets() {
InstanceKlass* k = vmClasses::reflect_Field_klass();
FIELD_FIELDS_DO(FIELD_COMPUTE_OFFSET);
+ JFR_ONLY(FIELD_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);)
}
#if INCLUDE_CDS
void java_lang_reflect_Field::serialize_offsets(SerializeClosure* f) {
FIELD_FIELDS_DO(FIELD_SERIALIZE_OFFSET);
+ JFR_ONLY(FIELD_INJECTED_FIELDS(INJECTED_FIELD_SERIALIZE_OFFSET);)
}
#endif
@@ -3672,6 +3655,12 @@ void java_lang_reflect_Field::set_annotations(oop field, oop value) {
field->obj_field_put(_annotations_offset, value);
}
+#if INCLUDE_JFR
+// Reads the injected jfr_epoch int field of the given java.lang.reflect.Field
+// oop and returns it narrowed to u2.
+u2 java_lang_reflect_Field::epoch(oop ref) {
+  return static_cast<u2>(ref->int_field(_jfr_epoch_offset));
+}
+#endif // INCLUDE_JFR
+
oop java_lang_reflect_RecordComponent::create(InstanceKlass* holder, RecordComponent* component, TRAPS) {
// Allocate java.lang.reflect.RecordComponent instance
HandleMark hm(THREAD);
@@ -5102,12 +5091,6 @@ void java_lang_Integer_IntegerCache::serialize_offsets(SerializeClosure* f) {
#endif
#undef INTEGER_CACHE_FIELDS_DO
-jint java_lang_Integer::value(oop obj) {
- jvalue v;
- java_lang_boxing_object::get_value(obj, &v);
- return v.i;
-}
-
#define LONG_CACHE_FIELDS_DO(macro) \
macro(_static_cache_offset, k, "cache", java_lang_Long_array_signature, true)
@@ -5132,12 +5115,6 @@ void java_lang_Long_LongCache::serialize_offsets(SerializeClosure* f) {
#endif
#undef LONG_CACHE_FIELDS_DO
-jlong java_lang_Long::value(oop obj) {
- jvalue v;
- java_lang_boxing_object::get_value(obj, &v);
- return v.j;
-}
-
#define CHARACTER_CACHE_FIELDS_DO(macro) \
macro(_static_cache_offset, k, "cache", java_lang_Character_array_signature, true)
@@ -5162,12 +5139,6 @@ void java_lang_Character_CharacterCache::serialize_offsets(SerializeClosure* f)
#endif
#undef CHARACTER_CACHE_FIELDS_DO
-jchar java_lang_Character::value(oop obj) {
- jvalue v;
- java_lang_boxing_object::get_value(obj, &v);
- return v.c;
-}
-
#define SHORT_CACHE_FIELDS_DO(macro) \
macro(_static_cache_offset, k, "cache", java_lang_Short_array_signature, true)
@@ -5192,12 +5163,6 @@ void java_lang_Short_ShortCache::serialize_offsets(SerializeClosure* f) {
#endif
#undef SHORT_CACHE_FIELDS_DO
-jshort java_lang_Short::value(oop obj) {
- jvalue v;
- java_lang_boxing_object::get_value(obj, &v);
- return v.s;
-}
-
#define BYTE_CACHE_FIELDS_DO(macro) \
macro(_static_cache_offset, k, "cache", java_lang_Byte_array_signature, true)
@@ -5222,12 +5187,6 @@ void java_lang_Byte_ByteCache::serialize_offsets(SerializeClosure* f) {
#endif
#undef BYTE_CACHE_FIELDS_DO
-jbyte java_lang_Byte::value(oop obj) {
- jvalue v;
- java_lang_boxing_object::get_value(obj, &v);
- return v.b;
-}
-
int java_lang_Boolean::_static_TRUE_offset;
int java_lang_Boolean::_static_FALSE_offset;
@@ -5241,16 +5200,6 @@ void java_lang_Boolean::compute_offsets(InstanceKlass *k) {
BOOLEAN_FIELDS_DO(FIELD_COMPUTE_OFFSET);
}
-oop java_lang_Boolean::get_TRUE(InstanceKlass *ik) {
- oop base = ik->static_field_base_raw();
- return base->obj_field(_static_TRUE_offset);
-}
-
-oop java_lang_Boolean::get_FALSE(InstanceKlass *ik) {
- oop base = ik->static_field_base_raw();
- return base->obj_field(_static_FALSE_offset);
-}
-
Symbol* java_lang_Boolean::symbol() {
return vmSymbols::java_lang_Boolean();
}
@@ -5262,12 +5211,6 @@ void java_lang_Boolean::serialize_offsets(SerializeClosure* f) {
#endif
#undef BOOLEAN_CACHE_FIELDS_DO
-jboolean java_lang_Boolean::value(oop obj) {
- jvalue v;
- java_lang_boxing_object::get_value(obj, &v);
- return v.z;
-}
-
// java_lang_reflect_RecordComponent
int java_lang_reflect_RecordComponent::_clazz_offset;
diff --git a/src/hotspot/share/classfile/javaClasses.hpp b/src/hotspot/share/classfile/javaClasses.hpp
index 3276d398faf..04673e904de 100644
--- a/src/hotspot/share/classfile/javaClasses.hpp
+++ b/src/hotspot/share/classfile/javaClasses.hpp
@@ -132,6 +132,7 @@ class java_lang_String : AllStatic {
static inline bool is_latin1(oop java_string);
static inline bool deduplication_forbidden(oop java_string);
static inline bool deduplication_requested(oop java_string);
+ static inline bool deduplication_requested_or_forbidden(oop java_string);
static inline int length(oop java_string);
static inline int length(oop java_string, typeArrayOop string_value);
static size_t utf8_length(oop java_string);
@@ -233,7 +234,6 @@ class java_lang_String : AllStatic {
class java_lang_Class : AllStatic {
friend class VMStructs;
- friend class JVMCIVMStructs;
friend class HeapShared;
private:
@@ -381,7 +381,6 @@ class java_lang_Class : AllStatic {
class java_lang_Thread : AllStatic {
friend class java_lang_VirtualThread;
- friend class JVMCIVMStructs;
private:
// Note that for this class the layout changed between JDK1.2 and JDK1.3,
// so we compute the offsets at startup rather than hard-wiring them.
@@ -819,6 +818,10 @@ class java_lang_reflect_Constructor : public java_lang_reflect_AccessibleObject
friend class JavaClasses;
};
+#if INCLUDE_JFR
+#define FIELD_INJECTED_FIELDS(macro) \
+ macro(java_lang_reflect_Field, jfr_epoch, int_signature, false)
+#endif // INCLUDE_JFR
// Interface to java.lang.reflect.Field objects
@@ -834,6 +837,7 @@ class java_lang_reflect_Field : public java_lang_reflect_AccessibleObject {
static int _trusted_final_offset;
static int _signature_offset;
static int _annotations_offset;
+ JFR_ONLY(static int _jfr_epoch_offset;)
static void compute_offsets();
@@ -864,6 +868,9 @@ class java_lang_reflect_Field : public java_lang_reflect_AccessibleObject {
static void set_signature(oop constructor, oop value);
static void set_annotations(oop constructor, oop value);
+ JFR_ONLY(static u2 epoch(oop field);)
+ JFR_ONLY(static int epoch_offset() { CHECK_INIT(_jfr_epoch_offset); })
+
// Debugging
friend class JavaClasses;
};
@@ -1573,10 +1580,6 @@ class java_lang_StackTraceElement: AllStatic {
static void compute_offsets();
static void serialize_offsets(SerializeClosure* f) NOT_CDS_RETURN;
-#if INCLUDE_JVMCI
- static void decode(const methodHandle& method, int bci, Symbol*& fileName, int& lineNumber, TRAPS);
-#endif
-
// Debugging
friend class JavaClasses;
};
@@ -1770,31 +1773,6 @@ class vector_VectorPayload : AllStatic {
static bool is_instance(oop obj);
};
-class java_lang_Integer : AllStatic {
-public:
- static jint value(oop obj);
-};
-
-class java_lang_Long : AllStatic {
-public:
- static jlong value(oop obj);
-};
-
-class java_lang_Character : AllStatic {
-public:
- static jchar value(oop obj);
-};
-
-class java_lang_Short : AllStatic {
-public:
- static jshort value(oop obj);
-};
-
-class java_lang_Byte : AllStatic {
-public:
- static jbyte value(oop obj);
-};
-
class java_lang_Boolean : AllStatic {
private:
static int _static_TRUE_offset;
@@ -1802,10 +1780,7 @@ class java_lang_Boolean : AllStatic {
public:
static Symbol* symbol();
static void compute_offsets(InstanceKlass* k);
- static oop get_TRUE(InstanceKlass *k);
- static oop get_FALSE(InstanceKlass *k);
static void serialize_offsets(SerializeClosure* f) NOT_CDS_RETURN;
- static jboolean value(oop obj);
};
class java_lang_Integer_IntegerCache : AllStatic {
diff --git a/src/hotspot/share/classfile/javaClasses.inline.hpp b/src/hotspot/share/classfile/javaClasses.inline.hpp
index 21ad62f8408..7843126c870 100644
--- a/src/hotspot/share/classfile/javaClasses.inline.hpp
+++ b/src/hotspot/share/classfile/javaClasses.inline.hpp
@@ -91,6 +91,10 @@ bool java_lang_String::deduplication_requested(oop java_string) {
return is_flag_set(java_string, _deduplication_requested_mask);
}
+// Tests the string's flags against the combined requested | forbidden mask.
+// NOTE(review): presumably is_flag_set() is true when any bit of the mask is set,
+// making this "requested OR forbidden" as the name says - confirm against is_flag_set().
+bool java_lang_String::deduplication_requested_or_forbidden(oop java_string) {
+ return is_flag_set(java_string, _deduplication_requested_mask | _deduplication_forbidden_mask);
+}
+
void java_lang_String::set_deduplication_forbidden(oop java_string) {
test_and_set_flag(java_string, _deduplication_forbidden_mask);
}
diff --git a/src/hotspot/share/classfile/javaClassesImpl.hpp b/src/hotspot/share/classfile/javaClassesImpl.hpp
index 77975598ded..b0ca78bf0c9 100644
--- a/src/hotspot/share/classfile/javaClassesImpl.hpp
+++ b/src/hotspot/share/classfile/javaClassesImpl.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -43,7 +43,8 @@
VTHREAD_INJECTED_FIELDS(macro) \
INTERNALERROR_INJECTED_FIELDS(macro) \
STACKCHUNK_INJECTED_FIELDS(macro) \
- CONSTANTPOOL_INJECTED_FIELDS(macro)
+ CONSTANTPOOL_INJECTED_FIELDS(macro) \
+ JFR_ONLY(FIELD_INJECTED_FIELDS(macro))
#define INJECTED_FIELD_COMPUTE_OFFSET(klass, name, signature, may_be_java) \
klass::_##name##_offset = JavaClasses::compute_injected_offset(InjectedFieldID::klass##_##name##_enum);
diff --git a/src/hotspot/share/classfile/metadataOnStackMark.cpp b/src/hotspot/share/classfile/metadataOnStackMark.cpp
index 51ce2c263a3..a397774484b 100644
--- a/src/hotspot/share/classfile/metadataOnStackMark.cpp
+++ b/src/hotspot/share/classfile/metadataOnStackMark.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,9 +31,6 @@
#include "runtime/synchronizer.hpp"
#include "services/threadService.hpp"
#include "utilities/chunkedList.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
MetadataOnStackBuffer* MetadataOnStackMark::_used_buffers = nullptr;
MetadataOnStackBuffer* MetadataOnStackMark::_free_buffers = nullptr;
@@ -70,9 +67,6 @@ MetadataOnStackMark::MetadataOnStackMark(bool walk_all_metadata, bool redefiniti
}
CompileBroker::mark_on_stack();
ThreadService::metadata_do(Metadata::mark_on_stack);
-#if INCLUDE_JVMCI
- JVMCI::metadata_do(Metadata::mark_on_stack);
-#endif
}
}
diff --git a/src/hotspot/share/classfile/modules.cpp b/src/hotspot/share/classfile/modules.cpp
index 51d09d9c47f..2ab3078c1c3 100644
--- a/src/hotspot/share/classfile/modules.cpp
+++ b/src/hotspot/share/classfile/modules.cpp
@@ -453,7 +453,7 @@ void Modules::define_module(Handle module, jboolean is_open, jstring version,
ClassLoader::add_to_exploded_build_list(THREAD, module_symbol);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
// Special handling of jdk.incubator.vector
if (strcmp(module_name, "jdk.incubator.vector") == 0) {
if (FLAG_IS_DEFAULT(EnableVectorSupport)) {
@@ -469,7 +469,7 @@ void Modules::define_module(Handle module, jboolean is_open, jstring version,
log_info(compilation)("EnableVectorReboxing=%s", (EnableVectorReboxing ? "true" : "false"));
log_info(compilation)("EnableVectorAggressiveReboxing=%s", (EnableVectorAggressiveReboxing ? "true" : "false"));
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
#if INCLUDE_CDS_JAVA_HEAP
diff --git a/src/hotspot/share/classfile/stringTable.cpp b/src/hotspot/share/classfile/stringTable.cpp
index c3f60487b9c..0c3f0f716a9 100644
--- a/src/hotspot/share/classfile/stringTable.cpp
+++ b/src/hotspot/share/classfile/stringTable.cpp
@@ -86,7 +86,7 @@ typedef CompactHashtable<
StringTable::read_string_from_compact_hashtable,
StringTable::wrapped_string_equals> SharedStringTable;
-static SharedStringTable _shared_table;
+static SharedStringTable _shared_table, _shared_table_for_dumping;
#endif
// --------------------------------------------------------------------------
@@ -961,7 +961,7 @@ void StringTable::write_shared_table() {
precond(CDSConfig::is_dumping_heap());
assert(HeapShared::is_writing_mapping_mode(), "not used for streamed oops");
- _shared_table.reset();
+ _shared_table_for_dumping.reset();
CompactHashtableWriter writer((int)items_count_acquire(), ArchiveBuilder::string_stats());
auto copy_into_shared_table = [&] (WeakHandle* val) {
@@ -974,17 +974,16 @@ void StringTable::write_shared_table() {
return true;
};
_local_table->do_safepoint_scan(copy_into_shared_table);
- writer.dump(&_shared_table, "string");
+ writer.dump(&_shared_table_for_dumping, "string");
}
void StringTable::serialize_shared_table_header(SerializeClosure* soc) {
- _shared_table.serialize_header(soc);
+ SharedStringTable* table = soc->reading() ? &_shared_table : &_shared_table_for_dumping;  // Reading targets _shared_table; otherwise use the table that write_shared_table() dumped into.
- if (soc->writing()) {
- // Sanity. Make sure we don't use the shared table at dump time
- _shared_table.reset();
- } else if (!AOTMappedHeapLoader::is_in_use()) {
- _shared_table.reset();
+ table->serialize_header(soc);
+ if (soc->reading() && !AOTMappedHeapLoader::is_in_use()) {  // Only the reading path resets; the dumping table is left as written.
+ // AOTStreamedHeapLoader does not use _shared_table.
+ table->reset();
}
}
diff --git a/src/hotspot/share/classfile/systemDictionaryShared.cpp b/src/hotspot/share/classfile/systemDictionaryShared.cpp
index 330b8e81d7f..62a790e9820 100644
--- a/src/hotspot/share/classfile/systemDictionaryShared.cpp
+++ b/src/hotspot/share/classfile/systemDictionaryShared.cpp
@@ -79,8 +79,9 @@
#include "utilities/hashTable.hpp"
#include "utilities/stringUtils.hpp"
-SystemDictionaryShared::ArchiveInfo SystemDictionaryShared::_static_archive;
-SystemDictionaryShared::ArchiveInfo SystemDictionaryShared::_dynamic_archive;
+SystemDictionaryShared::ArchiveInfo SystemDictionaryShared::_info_for_static_archive;
+SystemDictionaryShared::ArchiveInfo SystemDictionaryShared::_info_for_dynamic_archive;
+SystemDictionaryShared::ArchiveInfo SystemDictionaryShared::_info_for_dumping;
DumpTimeSharedClassTable* SystemDictionaryShared::_dumptime_table = nullptr;
@@ -132,8 +133,8 @@ InstanceKlass* SystemDictionaryShared::lookup_from_stream(Symbol* class_name,
return nullptr;
}
- const RunTimeClassInfo* record = find_record(&_static_archive._unregistered_dictionary,
- &_dynamic_archive._unregistered_dictionary,
+ const RunTimeClassInfo* record = find_record(&_info_for_static_archive._unregistered_dictionary,
+ &_info_for_dynamic_archive._unregistered_dictionary,
class_name);
if (record == nullptr) {
return nullptr;
@@ -701,7 +702,7 @@ void SystemDictionaryShared::copy_unregistered_class_size_and_crc32(InstanceKlas
precond(klass->in_aot_cache());
// A shared class must have a RunTimeClassInfo record
- const RunTimeClassInfo* record = find_record(&_static_archive._unregistered_dictionary,
+ const RunTimeClassInfo* record = find_record(&_info_for_static_archive._unregistered_dictionary,
nullptr, klass->name());
precond(record != nullptr);
precond(record->klass() == klass);
@@ -1335,7 +1336,7 @@ void SystemDictionaryShared::write_dictionary(RunTimeSharedDictionary* dictionar
}
void SystemDictionaryShared::write_to_archive(bool is_static_archive) {
- ArchiveInfo* archive = get_archive(is_static_archive);
+ ArchiveInfo* archive = get_archive(is_static_archive, /*is_dumping=*/true);
write_dictionary(&archive->_builtin_dictionary, true);
write_dictionary(&archive->_unregistered_dictionary, false);
@@ -1348,7 +1349,7 @@ void SystemDictionaryShared::write_to_archive(bool is_static_archive) {
void SystemDictionaryShared::serialize_dictionary_headers(SerializeClosure* soc,
bool is_static_archive) {
- ArchiveInfo* archive = get_archive(is_static_archive);
+ ArchiveInfo* archive = get_archive(is_static_archive, soc->writing());
archive->_builtin_dictionary.serialize_header(soc);
archive->_unregistered_dictionary.serialize_header(soc);
@@ -1395,8 +1396,8 @@ SystemDictionaryShared::find_record(RunTimeSharedDictionary* static_dict, RunTim
}
InstanceKlass* SystemDictionaryShared::find_builtin_class(Symbol* name) {
- const RunTimeClassInfo* record = find_record(&_static_archive._builtin_dictionary,
- &_dynamic_archive._builtin_dictionary,
+ const RunTimeClassInfo* record = find_record(&_info_for_static_archive._builtin_dictionary,
+ &_info_for_dynamic_archive._builtin_dictionary,
name);
if (record != nullptr) {
assert(!record->klass()->is_hidden(), "hidden class cannot be looked up by name");
@@ -1437,11 +1438,12 @@ const char* SystemDictionaryShared::loader_type_for_shared_class(Klass* k) {
}
void SystemDictionaryShared::get_all_archived_classes(bool is_static_archive, GrowableArray* classes) {
- get_archive(is_static_archive)->_builtin_dictionary.iterate_all([&] (const RunTimeClassInfo* record) {
+ ArchiveInfo* archive = get_archive(is_static_archive, /*is_dumping=*/false);  // Fetch the non-dumping archive info once; both dictionaries below come from it.
+ archive->_builtin_dictionary.iterate_all([&] (const RunTimeClassInfo* record) {  // Builtin-dictionary classes are appended first.
classes->append(record->klass());
});
- get_archive(is_static_archive)->_unregistered_dictionary.iterate_all([&] (const RunTimeClassInfo* record) {
+ archive->_unregistered_dictionary.iterate_all([&] (const RunTimeClassInfo* record) {  // Unregistered-dictionary classes follow.
classes->append(record->klass());
});
}
@@ -1488,10 +1490,10 @@ void SystemDictionaryShared::ArchiveInfo::print_table_statistics(const char* pre
void SystemDictionaryShared::print_shared_archive(outputStream* st, bool is_static) {
if (CDSConfig::is_using_archive()) {
if (is_static) {
- _static_archive.print_on("", st, true);
+ _info_for_static_archive.print_on("", st, true);
} else {
if (DynamicArchive::is_mapped()) {
- _dynamic_archive.print_on("Dynamic ", st, false);
+ _info_for_dynamic_archive.print_on("Dynamic ", st, false);
}
}
}
@@ -1504,9 +1506,9 @@ void SystemDictionaryShared::print_on(outputStream* st) {
void SystemDictionaryShared::print_table_statistics(outputStream* st) {
if (CDSConfig::is_using_archive()) {
- _static_archive.print_table_statistics("Static ", st, true);
+ _info_for_static_archive.print_table_statistics("Static ", st, true);
if (DynamicArchive::is_mapped()) {
- _dynamic_archive.print_table_statistics("Dynamic ", st, false);
+ _info_for_dynamic_archive.print_table_statistics("Dynamic ", st, false);
}
}
}
diff --git a/src/hotspot/share/classfile/systemDictionaryShared.hpp b/src/hotspot/share/classfile/systemDictionaryShared.hpp
index 740c7370d28..f83f5c21fb8 100644
--- a/src/hotspot/share/classfile/systemDictionaryShared.hpp
+++ b/src/hotspot/share/classfile/systemDictionaryShared.hpp
@@ -150,11 +150,16 @@ private:
class ExclusionCheckCandidates;
static DumpTimeSharedClassTable* _dumptime_table;
- static ArchiveInfo _static_archive;
- static ArchiveInfo _dynamic_archive;
+ static ArchiveInfo _info_for_static_archive;
+ static ArchiveInfo _info_for_dynamic_archive;
+ static ArchiveInfo _info_for_dumping;
- static ArchiveInfo* get_archive(bool is_static_archive) {
- return is_static_archive ? &_static_archive : &_dynamic_archive;
+ static ArchiveInfo* get_archive(bool is_static_archive, bool is_dumping) {  // Selects which ArchiveInfo instance a caller should operate on.
+ if (is_dumping) {
+ return &_info_for_dumping;  // is_static_archive is ignored when dumping.
+ } else {
+ return is_static_archive ? &_info_for_static_archive : &_info_for_dynamic_archive;  // Non-dumping callers get the static or the dynamic archive info.
+ }
}
static InstanceKlass* load_shared_class_for_builtin_loader(
diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp
index f9b12df84ca..cec3586a50b 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@@ -487,6 +487,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
if (!UseSHA512Intrinsics) return true;
break;
case vmIntrinsics::_double_keccak:
+ case vmIntrinsics::_quad_keccak:
case vmIntrinsics::_sha3_implCompress:
if (!UseSHA3Intrinsics) return true;
break;
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index 3f85fd16b61..de4eea669a1 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -526,9 +526,12 @@ class methodHandle;
\
/* support for sun.security.provider.SHAKE128Parallel */ \
do_class(sun_security_provider_sha3_parallel, "sun/security/provider/SHA3Parallel") \
- do_intrinsic(_double_keccak, sun_security_provider_sha3_parallel, double_keccak_name, double_keccak_signature, F_S) \
+ do_intrinsic(_double_keccak, sun_security_provider_sha3_parallel, double_keccak_name, double_keccak_signature, F_S) \
do_name( double_keccak_name, "doubleKeccak") \
do_signature(double_keccak_signature, "([J[J)I") \
+ do_intrinsic(_quad_keccak, sun_security_provider_sha3_parallel, quad_keccak_name, quad_keccak_signature, F_S) \
+ do_name( quad_keccak_name, "quadKeccak") \
+ do_signature(quad_keccak_signature, "([J[J[J[J)I") \
\
/* support for sun.security.provider.DigestBase */ \
do_class(sun_security_provider_digestbase, "sun/security/provider/DigestBase") \
diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
index 33d00b93365..0348fae28b0 100644
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@@ -26,7 +26,6 @@
#define SHARE_CLASSFILE_VMSYMBOLS_HPP
#include "classfile/vmIntrinsics.hpp"
-#include "jvmci/vmSymbols_jvmci.hpp"
#include "memory/iterator.hpp"
#include "oops/symbol.hpp"
#include "utilities/enumIterator.hpp"
@@ -356,9 +355,6 @@ class SerializeClosure;
\
template(jdk_internal_foreign_NativeMemorySegmentImpl, "jdk/internal/foreign/NativeMemorySegmentImpl") \
\
- /* Support for JVMCI */ \
- JVMCI_VM_SYMBOLS_DO(template, do_alias) \
- \
template(java_lang_ClassFrameInfo, "java/lang/ClassFrameInfo") \
template(java_lang_StackWalker, "java/lang/StackWalker") \
template(java_lang_StackFrameInfo, "java/lang/StackFrameInfo") \
@@ -704,12 +700,6 @@ class SerializeClosure;
template(serializePropertiesToByteArray_name, "serializePropertiesToByteArray") \
template(serializeSecurityPropertiesToByteArray_name, "serializeSecurityPropertiesToByteArray") \
template(serializeAgentPropertiesToByteArray_name, "serializeAgentPropertiesToByteArray") \
- template(encodeThrowable_name, "encodeThrowable") \
- template(encodeThrowable_signature, "(Ljava/lang/Throwable;JI)I") \
- template(decodeAndThrowThrowable_name, "decodeAndThrowThrowable") \
- template(encodeAnnotations_name, "encodeAnnotations") \
- template(encodeAnnotations_signature, "([BLjava/lang/Class;Ljdk/internal/reflect/ConstantPool;Z[Ljava/lang/Class;)[B")\
- template(decodeAndThrowThrowable_signature, "(IJZZ)V") \
template(classRedefinedCount_name, "classRedefinedCount") \
template(classLoader_name, "classLoader") \
template(componentType_name, "componentType") \
@@ -784,7 +774,6 @@ ENUMERATOR_RANGE(vmSymbolID, vmSymbolID::FIRST_SID, vmSymbolID::LAST_SID)
class vmSymbols: AllStatic {
friend class vmIntrinsics;
friend class VMStructs;
- friend class JVMCIVMStructs;
static const int NO_SID = static_cast(vmSymbolID::NO_SID); // exclusive lower limit
static const int FIRST_SID = static_cast(vmSymbolID::FIRST_SID); // inclusive lower limit
diff --git a/src/hotspot/share/code/aotCodeCache.cpp b/src/hotspot/share/code/aotCodeCache.cpp
index b330ed35d0b..b05ee22905d 100644
--- a/src/hotspot/share/code/aotCodeCache.cpp
+++ b/src/hotspot/share/code/aotCodeCache.cpp
@@ -465,6 +465,7 @@ void AOTCodeCache::Config::record(uint cpu_features_offset) {
// Special configs that cannot be checked with macros
_compressedOopBase = CompressedOops::base();
+ _compressedOopShift = CompressedOops::shift();
#if defined(X86) && !defined(ZERO)
_useUnalignedLoadStores = UseUnalignedLoadStores;
@@ -577,10 +578,17 @@ bool AOTCodeCache::Config::verify(AOTCodeCache* cache) const {
AOTCODECACHE_CONFIGS_DO(AOTCODECACHE_CHECK_VAR, AOTCODECACHE_CHECK_FUN);
// Special configs that cannot be checked with macros
+#define COMPRESSED_OOPS_HINT "Consider adding -XX:+AOTCompatibleOopCompression when creating the AOT cache"
if ((_compressedOopBase == nullptr || CompressedOops::base() == nullptr) && (_compressedOopBase != CompressedOops::base())) {
load_failure_log().print_cr("AOT Code Cache disabled: incompatible CompressedOops::base(): %p vs current %p",
_compressedOopBase, CompressedOops::base());
+ load_failure_log().print_cr(COMPRESSED_OOPS_HINT);
+ return false;
+ }
+
+ if (!check_config(_compressedOopShift, CompressedOops::shift(), "CompressedOops::shift()")) {
+ load_failure_log().print_cr(COMPRESSED_OOPS_HINT);
return false;
}
@@ -1009,11 +1017,6 @@ bool AOTCodeCache::store_code_blob(CodeBlob& blob, AOTCodeEntry::Kind entry_kind
if (AOTCodeEntry::is_blob(entry_kind) && !is_dumping_stub()) {
return false;
}
- // we do not currently store C2 stubs because we are seeing weird
- // memory errors when loading them -- see JDK-8357593
- if (entry_kind == AOTCodeEntry::C2Blob) {
- return false;
- }
log_debug(aot, codecache, stubs)("Writing blob '%s' (id=%u, kind=%s) to AOT Code Cache", name, id, aot_code_entry_kind_name[entry_kind]);
#ifdef ASSERT
@@ -1284,11 +1287,6 @@ CodeBlob* AOTCodeCache::load_code_blob(AOTCodeEntry::Kind entry_kind, uint id, c
if (AOTCodeEntry::is_blob(entry_kind) && !is_using_stub()) {
return nullptr;
}
- // we do not currently load C2 stubs because we are seeing weird
- // memory errors when loading them -- see JDK-8357593
- if (entry_kind == AOTCodeEntry::C2Blob) {
- return nullptr;
- }
log_debug(aot, codecache, stubs)("Reading blob '%s' (id=%u, kind=%s) from AOT Code Cache", name, id, aot_code_entry_kind_name[entry_kind]);
AOTCodeEntry* entry = cache->find_entry(entry_kind, encode_id(entry_kind, id));
@@ -1578,10 +1576,6 @@ void AOTCodeCache::publish_stub_addresses(CodeBlob &code_blob, BlobId blob_id, A
addresses.append(deopt_blob->unpack_with_exception());
addresses.append(deopt_blob->unpack_with_reexecution());
addresses.append(deopt_blob->unpack_with_exception_in_tls());
-#if INCLUDE_JVMCI
- addresses.append(deopt_blob->uncommon_trap());
- addresses.append(deopt_blob->implicit_exception_uncommon_trap());
-#endif // INCLUDE_JVMCI
cache()->add_stub_entries(stub_id, start, &addresses, 0);
}
}
@@ -1796,7 +1790,7 @@ bool AOTCodeCache::write_asm_remarks(CodeBlob& cb) {
}
const char* cstr = add_C_string(str);
int id = _table->id_for_C_string((address)cstr);
- assert(id != -1, "asm remark string '%s' not found in AOTCodeAddressTable", str);
+ assert(id != BAD_ADDRESS_ID, "asm remark string '%s' not found in AOTCodeAddressTable", str);
n = write_bytes(&id, sizeof(int));
if (n != sizeof(int)) {
return false;
@@ -1838,7 +1832,7 @@ bool AOTCodeCache::write_dbg_strings(CodeBlob& cb) {
log_trace(aot, codecache, stubs)("dbg string=%s", str);
const char* cstr = add_C_string(str);
int id = _table->id_for_C_string((address)cstr);
- assert(id != -1, "db string '%s' not found in AOTCodeAddressTable", str);
+ assert(id != BAD_ADDRESS_ID, "db string '%s' not found in AOTCodeAddressTable", str);
uint n = write_bytes(&id, sizeof(int));
if (n != sizeof(int)) {
return false;
@@ -2103,14 +2097,19 @@ void AOTCodeAddressTable::init_extrs() {
#endif
#if INCLUDE_SHENANDOAHGC
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::write_barrier_pre);
+ ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::write_barrier_pre_narrow);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_strong);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_strong_narrow);
+ ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_strong_narrow_narrow);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_weak);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_weak_narrow);
+ ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_weak_narrow_narrow);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_phantom);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_phantom_narrow);
+ ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::load_reference_barrier_phantom_narrow_narrow);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::arraycopy_barrier_oop);
ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::arraycopy_barrier_narrow_oop);
+ ADD_EXTERNAL_ADDRESS(ShenandoahRuntime::clone);
#endif
#if INCLUDE_ZGC
ADD_EXTERNAL_ADDRESS(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr());
diff --git a/src/hotspot/share/code/aotCodeCache.hpp b/src/hotspot/share/code/aotCodeCache.hpp
index 039735dc954..777ada59a0b 100644
--- a/src/hotspot/share/code/aotCodeCache.hpp
+++ b/src/hotspot/share/code/aotCodeCache.hpp
@@ -266,9 +266,9 @@ public:
address load_archive_data(StubId stub_id, address &end, GrowableArray* entries = nullptr, GrowableArray* extras = nullptr) NOT_CDS_RETURN_(nullptr);
void store_archive_data(StubId stub_id, address start, address end, GrowableArray* entries = nullptr, GrowableArray* extras = nullptr) NOT_CDS_RETURN;
- void stub_epilog(StubId stub_id);
+ void stub_epilog(StubId stub_id) NOT_CDS_RETURN;
#ifdef ASSERT
- void check_stored(StubId stub_id);
+ void check_stored(StubId stub_id) NOT_CDS_RETURN;
#endif
const AOTStubData* as_const() { return (const AOTStubData*)this; }
};
@@ -301,7 +301,6 @@ public:
do_var(bool, UseSHA512Intrinsics) \
do_var(bool, UseVectorizedMismatchIntrinsic) \
do_fun(int, CompressedKlassPointers_shift, CompressedKlassPointers::shift()) \
- do_fun(int, CompressedOops_shift, CompressedOops::shift()) \
do_fun(bool, JavaAssertions_systemClassDefault, JavaAssertions::systemClassDefault()) \
do_fun(bool, JavaAssertions_userClassDefault, JavaAssertions::userClassDefault()) \
do_fun(CollectedHeap::Name, Universe_heap_kind, Universe::heap()->kind()) \
@@ -321,14 +320,6 @@ public:
#define AOTCODECACHE_CONFIGS_COMPILER2_DO(do_var, do_fun)
#endif
-#if INCLUDE_JVMCI
-#define AOTCODECACHE_CONFIGS_JVMCI_DO(do_var, do_fun) \
- do_var(bool, EnableJVMCI) /* adapters and nmethods */ \
- // END
-#else
-#define AOTCODECACHE_CONFIGS_JVMCI_DO(do_var, do_fun)
-#endif
-
#if defined(AARCH64) && !defined(ZERO)
#define AOTCODECACHE_CONFIGS_AARCH64_DO(do_var, do_fun) \
do_var(intx, BlockZeroingLowLimit) /* array fill stubs */ \
@@ -360,7 +351,6 @@ public:
#define AOTCODECACHE_CONFIGS_DO(do_var, do_fun) \
AOTCODECACHE_CONFIGS_GENERIC_DO(do_var, do_fun) \
AOTCODECACHE_CONFIGS_COMPILER2_DO(do_var, do_fun) \
- AOTCODECACHE_CONFIGS_JVMCI_DO(do_var, do_fun) \
AOTCODECACHE_CONFIGS_AARCH64_DO(do_var, do_fun) \
AOTCODECACHE_CONFIGS_X86_DO(do_var, do_fun) \
// END
@@ -377,6 +367,7 @@ protected:
// Special configs that cannot be checked with macros
address _compressedOopBase;
+ int _compressedOopShift;
#if defined(X86) && !defined(ZERO)
bool _useUnalignedLoadStores;
diff --git a/src/hotspot/share/code/codeBlob.hpp b/src/hotspot/share/code/codeBlob.hpp
index 709623de308..30e264fbd57 100644
--- a/src/hotspot/share/code/codeBlob.hpp
+++ b/src/hotspot/share/code/codeBlob.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -109,7 +109,6 @@ class UncommonTrapBlob;
class CodeBlob {
friend class VMStructs;
- friend class JVMCIVMStructs;
protected:
// order fields from large to small to minimize padding between fields
@@ -578,7 +577,7 @@ class SingletonBlob: public RuntimeBlob {
class DeoptimizationBlob: public SingletonBlob {
friend class VMStructs;
- friend class JVMCIVMStructs;
+
private:
int _unpack_offset;
int _unpack_with_exception;
@@ -586,12 +585,6 @@ class DeoptimizationBlob: public SingletonBlob {
int _unpack_with_exception_in_tls;
-#if INCLUDE_JVMCI
- // Offsets when JVMCI calls uncommon_trap.
- int _uncommon_trap_offset;
- int _implicit_exception_uncommon_trap_offset;
-#endif
-
// Creation support
DeoptimizationBlob(
CodeBuffer* cb,
@@ -604,7 +597,7 @@ class DeoptimizationBlob: public SingletonBlob {
);
public:
- static const int ENTRY_COUNT = 4 JVMCI_ONLY(+ 2);
+ static const int ENTRY_COUNT = 4;
// Creation
static DeoptimizationBlob* create(
CodeBuffer* cb,
@@ -630,21 +623,6 @@ class DeoptimizationBlob: public SingletonBlob {
}
address unpack_with_exception_in_tls() const { return code_begin() + _unpack_with_exception_in_tls; }
-#if INCLUDE_JVMCI
- // Offsets when JVMCI calls uncommon_trap.
- void set_uncommon_trap_offset(int offset) {
- _uncommon_trap_offset = offset;
- assert(contains(code_begin() + _uncommon_trap_offset), "must be PC inside codeblob");
- }
- address uncommon_trap() const { return (EnableJVMCI ? code_begin() + _uncommon_trap_offset : nullptr); }
-
- void set_implicit_exception_uncommon_trap_offset(int offset) {
- _implicit_exception_uncommon_trap_offset = offset;
- assert(contains(code_begin() + _implicit_exception_uncommon_trap_offset), "must be PC inside codeblob");
- }
- address implicit_exception_uncommon_trap() const { return (EnableJVMCI ? code_begin() + _implicit_exception_uncommon_trap_offset : nullptr); }
-#endif // INCLUDE_JVMCI
-
void post_restore_impl() {
trace_new_stub(this, "DeoptimizationBlob");
}
diff --git a/src/hotspot/share/code/codeCache.cpp b/src/hotspot/share/code/codeCache.cpp
index ffa88a88b29..6f3a1b09c48 100644
--- a/src/hotspot/share/code/codeCache.cpp
+++ b/src/hotspot/share/code/codeCache.cpp
@@ -61,6 +61,7 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/safepointVerifiers.hpp"
+#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vmThread.hpp"
#include "sanitizers/leak.hpp"
#include "services/memoryService.hpp"
@@ -1916,13 +1917,8 @@ void CodeCache::print_codelist(outputStream* st) {
nmethod* nm = iter.method();
ResourceMark rm;
char* method_name = nm->method()->name_and_sig_as_C_string();
- const char* jvmci_name = nullptr;
-#if INCLUDE_JVMCI
- jvmci_name = nm->jvmci_name();
-#endif
- st->print_cr("%d %d %d %s%s%s [" INTPTR_FORMAT ", " INTPTR_FORMAT " - " INTPTR_FORMAT "]",
- nm->compile_id(), nm->comp_level(), nm->get_state(),
- method_name, jvmci_name ? " jvmci_name=" : "", jvmci_name ? jvmci_name : "",
+ st->print_cr("%d %d %d %s [" INTPTR_FORMAT ", " INTPTR_FORMAT " - " INTPTR_FORMAT "]",
+ nm->compile_id(), nm->comp_level(), nm->get_state(), method_name,
(intptr_t)nm->header_begin(), (intptr_t)nm->code_begin(), (intptr_t)nm->code_end());
}
}
@@ -1941,6 +1937,18 @@ void CodeCache::log_state(outputStream* st) {
}
#ifdef LINUX
+static bool is_stub_code_blob(CodeBlob* cb) {  // True when cb is a buffer blob whose code contains the begin address of some StubCodeDesc.
+ if (!cb->is_buffer_blob()) {  // Any other kind of blob is rejected without walking the list.
+ return false;
+ }
+ for (StubCodeDesc* d = StubCodeDesc::first(); d != nullptr; d = StubCodeDesc::next(d)) {  // Walk the StubCodeDesc list from first() via next().
+ if (cb->code_contains(d->begin())) {  // Only the descriptor's begin address is tested, not its whole range.
+ return true;
+ }
+ }
+ return false;
+}
+
void CodeCache::write_perf_map(const char* filename, outputStream* st) {
MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
char fname[JVM_MAXPATHLEN];
@@ -1963,21 +1971,25 @@ void CodeCache::write_perf_map(const char* filename, outputStream* st) {
AllCodeBlobsIterator iter(AllCodeBlobsIterator::not_unloading);
while (iter.next()) {
CodeBlob *cb = iter.method();
+ if (is_stub_code_blob(cb)) {
+ // Individual stub routines are dumped after the main loop.
+ continue;
+ }
ResourceMark rm;
const char* method_name = nullptr;
- const char* jvmci_name = nullptr;
if (cb->is_nmethod()) {
nmethod* nm = cb->as_nmethod();
method_name = nm->method()->external_name();
-#if INCLUDE_JVMCI
- jvmci_name = nm->jvmci_name();
-#endif
} else {
method_name = cb->name();
}
- fs.print_cr(INTPTR_FORMAT " " INTPTR_FORMAT " %s%s%s",
- (intptr_t)cb->code_begin(), (intptr_t)cb->code_size(),
- method_name, jvmci_name ? " jvmci_name=" : "", jvmci_name ? jvmci_name : "");
+ fs.print_cr(INTPTR_FORMAT " " INTPTR_FORMAT " %s",
+ (intptr_t)cb->code_begin(), (intptr_t)cb->code_size(), method_name);
+ }
+ for (StubCodeDesc* d = StubCodeDesc::first(); d != nullptr; d = StubCodeDesc::next(d)) {
+ fs.print_cr(INTPTR_FORMAT " " INTPTR_FORMAT " %s %s",
+ (intptr_t)d->begin(), (intptr_t)d->size_in_bytes(),
+ d->group(), d->name());
}
}
#endif // LINUX
diff --git a/src/hotspot/share/code/codeCache.hpp b/src/hotspot/share/code/codeCache.hpp
index 6384cb397b8..3b8aa5b2e58 100644
--- a/src/hotspot/share/code/codeCache.hpp
+++ b/src/hotspot/share/code/codeCache.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -87,7 +87,6 @@ class ReservedSpace;
class CodeCache : AllStatic {
friend class VMStructs;
- friend class JVMCIVMStructs;
template friend class CodeBlobIterator;
friend class WhiteBox;
friend class ShenandoahParallelCodeHeapIterator;
diff --git a/src/hotspot/share/code/codeHeapState.cpp b/src/hotspot/share/code/codeHeapState.cpp
index ea4a1519f79..a545285d1da 100644
--- a/src/hotspot/share/code/codeHeapState.cpp
+++ b/src/hotspot/share/code/codeHeapState.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -226,7 +226,7 @@ const char* blobTypeName[] = {"noType"
, "buffer blob"
, "lastType"
};
-const char* compTypeName[] = { "none", "c1", "c2", "jvmci" };
+const char* compTypeName[] = { "none", "c1", "c2" };
// Be prepared for ten different CodeHeap segments. Should be enough for a few years.
const unsigned int nSizeDistElements = 31; // logarithmic range growth, max size: 2**32
@@ -734,16 +734,6 @@ void CodeHeapState::aggregate(outputStream* out, CodeHeap* heap, size_t granular
} else {
blob_name = os::strdup(cb->name());
}
-#if INCLUDE_JVMCI
- const char* jvmci_name = nm->jvmci_name();
- if (jvmci_name != nullptr) {
- size_t size = ::strlen(blob_name) + ::strlen(" jvmci_name=") + ::strlen(jvmci_name) + 1;
- char* new_blob_name = (char*)os::malloc(size, mtInternal);
- os::snprintf_checked(new_blob_name, size, "%s jvmci_name=%s", blob_name, jvmci_name);
- os::free((void*)blob_name);
- blob_name = new_blob_name;
- }
-#endif
nm_size = nm->total_size();
compile_id = nm->compile_id();
comp_lvl = (CompLevel)(nm->comp_level());
@@ -753,9 +743,6 @@ void CodeHeapState::aggregate(outputStream* out, CodeHeap* heap, size_t granular
if (nm->is_compiled_by_c2()) {
cType = c2;
}
- if (nm->is_compiled_by_jvmci()) {
- cType = jvmci;
- }
switch (cbType) {
case nMethod_inuse: { // only for executable methods!!!
// space for these cbs is accounted for later.
@@ -2192,12 +2179,6 @@ void CodeHeapState::print_names(outputStream* out, CodeHeap* heap) {
ast->print("%s.", classNameS);
ast->print("%s", methNameS);
ast->print("%s", methSigS);
-#if INCLUDE_JVMCI
- const char* jvmci_name = nm->jvmci_name();
- if (jvmci_name != nullptr) {
- ast->print(" jvmci_name=%s", jvmci_name);
- }
-#endif
} else {
ast->print("%s", blob_name);
}
diff --git a/src/hotspot/share/code/codeHeapState.hpp b/src/hotspot/share/code/codeHeapState.hpp
index f30e492d7a5..cf0aef8d6e5 100644
--- a/src/hotspot/share/code/codeHeapState.hpp
+++ b/src/hotspot/share/code/codeHeapState.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -38,7 +38,6 @@ class CodeHeapState : public CHeapObj {
noComp = 0, // must be! due to initialization by memset to zero
c1,
c2,
- jvmci,
lastComp
};
diff --git a/src/hotspot/share/code/compiledIC.hpp b/src/hotspot/share/code/compiledIC.hpp
index f60ed93aa97..a42e09dc821 100644
--- a/src/hotspot/share/code/compiledIC.hpp
+++ b/src/hotspot/share/code/compiledIC.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -61,7 +61,6 @@ public:
class CompiledICData : public CHeapObj {
friend class VMStructs;
- friend class JVMCIVMStructs;
Method* volatile _speculated_method;
uintptr_t volatile _speculated_klass;
diff --git a/src/hotspot/share/code/debugInfo.cpp b/src/hotspot/share/code/debugInfo.cpp
index b6f58908c2c..6b9c668f997 100644
--- a/src/hotspot/share/code/debugInfo.cpp
+++ b/src/hotspot/share/code/debugInfo.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -345,9 +345,7 @@ void ConstantDoubleValue::print_on(outputStream* st) const {
void ConstantOopWriteValue::write_on(DebugInfoWriteStream* stream) {
#ifdef ASSERT
{
- // cannot use ThreadInVMfromNative here since in case of JVMCI compiler,
- // thread is already in VM state.
- ThreadInVMfromUnknown tiv;
+ ThreadInVMfromNative tiv(JavaThread::current());
assert(JNIHandles::resolve(value()) == nullptr ||
Universe::heap()->is_in(JNIHandles::resolve(value())),
"Should be in heap");
@@ -358,9 +356,7 @@ void ConstantOopWriteValue::write_on(DebugInfoWriteStream* stream) {
}
void ConstantOopWriteValue::print_on(outputStream* st) const {
- // using ThreadInVMfromUnknown here since in case of JVMCI compiler,
- // thread is already in VM state.
- ThreadInVMfromUnknown tiv;
+ ThreadInVMfromNative tiv(JavaThread::current());
JNIHandles::resolve(value())->print_value_on(st);
}
diff --git a/src/hotspot/share/code/dependencies.cpp b/src/hotspot/share/code/dependencies.cpp
index dbfe1cd884e..dd92e594f4d 100644
--- a/src/hotspot/share/code/dependencies.cpp
+++ b/src/hotspot/share/code/dependencies.cpp
@@ -65,9 +65,6 @@ void Dependencies::initialize(ciEnv* env) {
_oop_recorder = env->oop_recorder();
_log = env->log();
_dep_seen = new(arena) GrowableArray(arena, 500, 0, 0);
-#if INCLUDE_JVMCI
- _using_dep_values = false;
-#endif
DEBUG_ONLY(_deps[end_marker] = nullptr);
for (int i = (int)FIRST_TYPE; i < (int)TYPE_LIMIT; i++) {
_deps[i] = new(arena) GrowableArray(arena, 10, 0, nullptr);
@@ -102,16 +99,10 @@ void Dependencies::assert_abstract_with_unique_concrete_subtype(ciKlass* ctxk, c
assert_common_2(abstract_with_unique_concrete_subtype, ctxk, conck);
}
-void Dependencies::assert_unique_concrete_method(ciKlass* ctxk, ciMethod* uniqm) {
- check_ctxk(ctxk);
- check_unique_method(ctxk, uniqm);
- assert_common_2(unique_concrete_method_2, ctxk, uniqm);
-}
-
void Dependencies::assert_unique_concrete_method(ciKlass* ctxk, ciMethod* uniqm, ciKlass* resolved_klass, ciMethod* resolved_method) {
check_ctxk(ctxk);
check_unique_method(ctxk, uniqm);
- assert_common_4(unique_concrete_method_4, ctxk, uniqm, resolved_klass, resolved_method);
+ assert_common_4(unique_concrete_method, ctxk, uniqm, resolved_klass, resolved_method);
}
void Dependencies::assert_unique_implementor(ciInstanceKlass* ctxk, ciInstanceKlass* uniqk) {
@@ -129,74 +120,6 @@ void Dependencies::assert_call_site_target_value(ciCallSite* call_site, ciMethod
assert_common_2(call_site_target_value, call_site, method_handle);
}
-#if INCLUDE_JVMCI
-
-Dependencies::Dependencies(Arena* arena, OopRecorder* oop_recorder, CompileLog* log) {
- _oop_recorder = oop_recorder;
- _log = log;
- _dep_seen = new(arena) GrowableArray(arena, 500, 0, 0);
- _using_dep_values = true;
- DEBUG_ONLY(_dep_values[end_marker] = nullptr);
- for (int i = (int)FIRST_TYPE; i < (int)TYPE_LIMIT; i++) {
- _dep_values[i] = new(arena) GrowableArray(arena, 10, 0, DepValue());
- }
- _content_bytes = nullptr;
- _size_in_bytes = (size_t)-1;
-
- assert(TYPE_LIMIT <= (1<is_array_klass()) {
- // As a special case, support this assertion on an array type,
- // which reduces to an assertion on its element type.
- // Note that this cannot be done with assertions that
- // relate to concreteness or abstractness.
- BasicType elemt = ArrayKlass::cast(ctxk)->element_type();
- if (is_java_primitive(elemt)) return; // Ex: int[][]
- ctxk = ObjArrayKlass::cast(ctxk)->bottom_klass();
- //if (ctxk->is_final()) return; // Ex: String[][]
- }
- check_ctxk(ctxk);
- assert_common_1(leaf_type, DepValue(_oop_recorder, ctxk));
-}
-
-void Dependencies::assert_abstract_with_unique_concrete_subtype(Klass* ctxk, Klass* conck) {
- check_ctxk_abstract(ctxk);
- DepValue ctxk_dv(_oop_recorder, ctxk);
- DepValue conck_dv(_oop_recorder, conck, &ctxk_dv);
- assert_common_2(abstract_with_unique_concrete_subtype, ctxk_dv, conck_dv);
-}
-
-void Dependencies::assert_unique_implementor(InstanceKlass* ctxk, InstanceKlass* uniqk) {
- check_ctxk(ctxk);
- assert(ctxk->is_interface(), "not an interface");
- assert(ctxk->implementor() == uniqk, "not a unique implementor");
- assert_common_2(unique_implementor, DepValue(_oop_recorder, ctxk), DepValue(_oop_recorder, uniqk));
-}
-
-void Dependencies::assert_unique_concrete_method(Klass* ctxk, Method* uniqm) {
- check_ctxk(ctxk);
- check_unique_method(ctxk, uniqm);
- assert_common_2(unique_concrete_method_2, DepValue(_oop_recorder, ctxk), DepValue(_oop_recorder, uniqm));
-}
-
-void Dependencies::assert_call_site_target_value(oop call_site, oop method_handle) {
- assert_common_2(call_site_target_value, DepValue(_oop_recorder, JNIHandles::make_local(call_site)), DepValue(_oop_recorder, JNIHandles::make_local(method_handle)));
-}
-
-#endif // INCLUDE_JVMCI
-
-
// Helper function. If we are adding a new dep. under ctxk2,
// try to find an old dep. under a broader* ctxk1. If there is
//
@@ -303,79 +226,6 @@ void Dependencies::assert_common_4(DepType dept,
deps->append(x3);
}
-#if INCLUDE_JVMCI
-bool Dependencies::maybe_merge_ctxk(GrowableArray* deps,
- int ctxk_i, DepValue ctxk2_dv) {
- Klass* ctxk1 = deps->at(ctxk_i).as_klass(_oop_recorder);
- Klass* ctxk2 = ctxk2_dv.as_klass(_oop_recorder);
- if (ctxk2->is_subtype_of(ctxk1)) {
- return true; // success, and no need to change
- } else if (ctxk1->is_subtype_of(ctxk2)) {
- // new context class fully subsumes previous one
- deps->at_put(ctxk_i, ctxk2_dv);
- return true;
- } else {
- return false;
- }
-}
-
-void Dependencies::assert_common_1(DepType dept, DepValue x) {
- assert(dep_args(dept) == 1, "sanity");
- //log_dependency(dept, x);
- GrowableArray* deps = _dep_values[dept];
-
- // see if the same (or a similar) dep is already recorded
- if (note_dep_seen(dept, x)) {
- assert(deps->find(x) >= 0, "sanity");
- } else {
- deps->append(x);
- }
-}
-
-void Dependencies::assert_common_2(DepType dept,
- DepValue x0, DepValue x1) {
- assert(dep_args(dept) == 2, "sanity");
- //log_dependency(dept, x0, x1);
- GrowableArray* deps = _dep_values[dept];
-
- // see if the same (or a similar) dep is already recorded
- bool has_ctxk = has_explicit_context_arg(dept);
- if (has_ctxk) {
- assert(dep_context_arg(dept) == 0, "sanity");
- if (note_dep_seen(dept, x1)) {
- // look in this bucket for redundant assertions
- const int stride = 2;
- for (int i = deps->length(); (i -= stride) >= 0; ) {
- DepValue y1 = deps->at(i+1);
- if (x1 == y1) { // same subject; check the context
- if (maybe_merge_ctxk(deps, i+0, x0)) {
- return;
- }
- }
- }
- }
- } else {
- bool dep_seen_x0 = note_dep_seen(dept, x0); // records x0 for future queries
- bool dep_seen_x1 = note_dep_seen(dept, x1); // records x1 for future queries
- if (dep_seen_x0 && dep_seen_x1) {
- // look in this bucket for redundant assertions
- const int stride = 2;
- for (int i = deps->length(); (i -= stride) >= 0; ) {
- DepValue y0 = deps->at(i+0);
- DepValue y1 = deps->at(i+1);
- if (x0 == y0 && x1 == y1) {
- return;
- }
- }
- }
- }
-
- // append the assertion in the correct bucket:
- deps->append(x0);
- deps->append(x1);
-}
-#endif // INCLUDE_JVMCI
-
/// Support for encoding dependencies into an nmethod:
void Dependencies::copy_to(nmethod* nm) {
@@ -402,40 +252,7 @@ static int sort_dep_arg_3(ciBaseObject** p1, ciBaseObject** p2)
static int sort_dep_arg_4(ciBaseObject** p1, ciBaseObject** p2)
{ return sort_dep(p1, p2, 4); }
-#if INCLUDE_JVMCI
-// metadata deps are sorted before object deps
-static int sort_dep_value(Dependencies::DepValue* p1, Dependencies::DepValue* p2, int narg) {
- for (int i = 0; i < narg; i++) {
- int diff = p1[i].sort_key() - p2[i].sort_key();
- if (diff != 0) return diff;
- }
- return 0;
-}
-static int sort_dep_value_arg_1(Dependencies::DepValue* p1, Dependencies::DepValue* p2)
-{ return sort_dep_value(p1, p2, 1); }
-static int sort_dep_value_arg_2(Dependencies::DepValue* p1, Dependencies::DepValue* p2)
-{ return sort_dep_value(p1, p2, 2); }
-static int sort_dep_value_arg_3(Dependencies::DepValue* p1, Dependencies::DepValue* p2)
-{ return sort_dep_value(p1, p2, 3); }
-#endif // INCLUDE_JVMCI
-
void Dependencies::sort_all_deps() {
-#if INCLUDE_JVMCI
- if (_using_dep_values) {
- for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
- DepType dept = (DepType)deptv;
- GrowableArray* deps = _dep_values[dept];
- if (deps->length() <= 1) continue;
- switch (dep_args(dept)) {
- case 1: deps->sort(sort_dep_value_arg_1, 1); break;
- case 2: deps->sort(sort_dep_value_arg_2, 2); break;
- case 3: deps->sort(sort_dep_value_arg_3, 3); break;
- default: ShouldNotReachHere(); break;
- }
- }
- return;
- }
-#endif // INCLUDE_JVMCI
for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
DepType dept = (DepType)deptv;
GrowableArray* deps = _deps[dept];
@@ -452,16 +269,6 @@ void Dependencies::sort_all_deps() {
size_t Dependencies::estimate_size_in_bytes() {
size_t est_size = 100;
-#if INCLUDE_JVMCI
- if (_using_dep_values) {
- for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
- DepType dept = (DepType)deptv;
- GrowableArray* deps = _dep_values[dept];
- est_size += deps->length() * 2; // tags and argument(s)
- }
- return est_size;
- }
-#endif // INCLUDE_JVMCI
for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
DepType dept = (DepType)deptv;
GrowableArray* deps = _deps[dept];
@@ -472,8 +279,7 @@ size_t Dependencies::estimate_size_in_bytes() {
ciKlass* Dependencies::ctxk_encoded_as_null(DepType dept, ciBaseObject* x) {
switch (dept) {
- case unique_concrete_method_2:
- case unique_concrete_method_4:
+ case unique_concrete_method:
return x->as_metadata()->as_method()->holder();
default:
return nullptr; // let nullptr be nullptr
@@ -483,8 +289,7 @@ ciKlass* Dependencies::ctxk_encoded_as_null(DepType dept, ciBaseObject* x) {
Klass* Dependencies::ctxk_encoded_as_null(DepType dept, Metadata* x) {
assert(must_be_in_vm(), "raw oops here");
switch (dept) {
- case unique_concrete_method_2:
- case unique_concrete_method_4:
+ case unique_concrete_method:
assert(x->is_method(), "sanity");
return ((Method*)x)->method_holder();
default:
@@ -498,37 +303,6 @@ void Dependencies::encode_content_bytes() {
// cast is safe, no deps can overflow INT_MAX
CompressedWriteStream bytes((int)estimate_size_in_bytes());
-#if INCLUDE_JVMCI
- if (_using_dep_values) {
- for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
- DepType dept = (DepType)deptv;
- GrowableArray* deps = _dep_values[dept];
- if (deps->length() == 0) continue;
- int stride = dep_args(dept);
- int ctxkj = dep_context_arg(dept); // -1 if no context arg
- assert(stride > 0, "sanity");
- for (int i = 0; i < deps->length(); i += stride) {
- jbyte code_byte = (jbyte)dept;
- int skipj = -1;
- if (ctxkj >= 0 && ctxkj+1 < stride) {
- Klass* ctxk = deps->at(i+ctxkj+0).as_klass(_oop_recorder);
- DepValue x = deps->at(i+ctxkj+1); // following argument
- if (ctxk == ctxk_encoded_as_null(dept, x.as_metadata(_oop_recorder))) {
- skipj = ctxkj; // we win: maybe one less oop to keep track of
- code_byte |= default_context_type_bit;
- }
- }
- bytes.write_byte(code_byte);
- for (int j = 0; j < stride; j++) {
- if (j == skipj) continue;
- DepValue v = deps->at(i+j);
- int idx = v.index();
- bytes.write_int(idx);
- }
- }
- }
- } else {
-#endif // INCLUDE_JVMCI
for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
DepType dept = (DepType)deptv;
GrowableArray* deps = _deps[dept];
@@ -562,9 +336,6 @@ void Dependencies::encode_content_bytes() {
}
}
}
-#if INCLUDE_JVMCI
- }
-#endif
// write a sentinel byte to mark the end
bytes.write_byte(end_marker);
@@ -587,8 +358,7 @@ const char* Dependencies::_dep_name[TYPE_LIMIT] = {
"evol_method",
"leaf_type",
"abstract_with_unique_concrete_subtype",
- "unique_concrete_method_2",
- "unique_concrete_method_4",
+ "unique_concrete_method",
"unique_implementor",
"no_finalizable_subclasses",
"call_site_target_value"
@@ -599,8 +369,7 @@ int Dependencies::_dep_args[TYPE_LIMIT] = {
1, // evol_method m
1, // leaf_type ctxk
2, // abstract_with_unique_concrete_subtype ctxk, k
- 2, // unique_concrete_method_2 ctxk, m
- 4, // unique_concrete_method_4 ctxk, m, resolved_klass, resolved_method
+ 4, // unique_concrete_method ctxk, m, resolved_klass, resolved_method
2, // unique_implementor ctxk, implementor
1, // no_finalizable_subclasses ctxk
2 // call_site_target_value call_site, method_handle
@@ -1242,172 +1011,6 @@ Klass* ConcreteSubtypeFinder::find_witness_anywhere(InstanceKlass* context_type)
return nullptr;
}
-class ConcreteMethodFinder : public AbstractClassHierarchyWalker {
- private:
- Symbol* _name;
- Symbol* _signature;
-
- // cache of method lookups
- Method* _found_methods[PARTICIPANT_LIMIT+1];
-
- bool is_witness(Klass* k);
-
- protected:
- virtual Klass* find_witness_in(KlassDepChange& changes);
- virtual Klass* find_witness_anywhere(InstanceKlass* context_type);
-
- public:
- bool witnessed_reabstraction_in_supers(Klass* k);
-
- ConcreteMethodFinder(Method* m, Klass* participant = nullptr) : AbstractClassHierarchyWalker(participant) {
- assert(m != nullptr && m->is_method(), "sanity");
- _name = m->name();
- _signature = m->signature();
-
- for (int i = 0; i < PARTICIPANT_LIMIT+1; i++) {
- _found_methods[i] = nullptr;
- }
- }
-
- // Note: If n==num_participants, returns nullptr.
- Method* found_method(uint n) {
- assert(n <= num_participants(), "oob");
- Method* fm = _found_methods[n];
- assert(n == num_participants() || fm != nullptr, "proper usage");
- if (fm != nullptr && fm->method_holder() != participant(n)) {
- // Default methods from interfaces can be added to classes. In
- // that case the holder of the method is not the class but the
- // interface where it's defined.
- assert(fm->is_default_method(), "sanity");
- return nullptr;
- }
- return fm;
- }
-
- void add_participant(Klass* participant) {
- AbstractClassHierarchyWalker::add_participant(participant);
- _found_methods[num_participants()] = nullptr;
- }
-
- bool record_witness(Klass* witness, Method* m) {
- _found_methods[num_participants()] = m;
- return AbstractClassHierarchyWalker::record_witness(witness);
- }
-
- private:
- static PerfCounter* _perf_find_witness_anywhere_calls_count;
- static PerfCounter* _perf_find_witness_anywhere_steps_count;
- static PerfCounter* _perf_find_witness_in_calls_count;
-
- public:
- static void init();
- static void print_statistics();
-};
-
-bool ConcreteMethodFinder::is_witness(Klass* k) {
- if (is_participant(k)) {
- return false; // do not report participant types
- }
- if (k->is_instance_klass()) {
- InstanceKlass* ik = InstanceKlass::cast(k);
- // Search class hierarchy first, skipping private implementations
- // as they never override any inherited methods
- Method* m = ik->find_instance_method(_name, _signature, Klass::PrivateLookupMode::skip);
- if (Dependencies::is_concrete_method(m, ik)) {
- return record_witness(k, m); // concrete method found
- } else {
- // Check for re-abstraction of method
- if (!ik->is_interface() && m != nullptr && m->is_abstract()) {
- // Found a matching abstract method 'm' in the class hierarchy.
- // This is fine iff 'k' is an abstract class and all concrete subtypes
- // of 'k' override 'm' and are participates of the current search.
- ConcreteSubtypeFinder wf;
- for (uint i = 0; i < num_participants(); i++) {
- Klass* p = participant(i);
- wf.add_participant(p);
- }
- Klass* w = wf.find_witness(ik);
- if (w != nullptr) {
- Method* wm = InstanceKlass::cast(w)->find_instance_method(_name, _signature, Klass::PrivateLookupMode::skip);
- if (!Dependencies::is_concrete_method(wm, w)) {
- // Found a concrete subtype 'w' which does not override abstract method 'm'.
- // Bail out because 'm' could be called with 'w' as receiver (leading to an
- // AbstractMethodError) and thus the method we are looking for is not unique.
- return record_witness(k, m);
- }
- }
- }
- // Check interface defaults also, if any exist.
- Array* default_methods = ik->default_methods();
- if (default_methods != nullptr) {
- Method* dm = ik->find_method(default_methods, _name, _signature);
- if (Dependencies::is_concrete_method(dm, nullptr)) {
- return record_witness(k, dm); // default method found
- }
- }
- return false; // no concrete method found
- }
- } else {
- return false; // no methods to find in an array type
- }
-}
-
-Klass* ConcreteMethodFinder::find_witness_in(KlassDepChange& changes) {
- // When looking for unexpected concrete methods, look beneath expected ones, to see if there are overrides.
- // * CX.m > CC.m > C'.m is not OK, if C'.m is new, and C' is the witness.
- Klass* new_type = changes.as_new_klass_change()->new_type();
- assert(!is_participant(new_type), "only old classes are participants");
- if (is_witness(new_type)) {
- return new_type;
- } else {
- // No witness found, but is_witness() doesn't detect method re-abstraction in case of spot-checking.
- if (witnessed_reabstraction_in_supers(new_type)) {
- return new_type;
- }
- }
- // No witness found. The dependency remains unbroken.
- return nullptr;
-}
-
-bool ConcreteMethodFinder::witnessed_reabstraction_in_supers(Klass* k) {
- if (!k->is_instance_klass()) {
- return false; // no methods to find in an array type
- } else {
- // Looking for a case when an abstract method is inherited into a concrete class.
- if (Dependencies::is_concrete_klass(k) && !k->is_interface()) {
- Method* m = InstanceKlass::cast(k)->find_instance_method(_name, _signature, Klass::PrivateLookupMode::skip);
- if (m != nullptr) {
- return false; // no reabstraction possible: local method found
- }
- for (InstanceKlass* super = k->java_super(); super != nullptr; super = super->java_super()) {
- m = super->find_instance_method(_name, _signature, Klass::PrivateLookupMode::skip);
- if (m != nullptr) { // inherited method found
- if (m->is_abstract() || m->is_overpass()) {
- return record_witness(super, m); // abstract method found
- }
- return false;
- }
- }
- // Miranda.
- return true;
- }
- return false;
- }
-}
-
-
-Klass* ConcreteMethodFinder::find_witness_anywhere(InstanceKlass* context_type) {
- // Walk hierarchy under a context type, looking for unexpected types.
- for (CountingClassHierarchyIterator iter(context_type); !iter.done(); iter.next()) {
- Klass* sub = iter.klass();
- if (is_witness(sub)) {
- return sub; // found a witness
- }
- }
- // No witness found. The dependency remains unbroken.
- return nullptr;
-}
-
// For some method m and some class ctxk (subclass of method holder),
// enumerate all distinct overrides of m in concrete subclasses of ctxk.
// It relies on vtable/itable information to perform method selection on each linked subclass
@@ -1769,60 +1372,6 @@ Klass* Dependencies::find_unique_concrete_subtype(InstanceKlass* ctxk) {
}
}
-// Try to determine whether root method in some context is concrete or not based on the information about the unique method
-// in that context. It exploits the fact that concrete root method is always inherited into the context when there's a unique method.
-// Hence, unique method holder is always a supertype of the context class when root method is concrete.
-// Examples for concrete_root_method
-// C (C.m uniqm)
-// |
-// CX (ctxk) uniqm is inherited into context.
-//
-// CX (ctxk) (CX.m uniqm) here uniqm is defined in ctxk.
-// Examples for !concrete_root_method
-// CX (ctxk)
-// |
-// C (C.m uniqm) uniqm is in subtype of ctxk.
-bool Dependencies::is_concrete_root_method(Method* uniqm, InstanceKlass* ctxk) {
- if (uniqm == nullptr) {
- return false; // match Dependencies::is_concrete_method() behavior
- }
- // Theoretically, the "direction" of subtype check matters here.
- // On one hand, in case of interface context with a single implementor, uniqm can be in a superclass of the implementor which
- // is not related to context class.
- // On another hand, uniqm could come from an interface unrelated to the context class, but right now it is not possible:
- // it is required that uniqm->method_holder() is the participant (uniqm->method_holder() <: ctxk), hence a default method
- // can't be used as unique.
- if (ctxk->is_interface()) {
- InstanceKlass* implementor = ctxk->implementor();
- assert(implementor != ctxk, "single implementor only"); // should have been invalidated earlier
- ctxk = implementor;
- }
- InstanceKlass* holder = uniqm->method_holder();
- assert(!holder->is_interface(), "no default methods allowed");
- assert(ctxk->is_subclass_of(holder) || holder->is_subclass_of(ctxk), "not related");
- return ctxk->is_subclass_of(holder);
-}
-
-// If a class (or interface) has a unique concrete method uniqm, return nullptr.
-// Otherwise, return a class that contains an interfering method.
-Klass* Dependencies::check_unique_concrete_method(InstanceKlass* ctxk,
- Method* uniqm,
- NewKlassDepChange* changes) {
- ConcreteMethodFinder wf(uniqm, uniqm->method_holder());
- Klass* k = wf.find_witness(ctxk, changes);
- if (k != nullptr) {
- return k;
- }
- if (!Dependencies::is_concrete_root_method(uniqm, ctxk) || changes != nullptr) {
- Klass* conck = find_witness_AME(ctxk, uniqm, changes);
- if (conck != nullptr) {
- // Found a concrete subtype 'conck' which does not override abstract root method.
- return conck;
- }
- }
- return nullptr;
-}
-
Klass* Dependencies::check_unique_implementor(InstanceKlass* ctxk, Klass* uniqk, NewKlassDepChange* changes) {
assert(ctxk->is_interface(), "sanity");
assert(ctxk->nof_implementors() > 0, "no implementors");
@@ -1833,107 +1382,6 @@ Klass* Dependencies::check_unique_implementor(InstanceKlass* ctxk, Klass* uniqk,
return ctxk; // no unique implementor
}
-// Search for AME.
-// There are two version of checks.
-// 1) Spot checking version(Classload time). Newly added class is checked for AME.
-// Checks whether abstract/overpass method is inherited into/declared in newly added concrete class.
-// 2) Compile time analysis for abstract/overpass(abstract klass) root_m. The non uniqm subtrees are checked for concrete classes.
-Klass* Dependencies::find_witness_AME(InstanceKlass* ctxk, Method* m, KlassDepChange* changes) {
- if (m != nullptr) {
- if (changes != nullptr) {
- // Spot checking version.
- ConcreteMethodFinder wf(m);
- Klass* new_type = changes->as_new_klass_change()->new_type();
- if (wf.witnessed_reabstraction_in_supers(new_type)) {
- return new_type;
- }
- } else {
- // Note: It is required that uniqm->method_holder() is the participant (see ClassHierarchyWalker::found_method()).
- ConcreteSubtypeFinder wf(m->method_holder());
- Klass* conck = wf.find_witness(ctxk);
- if (conck != nullptr) {
- Method* cm = InstanceKlass::cast(conck)->find_instance_method(m->name(), m->signature(), Klass::PrivateLookupMode::skip);
- if (!Dependencies::is_concrete_method(cm, conck)) {
- return conck;
- }
- }
- }
- }
- return nullptr;
-}
-
-// This function is used by find_unique_concrete_method(non vtable based)
-// to check whether subtype method overrides the base method.
-static bool overrides(Method* sub_m, Method* base_m) {
- assert(base_m != nullptr, "base method should be non null");
- if (sub_m == nullptr) {
- return false;
- }
- /**
- * If base_m is public or protected then sub_m always overrides.
- * If base_m is !public, !protected and !private (i.e. base_m is package private)
- * then sub_m should be in the same package as that of base_m.
- * For package private base_m this is conservative approach as it allows only subset of all allowed cases in
- * the jvm specification.
- **/
- if (base_m->is_public() || base_m->is_protected() ||
- base_m->method_holder()->is_same_class_package(sub_m->method_holder())) {
- return true;
- }
- return false;
-}
-
-// Find the set of all non-abstract methods under ctxk that match m.
-// (The method m must be defined or inherited in ctxk.)
-// Include m itself in the set, unless it is abstract.
-// If this set has exactly one element, return that element.
-Method* Dependencies::find_unique_concrete_method(InstanceKlass* ctxk, Method* m, Klass** participant) {
- // Return nullptr if m is marked old; must have been a redefined method.
- if (m->is_old()) {
- return nullptr;
- }
- if (m->is_default_method()) {
- return nullptr; // not supported
- }
- assert(verify_method_context(ctxk, m), "proper context");
- ConcreteMethodFinder wf(m);
- wf.record_witnesses(1);
- Klass* wit = wf.find_witness(ctxk);
- if (wit != nullptr) return nullptr; // Too many witnesses.
- Method* fm = wf.found_method(0); // Will be nullptr if num_parts == 0.
- if (participant != nullptr) {
- (*participant) = wf.participant(0);
- }
- if (!Dependencies::is_concrete_method(fm, nullptr)) {
- fm = nullptr; // ignore abstract methods
- }
- if (Dependencies::is_concrete_method(m, ctxk)) {
- if (fm == nullptr) {
- // It turns out that m was always the only implementation.
- fm = m;
- } else if (fm != m) {
- // Two conflicting implementations after all.
- // (This can happen if m is inherited into ctxk and fm overrides it.)
- return nullptr;
- }
- } else if (Dependencies::find_witness_AME(ctxk, fm) != nullptr) {
- // Found a concrete subtype which does not override abstract root method.
- return nullptr;
- } else if (!overrides(fm, m)) {
- // Found method doesn't override abstract root method.
- return nullptr;
- }
- assert(Dependencies::is_concrete_root_method(fm, ctxk) == Dependencies::is_concrete_method(m, ctxk), "mismatch");
-#ifndef PRODUCT
- // Make sure the dependency mechanism will pass this discovery:
- if (VerifyDependencies && fm != nullptr) {
- guarantee(nullptr == (void *)check_unique_concrete_method(ctxk, fm),
- "verify dep.");
- }
-#endif //PRODUCT
- return fm;
-}
-
// If a class (or interface) has a unique concrete method uniqm, return nullptr.
// Otherwise, return a class that contains an interfering method.
Klass* Dependencies::check_unique_concrete_method(InstanceKlass* ctxk,
@@ -2008,18 +1456,6 @@ Method* Dependencies::find_unique_concrete_method(InstanceKlass* ctxk, Method* m
}
#endif // PRODUCT
assert(fm == nullptr || !fm->is_abstract(), "sanity");
- // Old CHA conservatively reports concrete methods in abstract classes
- // irrespective of whether they have concrete subclasses or not.
- // Also, abstract root method case is not fully supported.
-#ifdef ASSERT
- Klass* uniqp = nullptr;
- Method* uniqm = Dependencies::find_unique_concrete_method(ctxk, m, &uniqp);
- assert(uniqm == nullptr || uniqm == fm ||
- m->is_abstract() ||
- uniqm->method_holder()->is_abstract() ||
- (fm == nullptr && uniqm != nullptr && uniqp != nullptr && !InstanceKlass::cast(uniqp)->is_linked()),
- "sanity");
-#endif // ASSERT
return fm;
}
@@ -2078,10 +1514,7 @@ Klass* Dependencies::DepStream::check_new_klass_dependency(NewKlassDepChange* ch
case abstract_with_unique_concrete_subtype:
witness = check_abstract_with_unique_concrete_subtype(context_type(), type_argument(1), changes);
break;
- case unique_concrete_method_2:
- witness = check_unique_concrete_method(context_type(), method_argument(1), changes);
- break;
- case unique_concrete_method_4:
+ case unique_concrete_method:
witness = check_unique_concrete_method(context_type(), method_argument(1), type_argument(2), method_argument(3), changes);
break;
case unique_implementor:
@@ -2102,18 +1535,13 @@ Klass* Dependencies::DepStream::check_klass_init_dependency(KlassInitDepChange*
assert_locked_or_safepoint(Compile_lock);
Dependencies::check_valid_dependency_type(type());
- // No new types added. Only unique_concrete_method_4 is sensitive to class initialization changes.
- Klass* witness = nullptr;
- switch (type()) {
- case unique_concrete_method_4:
- witness = check_unique_concrete_method(context_type(), method_argument(1), type_argument(2), method_argument(3), changes);
- break;
- default:
- witness = nullptr;
- break;
+ // No new types added. Only unique_concrete_method is sensitive to class initialization changes.
+ if (type() == unique_concrete_method) {
+ Klass* witness = check_unique_concrete_method(context_type(), method_argument(1), type_argument(2), method_argument(3), changes);
+ trace_and_log_witness(witness);
+ return witness;
}
- trace_and_log_witness(witness);
- return witness;
+ return nullptr;
}
Klass* Dependencies::DepStream::check_klass_dependency(KlassDepChange* changes) {
diff --git a/src/hotspot/share/code/dependencies.hpp b/src/hotspot/share/code/dependencies.hpp
index 582a08183f9..b7b43b98a70 100644
--- a/src/hotspot/share/code/dependencies.hpp
+++ b/src/hotspot/share/code/dependencies.hpp
@@ -120,30 +120,23 @@ class Dependencies: public ResourceObj {
// An abstract class CX has exactly one concrete subtype CC.
abstract_with_unique_concrete_subtype,
- // Given a method M1 and a context class CX, the set MM(CX, M1) of
+ // Given a method M1 and a context class CX, the set MM(CX, M1, RC1, RM1) of
// "concrete matching methods" in CX of M1 is the set of every
// concrete M2 for which it is possible to create an invokevirtual
// or invokeinterface call site that can reach either M1 or M2.
// That is, M1 and M2 share a name, signature, and vtable index.
- // We wish to notice when the set MM(CX, M1) is just {M1}, or
+ // We wish to notice when the set MM(CX, M1, RC1, RM1) is just {M1}, or
// perhaps a set of two {M1,M2}, and issue dependencies on this.
- // The set MM(CX, M1) can be computed by starting with any matching
+ // The set MM(CX, M1, RC1, RM1) can be computed by starting with any matching
// concrete M2 that is inherited into CX, and then walking the
// subtypes* of CX looking for concrete definitions.
- // The parameters to this dependency are the method M1 and the
- // context class CX. M1 must be either inherited in CX or defined
- // in a subtype* of CX. It asserts that MM(CX, M1) is no greater
- // than {M1}.
- unique_concrete_method_2, // one unique concrete method under CX
-
- // In addition to the method M1 and the context class CX, the parameters
- // to this dependency are the resolved class RC1 and the
- // resolved method RM1. It asserts that MM(CX, M1, RC1, RM1)
- // is no greater than {M1}. RC1 and RM1 are used to improve the precision
- // of the analysis.
- unique_concrete_method_4, // one unique concrete method under CX
+ // The parameters to this dependency are the context class CX, the method M1,
+ // the resolved class RC1, and the resolved method RM1. M1 must be either inherited in CX
+ // or defined in a subtype* of CX. It asserts that MM(CX, M1, RC1, RM1) is
+ // no greater than {M1}. RC1 and RM1 are used to improve the precision of the analysis.
+ unique_concrete_method, // one unique concrete method under CX
// This dependency asserts that interface CX has a unique implementor class.
unique_implementor, // one unique implementor under CX
@@ -195,69 +188,10 @@ class Dependencies: public ResourceObj {
static void check_valid_dependency_type(DepType dept);
-#if INCLUDE_JVMCI
- // A Metadata* or object value recorded in an OopRecorder
- class DepValue {
- private:
- // Unique identifier of the value within the associated OopRecorder that
- // encodes both the category of the value (0: invalid, positive: metadata, negative: object)
- // and the index within a category specific array (metadata: index + 1, object: -(index + 1))
- int _id;
-
- public:
- DepValue() : _id(0) {}
- DepValue(OopRecorder* rec, Metadata* metadata, DepValue* candidate = nullptr) {
- assert(candidate == nullptr || candidate->is_metadata(), "oops");
- if (candidate != nullptr && candidate->as_metadata(rec) == metadata) {
- _id = candidate->_id;
- } else {
- _id = rec->find_index(metadata) + 1;
- }
- }
- DepValue(OopRecorder* rec, jobject obj, DepValue* candidate = nullptr) {
- assert(candidate == nullptr || candidate->is_object(), "oops");
- if (candidate != nullptr && candidate->as_object(rec) == obj) {
- _id = candidate->_id;
- } else {
- _id = -(rec->find_index(obj) + 1);
- }
- }
-
- // Used to sort values in ascending order of index() with metadata values preceding object values
- int sort_key() const { return -_id; }
-
- bool operator == (const DepValue& other) const { return other._id == _id; }
-
- bool is_valid() const { return _id != 0; }
- int index() const { assert(is_valid(), "oops"); return _id < 0 ? -(_id + 1) : _id - 1; }
- bool is_metadata() const { assert(is_valid(), "oops"); return _id > 0; }
- bool is_object() const { assert(is_valid(), "oops"); return _id < 0; }
-
- Metadata* as_metadata(OopRecorder* rec) const { assert(is_metadata(), "oops"); return rec->metadata_at(index()); }
- Klass* as_klass(OopRecorder* rec) const {
- Metadata* m = as_metadata(rec);
- assert(m != nullptr, "as_metadata returned nullptr");
- assert(m->is_klass(), "oops");
- return (Klass*) m;
- }
- Method* as_method(OopRecorder* rec) const {
- Metadata* m = as_metadata(rec);
- assert(m != nullptr, "as_metadata returned nullptr");
- assert(m->is_method(), "oops");
- return (Method*) m;
- }
- jobject as_object(OopRecorder* rec) const { assert(is_object(), "oops"); return rec->oop_at(index()); }
- };
-#endif // INCLUDE_JVMCI
-
private:
// State for writing a new set of dependencies:
GrowableArray<int>* _dep_seen; // (seen[h->ident] & (1<<dept))
GrowableArray<ciBaseObject*>* _deps[TYPE_LIMIT];
-#if INCLUDE_JVMCI
- bool _using_dep_values;
- GrowableArray<DepValue>* _dep_values[TYPE_LIMIT];
-#endif
static const char* _dep_name[TYPE_LIMIT];
static int _dep_args[TYPE_LIMIT];
@@ -276,25 +210,8 @@ class Dependencies: public ResourceObj {
return (seen & (1<at_grow(x_id, 0);
- _dep_seen->at_put(x_id, seen | (1<* deps,
int ctxk_i, ciKlass* ctxk);
-#if INCLUDE_JVMCI
- bool maybe_merge_ctxk(GrowableArray<DepValue>* deps,
- int ctxk_i, DepValue ctxk);
-#endif
void sort_all_deps();
size_t estimate_size_in_bytes();
@@ -316,9 +233,6 @@ class Dependencies: public ResourceObj {
Dependencies(ciEnv* env) {
initialize(env);
}
-#if INCLUDE_JVMCI
- Dependencies(Arena* arena, OopRecorder* oop_recorder, CompileLog* log);
-#endif
private:
// Check for a valid context type.
@@ -349,36 +263,10 @@ class Dependencies: public ResourceObj {
void assert_evol_method(ciMethod* m);
void assert_leaf_type(ciKlass* ctxk);
void assert_abstract_with_unique_concrete_subtype(ciKlass* ctxk, ciKlass* conck);
- void assert_unique_concrete_method(ciKlass* ctxk, ciMethod* uniqm);
void assert_unique_concrete_method(ciKlass* ctxk, ciMethod* uniqm, ciKlass* resolved_klass, ciMethod* resolved_method);
void assert_unique_implementor(ciInstanceKlass* ctxk, ciInstanceKlass* uniqk);
void assert_has_no_finalizable_subclasses(ciKlass* ctxk);
void assert_call_site_target_value(ciCallSite* call_site, ciMethodHandle* method_handle);
-#if INCLUDE_JVMCI
- private:
- static void check_ctxk(Klass* ctxk) {
- assert(ctxk->is_instance_klass(), "java types only");
- }
- static void check_ctxk_abstract(Klass* ctxk) {
- check_ctxk(ctxk);
- assert(ctxk->is_abstract(), "must be abstract");
- }
- static void check_unique_method(Klass* ctxk, Method* m) {
- assert(!m->can_be_statically_bound(InstanceKlass::cast(ctxk)), "redundant");
- }
-
- void assert_common_1(DepType dept, DepValue x);
- void assert_common_2(DepType dept, DepValue x0, DepValue x1);
-
- public:
- void assert_evol_method(Method* m);
- void assert_has_no_finalizable_subclasses(Klass* ctxk);
- void assert_leaf_type(Klass* ctxk);
- void assert_unique_implementor(InstanceKlass* ctxk, InstanceKlass* uniqk);
- void assert_unique_concrete_method(Klass* ctxk, Method* uniqm);
- void assert_abstract_with_unique_concrete_subtype(Klass* ctxk, Klass* conck);
- void assert_call_site_target_value(oop callSite, oop methodHandle);
-#endif // INCLUDE_JVMCI
// Define whether a given method or type is concrete.
// These methods define the term "concrete" as used in this module.
@@ -393,9 +281,6 @@ class Dependencies: public ResourceObj {
static bool is_concrete_method(Method* m, Klass* k); // m is invocable
static Klass* find_finalizable_subclass(InstanceKlass* ik);
- static bool is_concrete_root_method(Method* uniqm, InstanceKlass* ctxk);
- static Klass* find_witness_AME(InstanceKlass* ctxk, Method* m, KlassDepChange* changes = nullptr);
-
// These versions of the concreteness queries work through the CI.
// The CI versions are allowed to skew sometimes from the VM
// (oop-based) versions. The cost of such a difference is a
@@ -423,7 +308,6 @@ class Dependencies: public ResourceObj {
static Klass* check_leaf_type(InstanceKlass* ctxk);
static Klass* check_abstract_with_unique_concrete_subtype(InstanceKlass* ctxk, Klass* conck, NewKlassDepChange* changes = nullptr);
static Klass* check_unique_implementor(InstanceKlass* ctxk, Klass* uniqk, NewKlassDepChange* changes = nullptr);
- static Klass* check_unique_concrete_method(InstanceKlass* ctxk, Method* uniqm, NewKlassDepChange* changes = nullptr);
static Klass* check_unique_concrete_method(InstanceKlass* ctxk, Method* uniqm, Klass* resolved_klass, Method* resolved_method, KlassDepChange* changes = nullptr);
static Klass* check_has_no_finalizable_subclasses(InstanceKlass* ctxk, NewKlassDepChange* changes = nullptr);
static Klass* check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes = nullptr);
@@ -443,8 +327,7 @@ class Dependencies: public ResourceObj {
// Detecting possible new assertions:
static Klass* find_unique_concrete_subtype(InstanceKlass* ctxk);
- static Method* find_unique_concrete_method(InstanceKlass* ctxk, Method* m,
- Klass** participant = nullptr); // out parameter
+
static Method* find_unique_concrete_method(InstanceKlass* ctxk, Method* m, Klass* resolved_klass, Method* resolved_method);
#ifdef ASSERT
diff --git a/src/hotspot/share/code/exceptionHandlerTable.hpp b/src/hotspot/share/code/exceptionHandlerTable.hpp
index 9d7981f392c..1dfab530dad 100644
--- a/src/hotspot/share/code/exceptionHandlerTable.hpp
+++ b/src/hotspot/share/code/exceptionHandlerTable.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -155,13 +155,6 @@ public:
void set_size( uint size );
void append( uint exec_off, uint cont_off );
-#if INCLUDE_JVMCI
- void add_deoptimize(uint exec_off) {
- // Use the same offset as a marker value for deoptimization
- append(exec_off, exec_off);
- }
-#endif
-
// Returns the offset to continue execution at. If the returned
// value equals exec_off then the dispatch is expected to be a
// deoptimization instead.
diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp
index 815c0c7b4b0..27f01797d39 100644
--- a/src/hotspot/share/code/nmethod.cpp
+++ b/src/hotspot/share/code/nmethod.cpp
@@ -86,9 +86,6 @@
#include "utilities/globalDefinitions.hpp"
#include "utilities/hashTable.hpp"
#include "utilities/xmlstream.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciRuntime.hpp"
-#endif
#ifdef DTRACE_ENABLED
@@ -144,10 +141,6 @@ struct java_nmethod_stats_struct {
uint handler_table_size;
uint scopes_pcs_size;
uint scopes_data_size;
-#if INCLUDE_JVMCI
- uint speculations_size;
- uint jvmci_data_size;
-#endif
void note_nmethod(nmethod* nm) {
nmethod_count += 1;
@@ -165,10 +158,6 @@ struct java_nmethod_stats_struct {
dependencies_size += nm->dependencies_size();
handler_table_size += nm->handler_table_size();
nul_chk_table_size += nm->nul_chk_table_size();
-#if INCLUDE_JVMCI
- speculations_size += nm->speculations_size();
- jvmci_data_size += nm->jvmci_data_size();
-#endif
}
void print_nmethod_stats(const char* name) {
if (nmethod_count == 0) return;
@@ -203,11 +192,6 @@ struct java_nmethod_stats_struct {
if (metadata_size != 0) {
tty->print_cr(" metadata = %u (%f%%)", metadata_size, (metadata_size * 100.0f)/total_mut_size);
}
-#if INCLUDE_JVMCI
- if (jvmci_data_size != 0) {
- tty->print_cr(" JVMCI data = %u (%f%%)", jvmci_data_size, (jvmci_data_size * 100.0f)/total_mut_size);
- }
-#endif
if (total_immut_size != 0) {
tty->print_cr(" immutable data = %u (%f%%)", total_immut_size, (total_immut_size * 100.0f)/total_size);
}
@@ -226,11 +210,6 @@ struct java_nmethod_stats_struct {
if (scopes_data_size != 0) {
tty->print_cr(" scopes data = %u (%f%%)", scopes_data_size, (scopes_data_size * 100.0f)/total_immut_size);
}
-#if INCLUDE_JVMCI
- if (speculations_size != 0) {
- tty->print_cr(" speculations = %u (%f%%)", speculations_size, (speculations_size * 100.0f)/total_immut_size);
- }
-#endif
}
};
@@ -289,9 +268,6 @@ static java_nmethod_stats_struct c1_java_nmethod_stats;
#ifdef COMPILER2
static java_nmethod_stats_struct c2_java_nmethod_stats;
#endif
-#if INCLUDE_JVMCI
-static java_nmethod_stats_struct jvmci_java_nmethod_stats;
-#endif
static java_nmethod_stats_struct unknown_java_nmethod_stats;
static native_nmethod_stats_struct native_nmethod_stats;
@@ -307,11 +283,6 @@ static void note_java_nmethod(nmethod* nm) {
if (nm->is_compiled_by_c2()) {
c2_java_nmethod_stats.note_nmethod(nm);
} else
-#endif
-#if INCLUDE_JVMCI
- if (nm->is_compiled_by_jvmci()) {
- jvmci_java_nmethod_stats.note_nmethod(nm);
- } else
#endif
{
unknown_java_nmethod_stats.note_nmethod(nm);
@@ -926,7 +897,7 @@ void nmethod::cleanup_inline_caches_impl(bool unloading_occurred, bool clean_all
}
}
-address nmethod::continuation_for_implicit_exception(address pc, bool for_div0_check) {
+address nmethod::continuation_for_implicit_exception(address pc) {
// Exception happened outside inline-cache check code => we are inside
// an active nmethod => use cpc to determine a return address
int exception_offset = int(pc - code_begin());
@@ -953,18 +924,7 @@ address nmethod::continuation_for_implicit_exception(address pc, bool for_div0_c
// Let the normal error handling report the exception
return nullptr;
}
- if (cont_offset == exception_offset) {
-#if INCLUDE_JVMCI
- Deoptimization::DeoptReason deopt_reason = for_div0_check ? Deoptimization::Reason_div0_check : Deoptimization::Reason_null_check;
- JavaThread *thread = JavaThread::current();
- thread->set_jvmci_implicit_exception_pc(pc);
- thread->set_pending_deoptimization(Deoptimization::make_trap_request(deopt_reason,
- Deoptimization::Action_reinterpret));
- return (SharedRuntime::deopt_blob()->implicit_exception_uncommon_trap());
-#else
- ShouldNotReachHere();
-#endif
- }
+ guarantee(cont_offset != exception_offset, "continuation offset and exception offset must be different");
return code_begin() + cont_offset;
}
@@ -1062,11 +1022,9 @@ static void assert_no_oops_or_metadata(nmethod* nm) {
}
#endif
-static int required_mutable_data_size(CodeBuffer* code_buffer,
- int jvmci_data_size = 0) {
+static int required_mutable_data_size(CodeBuffer* code_buffer) { // sum of the buffer's relocation and metadata sizes, each rounded up to oopSize
return align_up(code_buffer->total_relocation_size(), oopSize) +
- align_up(code_buffer->total_metadata_size(), oopSize) +
- align_up(jvmci_data_size, oopSize);
+ align_up(code_buffer->total_metadata_size(), oopSize);
}
nmethod* nmethod::new_native_nmethod(const methodHandle& method,
@@ -1130,13 +1088,8 @@ nmethod* nmethod::new_nmethod(const methodHandle& method,
ExceptionHandlerTable* handler_table,
ImplicitExceptionTable* nul_chk_table,
AbstractCompiler* compiler,
- CompLevel comp_level
-#if INCLUDE_JVMCI
- , char* speculations,
- int speculations_len,
- JVMCINMethodData* jvmci_data
-#endif
-)
+ CompLevel comp_level,
+ Flags flags)
{
assert(debug_info->oop_recorder() == code_buffer->oop_recorder(), "shared OR");
code_buffer->finalize_oop_references(method);
@@ -1149,9 +1102,6 @@ nmethod* nmethod::new_nmethod(const methodHandle& method,
+ align_up((int)dependencies->size_in_bytes(), oopSize)
+ align_up(handler_table->size_in_bytes() , oopSize)
+ align_up(nul_chk_table->size_in_bytes() , oopSize)
-#if INCLUDE_JVMCI
- + align_up(speculations_len , oopSize)
-#endif
+ align_up(debug_info->data_size() , oopSize);
// First, allocate space for immutable data in C heap.
@@ -1165,8 +1115,7 @@ nmethod* nmethod::new_nmethod(const methodHandle& method,
}
}
- int mutable_data_size = required_mutable_data_size(code_buffer
- JVMCI_ONLY(COMMA (compiler->is_jvmci() ? jvmci_data->size() : 0)));
+ int mutable_data_size = required_mutable_data_size(code_buffer);
{
MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
@@ -1175,13 +1124,7 @@ nmethod* nmethod::new_nmethod(const methodHandle& method,
nmethod(method(), compiler->type(), nmethod_size, immutable_data_size, mutable_data_size,
compile_id, entry_bci, immutable_data, offsets, orig_pc_offset,
debug_info, dependencies, code_buffer, frame_size, oop_maps,
- handler_table, nul_chk_table, compiler, comp_level
-#if INCLUDE_JVMCI
- , speculations,
- speculations_len,
- jvmci_data
-#endif
- );
+ handler_table, nul_chk_table, compiler, comp_level, flags);
if (nm != nullptr) {
// To make dependency checking during class loading fast, record
@@ -1229,13 +1172,9 @@ void nmethod::init_defaults(CodeBuffer *code_buffer, CodeOffsets* offsets) {
_is_unloading_state = 0;
_state = not_installed;
- _has_unsafe_access = 0;
- _has_wide_vectors = 0;
- _has_monitors = 0;
- _has_scoped_access = 0;
- _has_flushed_dependencies = 0;
- _is_unlinked = 0;
- _load_reported = 0; // jvmti state
+ _has_flushed_dependencies = false;
+ _is_unlinked = false;
+ _load_reported = false; // jvmti state
_deoptimization_status = not_marked;
@@ -1319,7 +1258,6 @@ nmethod::nmethod(
_unwind_handler_offset = 0;
int metadata_size = align_up(code_buffer->total_metadata_size(), wordSize);
- JVMCI_ONLY( _metadata_size = metadata_size; )
assert(_mutable_data_size == _relocation_size + metadata_size,
"wrong mutable data size: %d != %d + %d",
_mutable_data_size, _relocation_size, metadata_size);
@@ -1331,9 +1269,6 @@ nmethod::nmethod(
_handler_table_offset = 0;
_scopes_pcs_offset = 0;
_scopes_data_offset = 0;
-#if INCLUDE_JVMCI
- _speculations_offset = 0;
-#endif
_immutable_data_ref_count_offset = 0;
code_buffer->copy_code_and_locs_to(this);
@@ -1386,7 +1321,8 @@ nmethod::nmethod(
}
-nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm._header_size)
+nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm._header_size),
+ _flags(nm._flags)
{
if (nm._oop_maps != nullptr) {
@@ -1456,16 +1392,10 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
_deopt_handler_entry_offset = nm._deopt_handler_entry_offset;
_unwind_handler_offset = nm._unwind_handler_offset;
_num_stack_arg_slots = nm._num_stack_arg_slots;
-#if INCLUDE_JVMCI
- _metadata_size = nm._metadata_size;
-#endif
_nul_chk_table_offset = nm._nul_chk_table_offset;
_handler_table_offset = nm._handler_table_offset;
_scopes_pcs_offset = nm._scopes_pcs_offset;
_scopes_data_offset = nm._scopes_data_offset;
-#if INCLUDE_JVMCI
- _speculations_offset = nm._speculations_offset;
-#endif
_immutable_data_ref_count_offset = nm._immutable_data_ref_count_offset;
// Increment number of references to immutable data to share it between nmethods
@@ -1483,10 +1413,6 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
_is_unloading_state = nm._is_unloading_state;
_state = not_installed;
- _has_unsafe_access = nm._has_unsafe_access;
- _has_wide_vectors = nm._has_wide_vectors;
- _has_monitors = nm._has_monitors;
- _has_scoped_access = nm._has_scoped_access;
_has_flushed_dependencies = nm._has_flushed_dependencies;
_is_unlinked = nm._is_unlinked;
_load_reported = nm._load_reported;
@@ -1631,12 +1557,6 @@ bool nmethod::is_relocatable() {
return false;
}
-#if INCLUDE_JVMCI
- if (jvmci_nmethod_data() != nullptr && jvmci_nmethod_data()->has_mirror()) {
- return false;
- }
-#endif
-
if (is_unloading()) {
return false;
}
@@ -1684,19 +1604,15 @@ nmethod::nmethod(
ExceptionHandlerTable* handler_table,
ImplicitExceptionTable* nul_chk_table,
AbstractCompiler* compiler,
- CompLevel comp_level
-#if INCLUDE_JVMCI
- , char* speculations,
- int speculations_len,
- JVMCINMethodData* jvmci_data
-#endif
- )
+ CompLevel comp_level,
+ Flags flags)
: CodeBlob("nmethod", CodeBlobKind::Nmethod, code_buffer, nmethod_size, sizeof(nmethod),
offsets->value(CodeOffsets::Frame_Complete), frame_size, oop_maps, false, mutable_data_size),
_deoptimization_generation(0),
_gc_epoch(CodeCache::gc_epoch()),
_method(method),
- _osr_link(nullptr)
+ _osr_link(nullptr),
+ _flags(flags)
{
assert(debug_info->oop_recorder() == code_buffer->oop_recorder(), "shared OR");
{
@@ -1716,36 +1632,20 @@ nmethod::nmethod(
set_ctable_begin(header_begin() + content_offset());
-#if INCLUDE_JVMCI
- if (compiler->is_jvmci()) {
- // JVMCI might not produce any stub sections
- if (offsets->value(CodeOffsets::Exceptions) != -1) {
- _exception_offset = code_offset() + offsets->value(CodeOffsets::Exceptions);
- } else {
- _exception_offset = -1;
- }
- if (offsets->value(CodeOffsets::Deopt) != -1) {
- _deopt_handler_entry_offset = code_offset() + offsets->value(CodeOffsets::Deopt);
- } else {
- _deopt_handler_entry_offset = -1;
- }
- } else
-#endif
- {
- // Exception handler and deopt handler are in the stub section
- assert(offsets->value(CodeOffsets::Deopt ) != -1, "must be set");
+ // Exception handler and deopt handler are in the stub section
+ assert(offsets->value(CodeOffsets::Deopt ) != -1, "must be set");
- bool has_exception_handler = (offsets->value(CodeOffsets::Exceptions) != -1);
- assert(has_exception_handler == (compiler->type() != compiler_c2),
- "C2 compiler doesn't provide exception handler stub code.");
- if (has_exception_handler) {
- _exception_offset = _stub_offset + offsets->value(CodeOffsets::Exceptions);
- } else {
- _exception_offset = -1;
- }
-
- _deopt_handler_entry_offset = _stub_offset + offsets->value(CodeOffsets::Deopt);
+ bool has_exception_handler = (offsets->value(CodeOffsets::Exceptions) != -1);
+ assert(has_exception_handler == (compiler->type() != compiler_c2),
+ "C2 compiler doesn't provide exception handler stub code.");
+ if (has_exception_handler) {
+ _exception_offset = _stub_offset + offsets->value(CodeOffsets::Exceptions);
+ } else {
+ _exception_offset = -1;
}
+
+ _deopt_handler_entry_offset = _stub_offset + offsets->value(CodeOffsets::Deopt);
+
if (offsets->value(CodeOffsets::UnwindHandler) != -1) {
// C1 generates UnwindHandler at the end of instructions section.
// Calculate positive offset as distance between the start of stubs section
@@ -1757,11 +1657,9 @@ nmethod::nmethod(
}
int metadata_size = align_up(code_buffer->total_metadata_size(), wordSize);
- JVMCI_ONLY( _metadata_size = metadata_size; )
- int jvmci_data_size = 0 JVMCI_ONLY( + align_up(compiler->is_jvmci() ? jvmci_data->size() : 0, oopSize));
- assert(_mutable_data_size == _relocation_size + metadata_size + jvmci_data_size,
- "wrong mutable data size: %d != %d + %d + %d",
- _mutable_data_size, _relocation_size, metadata_size, jvmci_data_size);
+ assert(_mutable_data_size == _relocation_size + metadata_size,
+ "wrong mutable data size: %d != %d + %d",
+ _mutable_data_size, _relocation_size, metadata_size);
assert(nmethod_size == data_end() - header_begin(), "wrong nmethod size: %d != %d",
nmethod_size, (int)(code_end() - header_begin()));
@@ -1778,12 +1676,7 @@ nmethod::nmethod(
_scopes_pcs_offset = _handler_table_offset + align_up(handler_table->size_in_bytes(), oopSize);
_scopes_data_offset = _scopes_pcs_offset + adjust_pcs_size(debug_info->pcs_size());
-#if INCLUDE_JVMCI
- _speculations_offset = _scopes_data_offset + align_up(debug_info->data_size(), oopSize);
- _immutable_data_ref_count_offset = _speculations_offset + align_up(speculations_len, oopSize);
-#else
_immutable_data_ref_count_offset = _scopes_data_offset + align_up(debug_info->data_size(), oopSize);
-#endif
DEBUG_ONLY( int immutable_data_end_offset = _immutable_data_ref_count_offset + ImmutableDataRefCountSize; )
assert(immutable_data_end_offset <= immutable_data_size, "wrong read-only data size: %d > %d",
immutable_data_end_offset, immutable_data_size);
@@ -1799,30 +1692,17 @@ nmethod::nmethod(
// Create cache after PcDesc data is copied - it will be used to initialize cache
_pc_desc_container = new PcDescContainer(scopes_pcs_begin());
-#if INCLUDE_JVMCI
- if (compiler->is_jvmci()) {
- // Initialize the JVMCINMethodData object inlined into nm
- jvmci_nmethod_data()->copy(jvmci_data);
- }
-#endif
-
// Copy contents of ExceptionHandlerTable to nmethod
handler_table->copy_to(this);
nul_chk_table->copy_to(this);
-#if INCLUDE_JVMCI
- // Copy speculations to nmethod
- if (speculations_size() != 0) {
- memcpy(speculations_begin(), speculations, speculations_len);
- }
-#endif
init_immutable_data_ref_count();
post_init();
// we use the information of entry points to find out if a method is
// static or non static
- assert(compiler->is_c2() || compiler->is_jvmci() ||
+ assert(compiler->is_c2() ||
_method->is_static() == (entry_point() == verified_entry_point()),
" entry points must be same for static methods and vice versa");
}
@@ -1838,16 +1718,6 @@ void nmethod::log_identity(xmlStream* log) const {
if (TieredCompilation) {
log->print(" level='%d'", comp_level());
}
-#if INCLUDE_JVMCI
- if (jvmci_nmethod_data() != nullptr) {
- const char* jvmci_name = jvmci_nmethod_data()->name();
- if (jvmci_name != nullptr) {
- log->print(" jvmci_mirror_name='");
- log->text("%s", jvmci_name);
- log->print("'");
- }
- }
-#endif
}
@@ -1956,10 +1826,6 @@ void nmethod::print_nmethod(bool printmethod) {
tty->cr();
tty->print_cr("============================= C1-compiled nmethod ==============================");
}
- if (is_compiled_by_jvmci()) {
- tty->cr();
- tty->print_cr("=========================== JVMCI-compiled nmethod =============================");
- }
tty->print_cr("----------------------------------- Assembly -----------------------------------");
decode2(tty);
#if defined(SUPPORT_DATA_STRUCTS)
@@ -2251,13 +2117,10 @@ bool nmethod::is_maybe_on_stack() {
}
void nmethod::inc_decompile_count() {
- if (!is_compiled_by_c2() && !is_compiled_by_jvmci()) return;
- // Could be gated by ProfileTraps, but do not bother...
-#if INCLUDE_JVMCI
- if (jvmci_skip_profile_deopt()) {
+ if (!is_compiled_by_c2()) {
return;
}
-#endif
+ // Could be gated by ProfileTraps, but do not bother...
Method* m = method();
if (m == nullptr) return;
MethodData* mdo = m->method_data();
@@ -2381,14 +2244,6 @@ bool nmethod::make_not_entrant(InvalidationReason invalidation_reason) {
} // leave critical region under NMethodState_lock
-#if INCLUDE_JVMCI
- // Invalidate can't occur while holding the NMethodState_lock
- JVMCINMethodData* nmethod_data = jvmci_nmethod_data();
- if (nmethod_data != nullptr) {
- nmethod_data->invalidate_nmethod_mirror(this, invalidation_reason);
- }
-#endif
-
#ifdef ASSERT
if (is_osr_method() && method() != nullptr) {
// Make sure osr nmethod is invalidated, i.e. not on the list
@@ -2419,16 +2274,6 @@ void nmethod::unlink() {
invalidate_osr_method();
}
-#if INCLUDE_JVMCI
- // Clear the link between this nmethod and a HotSpotNmethod mirror
- JVMCINMethodData* nmethod_data = jvmci_nmethod_data();
- if (nmethod_data != nullptr) {
- nmethod_data->invalidate_nmethod_mirror(this, is_cold() ?
- nmethod::InvalidationReason::UNLOADING_COLD :
- nmethod::InvalidationReason::UNLOADING);
- }
-#endif
-
// Post before flushing as jmethodID is being used
post_compiled_method_unload();
@@ -2491,7 +2336,6 @@ void nmethod::purge(bool unregister_nmethod) {
CodeCache::unregister_old_nmethod(this);
- JVMCI_ONLY( _metadata_size = 0; )
CodeBlob::purge();
}
@@ -3308,30 +3152,6 @@ void nmethod::verify() {
}
}
-#ifdef ASSERT
-#if INCLUDE_JVMCI
- {
- // Verify that implicit exceptions that deoptimize have a PcDesc and OopMap
- ImmutableOopMapSet* oms = oop_maps();
- ImplicitExceptionTable implicit_table(this);
- for (uint i = 0; i < implicit_table.len(); i++) {
- int exec_offset = (int) implicit_table.get_exec_offset(i);
- if (implicit_table.get_exec_offset(i) == implicit_table.get_cont_offset(i)) {
- assert(pc_desc_at(code_begin() + exec_offset) != nullptr, "missing PcDesc");
- bool found = false;
- for (int i = 0, imax = oms->count(); i < imax; i++) {
- if (oms->pair_at(i)->pc_offset() == exec_offset) {
- found = true;
- break;
- }
- }
- assert(found, "missing oopmap");
- }
- }
- }
-#endif
-#endif
-
VerifyOopsClosure voc(this);
oops_do(&voc);
assert(voc.ok(), "embedded oops must be OK");
@@ -3425,8 +3245,6 @@ void nmethod::print_on_impl(outputStream* st) const {
st->print("(c1) ");
} else if (is_compiled_by_c2()) {
st->print("(c2) ");
- } else if (is_compiled_by_jvmci()) {
- st->print("(JVMCI) ");
} else {
st->print("(n/a) ");
}
@@ -3472,12 +3290,6 @@ void nmethod::print_on_impl(outputStream* st) const {
p2i(metadata_begin()),
p2i(metadata_end()),
metadata_size());
-#if INCLUDE_JVMCI
- if (jvmci_data_size () > 0) st->print_cr(" JVMCI data [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
- p2i(jvmci_data_begin()),
- p2i(jvmci_data_end()),
- jvmci_data_size());
-#endif
if (immutable_data_size() > 0) st->print_cr(" immutable data [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
p2i(immutable_data_begin()),
p2i(immutable_data_end()),
@@ -3502,12 +3314,6 @@ void nmethod::print_on_impl(outputStream* st) const {
p2i(scopes_data_begin()),
p2i(scopes_data_end()),
scopes_data_size());
-#if INCLUDE_JVMCI
- if (speculations_size () > 0) st->print_cr(" speculations [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
- p2i(speculations_begin()),
- p2i(speculations_end()),
- speculations_size());
-#endif
}
void nmethod::print_code() {
@@ -4080,8 +3886,8 @@ const char* nmethod::nmethod_section_label(address pos) const {
if (pos == consts_begin() && pos != insts_begin()) label = "[Constants]";
// Check stub_code before checking exception_handler or deopt_handler.
if (pos == this->stub_begin()) label = "[Stub Code]";
- if (JVMCI_ONLY(_exception_offset >= 0 &&) pos == exception_begin()) label = "[Exception Handler]";
- if (JVMCI_ONLY(_deopt_handler_entry_offset != -1 &&) pos == deopt_handler_entry()) label = "[Deopt Handler Entry Point]";
+ if (pos == exception_begin()) label = "[Exception Handler]";
+ if (pos == deopt_handler_entry()) label = "[Deopt Handler Entry Point]";
return label;
}
@@ -4232,12 +4038,7 @@ void nmethod::print_code_comment_on(outputStream* st, int column, address begin,
const ImmutableOopMap* om = pair->get_from(oms);
address pc = base + pair->pc_offset();
if (pc >= begin) {
-#if INCLUDE_JVMCI
- bool is_implicit_deopt = implicit_table.continuation_offset(pair->pc_offset()) == (uint) pair->pc_offset();
-#else
- bool is_implicit_deopt = false;
-#endif
- if (is_implicit_deopt ? pc == begin : pc > begin && pc <= end) {
+ if (pc > begin && pc <= end) {
st->move_to(column, 6, 0);
st->print("; ");
om->print_on(st);
@@ -4431,9 +4232,6 @@ void nmethod::print_statistics() {
#endif
#ifdef COMPILER2
c2_java_nmethod_stats.print_nmethod_stats("C2");
-#endif
-#if INCLUDE_JVMCI
- jvmci_java_nmethod_stats.print_nmethod_stats("JVMCI");
#endif
unknown_java_nmethod_stats.print_nmethod_stats("Unknown");
DebugInformationRecorder::print_statistics();
@@ -4444,25 +4242,3 @@ void nmethod::print_statistics() {
}
#endif // !PRODUCT
-
-#if INCLUDE_JVMCI
-void nmethod::update_speculation(JavaThread* thread) {
- jlong speculation = thread->pending_failed_speculation();
- if (speculation != 0) {
- guarantee(jvmci_nmethod_data() != nullptr, "failed speculation in nmethod without failed speculation list");
- jvmci_nmethod_data()->add_failed_speculation(this, speculation);
- thread->set_pending_failed_speculation(0);
- }
-}
-
-const char* nmethod::jvmci_name() {
- if (jvmci_nmethod_data() != nullptr) {
- return jvmci_nmethod_data()->name();
- }
- return nullptr;
-}
-
-bool nmethod::jvmci_skip_profile_deopt() const {
- return jvmci_nmethod_data() != nullptr && !jvmci_nmethod_data()->profile_deopt();
-}
-#endif
diff --git a/src/hotspot/share/code/nmethod.hpp b/src/hotspot/share/code/nmethod.hpp
index ea8c0e2ad5d..86db3603966 100644
--- a/src/hotspot/share/code/nmethod.hpp
+++ b/src/hotspot/share/code/nmethod.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -146,7 +146,6 @@ public:
// As a CodeBlob, an nmethod references [mutable data] allocated on the C heap:
// - CodeBlob relocation data
// - Metainfo
-// - JVMCI data
//
// An nmethod references [immutable data] allocated on C heap:
// - Dependency assertions data
@@ -155,19 +154,11 @@ public:
// - Debugging information:
// - Scopes data array
// - Scopes pcs array
-// - JVMCI speculations array
// - Nmethod reference counter
-#if INCLUDE_JVMCI
-class FailedSpeculation;
-class JVMCINMethodData;
-#endif
-
class nmethod : public CodeBlob {
friend class VMStructs;
- friend class JVMCIVMStructs;
friend class CodeCache; // scavengable oops
- friend class JVMCINMethodData;
friend class DeoptimizationScope;
#define ImmutableDataRefCountSize ((int)sizeof(int))
@@ -236,21 +227,12 @@ class nmethod : public CodeBlob {
// Number of arguments passed on the stack
uint16_t _num_stack_arg_slots;
-#if INCLUDE_JVMCI
- // _metadata_size is not specific to JVMCI. In the non-JVMCI case, it can be derived as:
- // _metadata_size = mutable_data_size - relocation_size
- int _metadata_size;
-#endif
-
// Offset in immutable data section
// _dependencies_offset == 0
uint16_t _nul_chk_table_offset;
uint16_t _handler_table_offset; // This table could be big in C1 code
int _scopes_pcs_offset;
int _scopes_data_offset;
-#if INCLUDE_JVMCI
- int _speculations_offset;
-#endif
int _immutable_data_ref_count_offset;
// location in frame (offset for sp) that deopt can store the original
@@ -267,14 +249,50 @@ class nmethod : public CodeBlob {
// Protected by NMethodState_lock
volatile signed char _state; // {not_installed, in_use, not_entrant}
- // set during construction
- uint8_t _has_unsafe_access:1, // May fault due to unsafe access.
- _has_wide_vectors:1, // Preserve wide vectors at safepoints
- _has_monitors:1, // Fastpath monitor detection for continuations
- _has_scoped_access:1, // used by for shared scope closure (scopedMemoryAccess.cpp)
- _has_flushed_dependencies:1, // Used for maintenance of dependencies (under CodeCache_lock)
- _is_unlinked:1, // mark during class unloading
- _load_reported:1; // used by jvmti to track if an event has been posted for this nmethod
+public:
+ struct Flags { // Four boolean nmethod properties packed into a single byte.
+ uint8_t const _bits; // OR of the masks below; const, so it cannot change after construction
+ // One mask bit per property.
+ enum : uint8_t {
+ UNSAFE_ACCESS = 1 << 0, // tested by has_unsafe_access()
+ WIDE_VECTORS = 1 << 1, // tested by has_wide_vectors()
+ MONITORS = 1 << 2, // tested by has_monitors()
+ SCOPED_ACCESS = 1 << 3 // tested by has_scoped_access()
+ };
+ // Constructors: the default one sets no bit; the other sets one bit per true argument.
+ Flags() : _bits(0) {}
+ Flags(bool has_unsafe_access, bool has_wide_vectors, bool has_monitors, bool has_scoped_access) :
+ _bits((has_unsafe_access ? UNSAFE_ACCESS : 0) |
+ (has_wide_vectors ? WIDE_VECTORS : 0) |
+ (has_monitors ? MONITORS : 0) |
+ (has_scoped_access ? SCOPED_ACCESS : 0))
+ {}
+ // Accessors: each returns true when its mask bit is set in _bits.
+ // May fault due to unsafe access
+ bool has_unsafe_access() const { return (_bits & UNSAFE_ACCESS) != 0; }
+
+ // Preserve wide vectors at safepoints
+ bool has_wide_vectors() const { return (_bits & WIDE_VECTORS) != 0; }
+
+ // Fastpath monitor detection for continuations
+ bool has_monitors() const { return (_bits & MONITORS) != 0; }
+
+ // Used by shared scope closure (scopedMemoryAccess.cpp)
+ bool has_scoped_access() const { return (_bits & SCOPED_ACCESS) != 0; }
+ };
+
+private:
+ // Persistent bits, set once during construction.
+ Flags const _flags;
+
+ // Used for maintenance of dependencies (under CodeCache_lock)
+ bool _has_flushed_dependencies;
+
+ // Mark during class unloading
+ bool _is_unlinked;
+
+ // Used by JVMTI to track if an event has been posted for this nmethod
+ bool _load_reported;
enum DeoptimizationStatus : u1 {
not_marked,
@@ -327,13 +345,8 @@ class nmethod : public CodeBlob {
ExceptionHandlerTable* handler_table,
ImplicitExceptionTable* nul_chk_table,
AbstractCompiler* compiler,
- CompLevel comp_level
-#if INCLUDE_JVMCI
- , char* speculations = nullptr,
- int speculations_len = 0,
- JVMCINMethodData* jvmci_data = nullptr
-#endif
- );
+ CompLevel comp_level,
+ Flags flags);
nmethod(const nmethod &nm);
@@ -474,8 +487,6 @@ class nmethod : public CodeBlob {
void oops_do_set_strong_done(nmethod* old_head);
public:
- // If you change anything in this enum please patch
- // vmStructs_jvmci.cpp accordingly.
enum class InvalidationReason : s1 {
NOT_INVALIDATED = -1,
C1_CODEPATCH,
@@ -485,10 +496,6 @@ public:
CI_REPLAY,
UNLOADING,
UNLOADING_COLD,
- JVMCI_INVALIDATE,
- JVMCI_MATERIALIZE_VIRTUAL_OBJECT,
- JVMCI_REPLACED_WITH_NEW_CODE,
- JVMCI_REPROFILE,
MARKED_FOR_DEOPTIMIZATION,
MISSING_EXCEPTION_HANDLER,
NOT_USED,
@@ -516,14 +523,6 @@ public:
return "C1 predicate failed trap";
case InvalidationReason::CI_REPLAY:
return "CI replay";
- case InvalidationReason::JVMCI_INVALIDATE:
- return "JVMCI invalidate";
- case InvalidationReason::JVMCI_MATERIALIZE_VIRTUAL_OBJECT:
- return "JVMCI materialize virtual object";
- case InvalidationReason::JVMCI_REPLACED_WITH_NEW_CODE:
- return "JVMCI replaced with new code";
- case InvalidationReason::JVMCI_REPROFILE:
- return "JVMCI reprofile";
case InvalidationReason::MARKED_FOR_DEOPTIMIZATION:
return "marked for deoptimization";
case InvalidationReason::MISSING_EXCEPTION_HANDLER:
@@ -567,13 +566,8 @@ public:
ExceptionHandlerTable* handler_table,
ImplicitExceptionTable* nul_chk_table,
AbstractCompiler* compiler,
- CompLevel comp_level
-#if INCLUDE_JVMCI
- , char* speculations = nullptr,
- int speculations_len = 0,
- JVMCINMethodData* jvmci_data = nullptr
-#endif
- );
+ CompLevel comp_level,
+ Flags flags);
// Relocate the nmethod to the code heap identified by code_blob_type.
// Returns nullptr if the code heap does not have enough space, the
@@ -608,7 +602,6 @@ public:
inline bool is_compiled_by_c1 () const { return _compiler_type == compiler_c1; }
inline bool is_compiled_by_c2 () const { return _compiler_type == compiler_c2; }
- inline bool is_compiled_by_jvmci() const { return _compiler_type == compiler_jvmci; }
CompilerType compiler_type () const { return _compiler_type; }
const char* compiler_name () const;
@@ -627,13 +620,7 @@ public:
// mutable data
Metadata** metadata_begin () const { return (Metadata**) (mutable_data_begin() + _relocation_size); }
-#if INCLUDE_JVMCI
- Metadata** metadata_end () const { return (Metadata**) (mutable_data_begin() + _relocation_size + _metadata_size); }
- address jvmci_data_begin () const { return mutable_data_begin() + _relocation_size + _metadata_size; }
- address jvmci_data_end () const { return mutable_data_end(); }
-#else
Metadata** metadata_end () const { return (Metadata**) mutable_data_end(); }
-#endif
// immutable data
address immutable_data_begin () const { return _immutable_data; }
@@ -648,13 +635,7 @@ public:
PcDesc* scopes_pcs_end () const { return (PcDesc*)(_immutable_data + _scopes_data_offset) ; }
address scopes_data_begin () const { return _immutable_data + _scopes_data_offset ; }
-#if INCLUDE_JVMCI
- address scopes_data_end () const { return _immutable_data + _speculations_offset ; }
- address speculations_begin () const { return _immutable_data + _speculations_offset ; }
- address speculations_end () const { return _immutable_data + _immutable_data_ref_count_offset ; }
-#else
address scopes_data_end () const { return _immutable_data + _immutable_data_ref_count_offset ; }
-#endif
address immutable_data_ref_count_begin () const { return _immutable_data + _immutable_data_ref_count_offset ; }
// Sizes
@@ -669,10 +650,6 @@ public:
int dependencies_size () const { return int( dependencies_end () - dependencies_begin ()); }
int handler_table_size () const { return int( handler_table_end() - handler_table_begin()); }
int nul_chk_table_size () const { return int( nul_chk_table_end() - nul_chk_table_begin()); }
-#if INCLUDE_JVMCI
- int speculations_size () const { return int( speculations_end () - speculations_begin ()); }
- int jvmci_data_size () const { return int( jvmci_data_end () - jvmci_data_begin ()); }
-#endif
int oops_count() const { assert(oops_size() % oopSize == 0, ""); return (oops_size() / oopSize) + 1; }
int metadata_count() const { assert(metadata_size() % wordSize == 0, ""); return (metadata_size() / wordSize) + 1; }
@@ -751,17 +728,10 @@ public:
template
void set_gc_data(T* gc_data) { _gc_data = reinterpret_cast(gc_data); }
- bool has_unsafe_access() const { return _has_unsafe_access; }
- void set_has_unsafe_access(bool z) { _has_unsafe_access = z; }
-
- bool has_monitors() const { return _has_monitors; }
- void set_has_monitors(bool z) { _has_monitors = z; }
-
- bool has_scoped_access() const { return _has_scoped_access; }
- void set_has_scoped_access(bool z) { _has_scoped_access = z; }
-
- bool has_wide_vectors() const { return _has_wide_vectors; }
- void set_has_wide_vectors(bool z) { _has_wide_vectors = z; }
+ bool has_unsafe_access() const { return _flags.has_unsafe_access(); }
+ bool has_monitors() const { return _flags.has_monitors(); }
+ bool has_scoped_access() const { return _flags.has_scoped_access(); }
+ bool has_wide_vectors() const { return _flags.has_wide_vectors(); }
bool has_flushed_dependencies() const { return _has_flushed_dependencies; }
void set_has_flushed_dependencies(bool z) {
@@ -848,15 +818,12 @@ public:
void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map, OopClosure* f);
// implicit exceptions support
- address continuation_for_implicit_div0_exception(address pc) { return continuation_for_implicit_exception(pc, true); }
- address continuation_for_implicit_null_exception(address pc) { return continuation_for_implicit_exception(pc, false); }
+ address continuation_for_implicit_exception(address pc);
// Inline cache support for class unloading and nmethod unloading
private:
void cleanup_inline_caches_impl(bool unloading_occurred, bool clean_all);
- address continuation_for_implicit_exception(address pc, bool for_div0_check);
-
public:
// Serial version used by whitebox test
void cleanup_inline_caches_whitebox();
@@ -907,26 +874,6 @@ public:
// Evolution support. We make old (discarded) compiled methods point to new Method*s.
void set_method(Method* method) { _method = method; }
-#if INCLUDE_JVMCI
- // Gets the JVMCI name of this nmethod.
- const char* jvmci_name();
-
- // Records the pending failed speculation in the
- // JVMCI speculation log associated with this nmethod.
- void update_speculation(JavaThread* thread);
-
- // Gets the data specific to a JVMCI compiled method.
- // This returns a non-nullptr value iff this nmethod was
- // compiled by the JVMCI compiler.
- JVMCINMethodData* jvmci_nmethod_data() const {
- return jvmci_data_size() == 0 ? nullptr : (JVMCINMethodData*) jvmci_data_begin();
- }
-
- // Returns true if the runtime should NOT collect deoptimization profile for a JVMCI
- // compiled method
- bool jvmci_skip_profile_deopt() const;
-#endif
-
void oops_do(OopClosure* f);
// All-in-one claiming of nmethods: returns true if the caller successfully claimed that
diff --git a/src/hotspot/share/code/scopeDesc.cpp b/src/hotspot/share/code/scopeDesc.cpp
index d3e08a886e6..5038cd282e2 100644
--- a/src/hotspot/share/code/scopeDesc.cpp
+++ b/src/hotspot/share/code/scopeDesc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -263,8 +263,8 @@ void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
}
}
-#if COMPILER2_OR_JVMCI
- if (NOT_JVMCI(DoEscapeAnalysis &&) is_top() && _objects != nullptr) {
+#ifdef COMPILER2
+ if (DoEscapeAnalysis && is_top() && _objects != nullptr) {
st->print_cr(" Objects");
for (int i = 0; i < _objects->length(); i++) {
ObjectValue* sv = (ObjectValue*) _objects->at(i);
@@ -278,7 +278,7 @@ void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
st->cr();
}
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
#endif
diff --git a/src/hotspot/share/compiler/abstractCompiler.hpp b/src/hotspot/share/compiler/abstractCompiler.hpp
index 5b97feb1a49..5ed256d640c 100644
--- a/src/hotspot/share/compiler/abstractCompiler.hpp
+++ b/src/hotspot/share/compiler/abstractCompiler.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -146,12 +146,8 @@ class AbstractCompiler : public CHeapObj {
// Compiler type queries.
bool is_c1() const { return _type == compiler_c1; }
bool is_c2() const { return _type == compiler_c2; }
- bool is_jvmci() const { return _type == compiler_jvmci; }
CompilerType type() const { return _type; }
- // Compiler threads are hidden by default.
- virtual bool is_hidden_from_external_view() const { return true; }
-
// Customization
virtual void initialize () = 0;
@@ -168,18 +164,6 @@ class AbstractCompiler : public CHeapObj {
ShouldNotReachHere();
}
- // Notifies this compiler that the current thread (`current`) is about to stop.
- // The current thread currently holds the CompileThread_lock.
- virtual void stopping_compiler_thread(CompilerThread* current) {
- // Do nothing
- }
-
- // Notifies this compiler that queue is empty just prior to waiting on
- // MethodCompileQueue_lock which is held by the current thread (`thread`).
- virtual void on_empty_queue(CompileQueue* queue, CompilerThread* thread) {
- // Do nothing
- }
-
// Print compilation timers and statistics
virtual void print_timers() {
ShouldNotReachHere();
diff --git a/src/hotspot/share/compiler/compilationLog.cpp b/src/hotspot/share/compiler/compilationLog.cpp
index e9592415f3d..88bd29d6843 100644
--- a/src/hotspot/share/compiler/compilationLog.cpp
+++ b/src/hotspot/share/compiler/compilationLog.cpp
@@ -51,25 +51,24 @@ void CompilationLog::log_nmethod(JavaThread* thread, nmethod* nm) {
void CompilationLog::log_failure(JavaThread* thread, CompileTask* task, const char* reason, const char* retry_message) {
StringLogMessage lm;
+ stringStream sstr(lm.buffer(), lm.size());
if (task == nullptr) {
- lm.print("Id not known, task was 0; COMPILE SKIPPED: %s", reason);
+ sstr.print("Id not known, task was 0; COMPILE SKIPPED: %s", reason);
} else {
- lm.print("%4d COMPILE SKIPPED: %s", task->compile_id(), reason);
+ sstr.print("%4d COMPILE SKIPPED: %s", task->compile_id(), reason);
}
if (retry_message != nullptr) {
- lm.append(" (%s)", retry_message);
+ sstr.print(" (%s)", retry_message);
}
- lm.print("\n");
log(thread, "%s", (const char*)lm);
}
void CompilationLog::log_metaspace_failure(const char* reason) {
// Note: This method can be called from non-Java/compiler threads to
// log the global metaspace failure that might affect profiling.
- ResourceMark rm;
StringLogMessage lm;
- lm.print("%4d COMPILE PROFILING SKIPPED: %s", -1, reason);
- lm.print("\n");
+ stringStream sstr(lm.buffer(), lm.size());
+ sstr.print("%4d COMPILE PROFILING SKIPPED: %s", -1, reason);
log(Thread::current(), "%s", (const char*)lm);
}
diff --git a/src/hotspot/share/compiler/compilationPolicy.cpp b/src/hotspot/share/compiler/compilationPolicy.cpp
index 1cc44602186..94e734aaad5 100644
--- a/src/hotspot/share/compiler/compilationPolicy.cpp
+++ b/src/hotspot/share/compiler/compilationPolicy.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,9 +48,6 @@
#ifdef COMPILER2
#include "opto/c2compiler.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
int64_t CompilationPolicy::_start_time = 0;
int CompilationPolicy::_c1_count = 0;
@@ -194,7 +191,7 @@ static inline CompLevel adjust_level_for_compilability_query(CompLevel comp_leve
if (comp_level == CompLevel_any) {
if (CompilerConfig::is_c1_only()) {
comp_level = CompLevel_simple;
- } else if (CompilerConfig::is_c2_or_jvmci_compiler_only()) {
+ } else if (CompilerConfig::is_c2_only()) {
comp_level = CompLevel_full_optimization;
}
}
@@ -240,34 +237,6 @@ bool CompilationPolicy::is_compilation_enabled() {
return CompileBroker::should_compile_new_jobs();
}
-CompileTask* CompilationPolicy::select_task_helper(CompileQueue* compile_queue) {
- // Remove unloaded methods from the queue
- for (CompileTask* task = compile_queue->first(); task != nullptr; ) {
- CompileTask* next = task->next();
- if (task->is_unloaded()) {
- compile_queue->remove_and_mark_stale(task);
- }
- task = next;
- }
-#if INCLUDE_JVMCI
- if (UseJVMCICompiler && !BackgroundCompilation) {
- /*
- * In blocking compilation mode, the CompileBroker will make
- * compilations submitted by a JVMCI compiler thread non-blocking. These
- * compilations should be scheduled after all blocking compilations
- * to service non-compiler related compilations sooner and reduce the
- * chance of such compilations timing out.
- */
- for (CompileTask* task = compile_queue->first(); task != nullptr; task = task->next()) {
- if (task->is_blocking()) {
- return task;
- }
- }
- }
-#endif
- return compile_queue->first();
-}
-
// Simple methods are as good being compiled with C1 as C2.
// Determine if a given method is such a case.
bool CompilationPolicy::is_trivial(const methodHandle& method) {
@@ -278,20 +247,6 @@ bool CompilationPolicy::is_trivial(const methodHandle& method) {
return false;
}
-bool CompilationPolicy::force_comp_at_level_simple(const methodHandle& method) {
- if (CompilationModeFlag::quick_internal()) {
-#if INCLUDE_JVMCI
- if (UseJVMCICompiler) {
- AbstractCompiler* comp = CompileBroker::compiler(CompLevel_full_optimization);
- if (comp != nullptr && comp->is_jvmci() && ((JVMCICompiler*) comp)->force_comp_at_level_simple(method)) {
- return true;
- }
- }
-#endif
- }
- return false;
-}
-
CompLevel CompilationPolicy::comp_level(Method* method) {
nmethod *nm = method->code();
if (nm != nullptr && nm->is_in_use()) {
@@ -559,7 +514,7 @@ void CompilationPolicy::initialize() {
if (!CompilerConfig::is_interpreter_only()) {
int count = CICompilerCount;
bool c1_only = CompilerConfig::is_c1_only();
- bool c2_only = CompilerConfig::is_c2_or_jvmci_compiler_only();
+ bool c2_only = CompilerConfig::is_c2_only();
int min_count = (c1_only || c2_only) ? 1 : 2;
#ifdef _LP64
@@ -617,18 +572,8 @@ void CompilationPolicy::initialize() {
// No C1 compiler threads are needed
set_c2_count(count);
} else {
-#if INCLUDE_JVMCI
- if (UseJVMCICompiler && UseJVMCINativeLibrary) {
- int libjvmci_count = MAX2((int) (count * JVMCINativeLibraryThreadFraction), 1);
- int c1_count = MAX2(count - libjvmci_count, 1);
- set_c2_count(libjvmci_count);
- set_c1_count(c1_count);
- } else
-#endif
- {
- set_c1_count(MAX2(count / 3, 1));
- set_c2_count(MAX2(count - c1_count(), 1));
- }
+ set_c1_count(MAX2(count / 3, 1));
+ set_c2_count(MAX2(count - c1_count(), 1));
}
assert(count == c1_count() + c2_count(), "inconsistent compiler thread count");
set_increase_threshold_at_ratio();
@@ -649,7 +594,7 @@ bool CompilationPolicy::verify_level(CompLevel level) {
if (!CompilerConfig::is_c1_enabled() && is_c1_compile(level)) {
return false;
}
- if (!CompilerConfig::is_c2_or_jvmci_compiler_enabled() && is_c2_compile(level)) {
+ if (!CompilerConfig::is_c2_enabled() && is_c2_compile(level)) {
return false;
}
@@ -663,8 +608,6 @@ bool CompilationPolicy::verify_level(CompLevel level) {
return level == CompLevel_simple;
} else if (CompilationModeFlag::high_only()) {
return level == CompLevel_full_optimization;
- } else if (CompilationModeFlag::high_only_quick_internal()) {
- return level == CompLevel_full_optimization || level == CompLevel_simple;
}
return false;
}
@@ -675,7 +618,7 @@ CompLevel CompilationPolicy::highest_compile_level() {
CompLevel level = CompLevel_none;
// Setup the maximum level available for the current compiler configuration.
if (!CompilerConfig::is_interpreter_only()) {
- if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) {
+ if (CompilerConfig::is_c2_enabled()) {
level = CompLevel_full_optimization;
} else if (CompilerConfig::is_c1_enabled()) {
if (CompilerConfig::is_c1_simple_only()) {
@@ -696,7 +639,6 @@ CompLevel CompilationPolicy::highest_compile_level() {
if (!CompilationModeFlag::normal()) {
// a) quick_only - levels 2,3,4 are invalid; levels -1,0,1 are valid;
// b) high_only - levels 1,2,3 are invalid; levels -1,0,4 are valid;
- // c) high_only_quick_internal - levels 2,3 are invalid; levels -1,0,1,4 are valid.
if (CompilationModeFlag::quick_only()) {
if (level == CompLevel_limited_profile || level == CompLevel_full_profile || level == CompLevel_full_optimization) {
level = CompLevel_simple;
@@ -705,10 +647,6 @@ CompLevel CompilationPolicy::highest_compile_level() {
if (level == CompLevel_simple || level == CompLevel_limited_profile || level == CompLevel_full_profile) {
level = CompLevel_none;
}
- } else if (CompilationModeFlag::high_only_quick_internal()) {
- if (level == CompLevel_limited_profile || level == CompLevel_full_profile) {
- level = CompLevel_simple;
- }
}
}
@@ -730,12 +668,6 @@ CompLevel CompilationPolicy::initial_compile_level(const methodHandle& method) {
level = CompLevel_simple;
} else if (CompilationModeFlag::high_only()) {
level = CompLevel_full_optimization;
- } else if (CompilationModeFlag::high_only_quick_internal()) {
- if (force_comp_at_level_simple(method)) {
- level = CompLevel_simple;
- } else {
- level = CompLevel_full_optimization;
- }
}
assert(level != CompLevel_any, "Unhandled compilation mode");
return limit_level(level);
@@ -805,32 +737,36 @@ CompileTask* CompilationPolicy::select_task(CompileQueue* compile_queue, JavaThr
}
if (max_blocking_task != nullptr) {
- // In blocking compilation mode, the CompileBroker will make
- // compilations submitted by a JVMCI compiler thread non-blocking. These
- // compilations should be scheduled after all blocking compilations
- // to service non-compiler related compilations sooner and reduce the
- // chance of such compilations timing out.
max_task = max_blocking_task;
max_method = max_task->method();
}
- methodHandle max_method_h(THREAD, max_method);
+ if (max_task != nullptr && max_method != nullptr) {
+ methodHandle max_method_h(THREAD, max_method);
- if (max_task != nullptr && max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile &&
- max_method != nullptr && is_method_profiled(max_method_h) && !Arguments::is_compiler_only()) {
- max_task->set_comp_level(CompLevel_limited_profile);
+ if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile &&
+ is_method_profiled(max_method_h) && !Arguments::is_compiler_only()) {
- if (CompileBroker::compilation_is_complete(max_method_h, max_task->osr_bci(), CompLevel_limited_profile)) {
- if (PrintTieredEvents) {
- print_event(REMOVE_FROM_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
+ CompilerDirectiveMatcher directive_matcher(max_method_h, CompLevel_limited_profile);
+ bool exclude_limited_profile = directive_matcher.directive_set()->ExcludeOption;
+
+ if (!exclude_limited_profile) {
+ max_task->set_comp_level(CompLevel_limited_profile);
+ max_task->transfer_directive(directive_matcher);
+
+ if (CompileBroker::compilation_is_complete(max_method_h, max_task->osr_bci(), CompLevel_limited_profile)) {
+ if (PrintTieredEvents) {
+ print_event(REMOVE_FROM_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
+ }
+ compile_queue->remove_and_mark_stale(max_task);
+ max_method->clear_queued_for_compilation();
+ return nullptr;
+ }
+
+ if (PrintTieredEvents) {
+ print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
+ }
}
- compile_queue->remove_and_mark_stale(max_task);
- max_method->clear_queued_for_compilation();
- return nullptr;
- }
-
- if (PrintTieredEvents) {
- print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
}
}
return max_task;
@@ -1066,7 +1002,7 @@ bool CompilationPolicy::is_mature(MethodData* mdo) {
// start profiling without waiting for the compiled method to arrive.
// We also take the load on compilers into the account.
bool CompilationPolicy::should_create_mdo(const methodHandle& method, CompLevel cur_level) {
- if (cur_level != CompLevel_none || force_comp_at_level_simple(method) || CompilationModeFlag::quick_only() || !ProfileInterpreter) {
+ if (cur_level != CompLevel_none || CompilationModeFlag::quick_only() || !ProfileInterpreter) {
return false;
}
@@ -1249,9 +1185,6 @@ CompLevel CompilationPolicy::trained_transition(const methodHandle& method, Comp
}
// We don't have any special strategies for the C2-only compilation modes, so just fix up the levels for now.
- if (CompilationModeFlag::high_only_quick_internal() && CompLevel_simple < next_level && next_level < CompLevel_full_optimization) {
- return CompLevel_none;
- }
if (CompilationModeFlag::high_only() && next_level < CompLevel_full_optimization) {
return CompLevel_none;
}
@@ -1264,7 +1197,7 @@ CompLevel CompilationPolicy::trained_transition(const methodHandle& method, Comp
* 1 - pure C1 (CompLevel_simple)
* 2 - C1 with invocation and backedge counting (CompLevel_limited_profile)
* 3 - C1 with full profiling (CompLevel_full_profile)
- * 4 - C2 or Graal (CompLevel_full_optimization)
+ * 4 - C2 (CompLevel_full_optimization)
*
* Common state transition patterns:
* a. 0 -> 3 -> 4.
@@ -1301,9 +1234,7 @@ template
CompLevel CompilationPolicy::common(const methodHandle& method, CompLevel cur_level, JavaThread* THREAD, bool disable_feedback) {
CompLevel next_level = cur_level;
- if (force_comp_at_level_simple(method)) {
- next_level = CompLevel_simple;
- } else if (is_trivial(method) || method->is_native()) {
+ if (is_trivial(method) || method->is_native()) {
// We do not care if there is profiling data for these methods, throw them to compiler.
next_level = CompilationModeFlag::disable_intermediate() ? CompLevel_full_optimization : CompLevel_simple;
} else if (MethodTrainingData::have_data()) {
diff --git a/src/hotspot/share/compiler/compilationPolicy.hpp b/src/hotspot/share/compiler/compilationPolicy.hpp
index 3efc374d998..e9a2de076ff 100644
--- a/src/hotspot/share/compiler/compilationPolicy.hpp
+++ b/src/hotspot/share/compiler/compilationPolicy.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -322,8 +322,6 @@ class CompilationPolicy : AllStatic {
// Simple methods are as good being compiled with C1 as C2.
// This function tells if it's such a function.
inline static bool is_trivial(const methodHandle& method);
- // Force method to be compiled at CompLevel_simple?
- inline static bool force_comp_at_level_simple(const methodHandle& method);
// Get a compilation level for a given method.
static CompLevel comp_level(Method* method);
@@ -358,7 +356,6 @@ class CompilationPolicy : AllStatic {
static bool can_be_osr_compiled(const methodHandle& m, int comp_level = CompLevel_any);
static bool is_compilation_enabled();
- static CompileTask* select_task_helper(CompileQueue* compile_queue);
// Return initial compile level to use with Xcomp (depends on compilation mode).
static void reprofile(ScopeDesc* trap_scope, bool is_osr);
static nmethod* event(const methodHandle& method, const methodHandle& inlinee,
diff --git a/src/hotspot/share/compiler/compileBroker.cpp b/src/hotspot/share/compiler/compileBroker.cpp
index c806d356d0c..099f29f96bf 100644
--- a/src/hotspot/share/compiler/compileBroker.cpp
+++ b/src/hotspot/share/compiler/compileBroker.cpp
@@ -81,10 +81,6 @@
#ifdef COMPILER2
#include "opto/c2compiler.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciEnv.hpp"
-#include "jvmci/jvmciRuntime.hpp"
-#endif
#ifdef DTRACE_ENABLED
@@ -238,15 +234,6 @@ CompileTaskWrapper::~CompileTaskWrapper() {
{
MutexLocker notifier(thread, CompileTaskWait_lock);
task->mark_complete();
-#if INCLUDE_JVMCI
- if (CompileBroker::compiler(task->comp_level())->is_jvmci()) {
- if (!task->has_waiter()) {
- // The waiting thread timed out and thus did not delete the task.
- free_task = true;
- }
- task->set_blocking_jvmci_compile_state(nullptr);
- }
-#endif
if (!free_task) {
// Notify the waiting thread that the compilation has completed
// so that it can free the task.
@@ -283,18 +270,6 @@ bool CompileBroker::can_remove(CompilerThread *ct, bool do_it) {
// Keep thread alive for at least some time.
if (ct->idle_time_millis() < (c1 ? 500 : 100)) return false;
-#if INCLUDE_JVMCI
- if (compiler->is_jvmci() && !UseJVMCINativeLibrary) {
- // Handles for JVMCI thread objects may get released concurrently.
- if (do_it) {
- assert(CompileThread_lock->owner() == ct, "must be holding lock");
- } else {
- // Skip check if it's the last thread and let caller check again.
- return true;
- }
- }
-#endif
-
// We only allow the last compiler thread of each type to get removed.
jobject last_compiler = c1 ? compiler1_object(compiler_count - 1)
: compiler2_object(compiler_count - 1);
@@ -302,13 +277,6 @@ bool CompileBroker::can_remove(CompilerThread *ct, bool do_it) {
if (do_it) {
assert_locked_or_safepoint(CompileThread_lock); // Update must be consistent.
compiler->set_num_compiler_threads(compiler_count - 1);
-#if INCLUDE_JVMCI
- if (compiler->is_jvmci() && !UseJVMCINativeLibrary) {
- // Old j.l.Thread object can die when no longer referenced elsewhere.
- JNIHandles::destroy_global(compiler2_object(compiler_count - 1));
- _compiler2_objects[compiler_count - 1] = nullptr;
- }
-#endif
}
return true;
}
@@ -424,15 +392,6 @@ CompileTask* CompileQueue::get(CompilerThread* thread) {
return nullptr;
}
- AbstractCompiler* compiler = thread->compiler();
- guarantee(compiler != nullptr, "Compiler object must exist");
- compiler->on_empty_queue(this, thread);
- if (_first != nullptr) {
- // The call to on_empty_queue may have temporarily unlocked the MCQ lock
- // so check again whether any tasks were added to the queue.
- break;
- }
-
// If there are no compilation tasks and we can compile new jobs
// (i.e., there is enough free space in the code cache) there is
// no need to invoke the GC.
@@ -600,31 +559,21 @@ CompilerCounters::CompilerCounters() {
_compile_type = CompileBroker::no_compile;
}
-#if INCLUDE_JFR && COMPILER2_OR_JVMCI
+#if INCLUDE_JFR && defined(COMPILER2)
// It appends new compiler phase names to growable array phase_names(a new CompilerPhaseType mapping
// in compiler/compilerEvent.cpp) and registers it with its serializer.
//
// c2 uses explicit CompilerPhaseType idToPhase mapping in opto/phasetype.hpp,
// so if c2 is used, it should be always registered first.
// This function is called during vm initialization.
-static void register_jfr_phasetype_serializer(CompilerType compiler_type) {
+static void register_jfr_phasetype_serializer() {
ResourceMark rm;
- static bool first_registration = true;
- if (compiler_type == compiler_jvmci) {
- CompilerEvent::PhaseEvent::get_phase_id("NOT_A_PHASE_NAME", false, false, false);
- first_registration = false;
-#ifdef COMPILER2
- } else if (compiler_type == compiler_c2) {
- assert(first_registration, "invariant"); // c2 must be registered first.
- for (int i = 0; i < PHASE_NUM_TYPES; i++) {
- const char* phase_name = CompilerPhaseTypeHelper::to_description((CompilerPhaseType) i);
- CompilerEvent::PhaseEvent::get_phase_id(phase_name, false, false, false);
- }
- first_registration = false;
-#endif // COMPILER2
+ for (int i = 0; i < PHASE_NUM_TYPES; i++) {
+ const char* phase_name = CompilerPhaseTypeHelper::to_description((CompilerPhaseType) i);
+ CompilerEvent::PhaseEvent::get_phase_id(phase_name, false, false, false);
}
}
-#endif // INCLUDE_JFR && COMPILER2_OR_JVMCI
+#endif // INCLUDE_JFR && defined(COMPILER2)
// ------------------------------------------------------------------
// CompileBroker::compilation_init
@@ -639,31 +588,6 @@ void CompileBroker::compilation_init(JavaThread* THREAD) {
_c1_count = CompilationPolicy::c1_count();
_c2_count = CompilationPolicy::c2_count();
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- // This is creating a JVMCICompiler singleton.
- JVMCICompiler* jvmci = new JVMCICompiler();
-
- if (UseJVMCICompiler) {
- _compilers[1] = jvmci;
- if (FLAG_IS_DEFAULT(JVMCIThreads)) {
- if (BootstrapJVMCI) {
- // JVMCI will bootstrap so give it more threads
- _c2_count = MIN2(32, os::active_processor_count());
- }
- } else {
- _c2_count = JVMCIThreads;
- }
- if (FLAG_IS_DEFAULT(JVMCIHostThreads)) {
- } else {
-#ifdef COMPILER1
- _c1_count = JVMCIHostThreads;
-#endif // COMPILER1
- }
- }
- }
-#endif // INCLUDE_JVMCI
-
#ifdef COMPILER1
if (_c1_count > 0) {
_compilers[0] = new Compiler();
@@ -671,24 +595,14 @@ void CompileBroker::compilation_init(JavaThread* THREAD) {
#endif // COMPILER1
#ifdef COMPILER2
- if (true JVMCI_ONLY( && !UseJVMCICompiler)) {
- if (_c2_count > 0) {
- _compilers[1] = new C2Compiler();
- // Register c2 first as c2 CompilerPhaseType idToPhase mapping is explicit.
- // idToPhase mapping for c2 is in opto/phasetype.hpp
- JFR_ONLY(register_jfr_phasetype_serializer(compiler_c2);)
- }
+ if (_c2_count > 0) {
+ _compilers[1] = new C2Compiler();
+ // Register c2 as c2 CompilerPhaseType idToPhase mapping is explicit.
+ // idToPhase mapping for c2 is in opto/phasetype.hpp
+ JFR_ONLY(register_jfr_phasetype_serializer();)
}
#endif // COMPILER2
-#if INCLUDE_JVMCI
- // Register after c2 registration.
- // JVMCI CompilerPhaseType idToPhase mapping is dynamic.
- if (EnableJVMCI) {
- JFR_ONLY(register_jfr_phasetype_serializer(compiler_jvmci);)
- }
-#endif // INCLUDE_JVMCI
-
if (CompilerOracle::should_collect_memstat()) {
CompilationMemoryStatistic::initialize();
}
@@ -801,7 +715,7 @@ void TrainingReplayThread::training_replay_thread_entry(JavaThread* thread, TRAP
CompilationPolicy::replay_training_at_init_loop(thread);
}
-#if defined(ASSERT) && COMPILER2_OR_JVMCI
+#if defined(ASSERT) && defined(COMPILER2)
// Entry for DeoptimizeObjectsALotThread. The threads are started in
// CompileBroker::init_compiler_threads() iff DeoptimizeObjectsALot is enabled
void DeoptimizeObjectsALotThread::deopt_objs_alot_thread_entry(JavaThread* thread, TRAPS) {
@@ -852,7 +766,7 @@ void DeoptimizeObjectsALotThread::deoptimize_objects_alot_loop_all() {
sleep(DeoptimizeObjectsALotInterval);
}
}
-#endif // defined(ASSERT) && COMPILER2_OR_JVMCI
+#endif // defined(ASSERT) && defined(COMPILER2)
JavaThread* CompileBroker::make_thread(ThreadType type, jobject thread_handle, CompileQueue* queue, AbstractCompiler* comp, JavaThread* THREAD) {
@@ -873,11 +787,11 @@ JavaThread* CompileBroker::make_thread(ThreadType type, jobject thread_handle, C
new_thread = new CompilerThread(queue, counters);
}
break;
-#if defined(ASSERT) && COMPILER2_OR_JVMCI
+#if defined(ASSERT) && defined(COMPILER2)
case deoptimizer_t:
new_thread = new DeoptimizeObjectsALotThread();
break;
-#endif // ASSERT
+#endif // defined(ASSERT) && defined(COMPILER2)
case training_replay_t:
new_thread = new TrainingReplayThread();
break;
@@ -968,7 +882,7 @@ void CompileBroker::init_compiler_threads() {
#endif // !ZERO
// Initialize the compilation queue
if (_c2_count > 0) {
- const char* name = JVMCI_ONLY(UseJVMCICompiler ? "JVMCI compile queue" :) "C2 compile queue";
+ const char* name = "C2 compile queue";
_c2_compile_queue = new CompileQueue(name);
_compiler2_objects = NEW_C_HEAP_ARRAY(jobject, _c2_count, mtCompiler);
_compiler2_logs = NEW_C_HEAP_ARRAY(CompileLog*, _c2_count, mtCompiler);
@@ -1025,7 +939,7 @@ void CompileBroker::init_compiler_threads() {
PerfDataManager::create_constant(SUN_CI, "threads", PerfData::U_Bytes, _c1_count + _c2_count, CHECK);
}
-#if defined(ASSERT) && COMPILER2_OR_JVMCI
+#if defined(ASSERT) && defined(COMPILER2)
if (DeoptimizeObjectsALot) {
// Initialize and start the object deoptimizer threads
const int total_count = DeoptimizeObjectsALotThreadCountSingle + DeoptimizeObjectsALotThreadCountAll;
@@ -1035,7 +949,7 @@ void CompileBroker::init_compiler_threads() {
make_thread(deoptimizer_t, thread_handle, nullptr, nullptr, THREAD);
}
}
-#endif // defined(ASSERT) && COMPILER2_OR_JVMCI
+#endif // defined(ASSERT) && defined(COMPILER2)
}
void CompileBroker::init_training_replay() {
@@ -1084,43 +998,6 @@ void CompileBroker::possibly_add_compiler_threads(JavaThread* THREAD) {
(int)(available_cc_np / (128*K)));
for (int i = old_c2_count; i < new_c2_count; i++) {
-#if INCLUDE_JVMCI
- if (UseJVMCICompiler && !UseJVMCINativeLibrary && _compiler2_objects[i] == nullptr) {
- // Native compiler threads as used in C1/C2 can reuse the j.l.Thread objects as their
- // existence is completely hidden from the rest of the VM (and those compiler threads can't
- // call Java code to do the creation anyway).
- //
- // For pure Java JVMCI we have to create new j.l.Thread objects as they are visible and we
- // can see unexpected thread lifecycle transitions if we bind them to new JavaThreads. For
- // native library JVMCI it's preferred to use the C1/C2 strategy as this avoids unnecessary
- // coupling with Java.
- if (!THREAD->can_call_java()) break;
- char name_buffer[256];
- os::snprintf_checked(name_buffer, sizeof(name_buffer), "%s CompilerThread%d", _compilers[1]->name(), i);
- Handle thread_oop;
- {
- // We have to give up the lock temporarily for the Java calls.
- MutexUnlocker mu(CompileThread_lock);
- thread_oop = JavaThread::create_system_thread_object(name_buffer, THREAD);
- }
- if (HAS_PENDING_EXCEPTION) {
- if (trace_compiler_threads()) {
- ResourceMark rm;
- stringStream msg;
- msg.print_cr("JVMCI compiler thread creation failed:");
- PENDING_EXCEPTION->print_on(&msg);
- print_compiler_threads(msg);
- }
- CLEAR_PENDING_EXCEPTION;
- break;
- }
- // Check if another thread has beaten us during the Java calls.
- if (get_c2_thread_count() != i) break;
- jobject thread_handle = JNIHandles::make_global(thread_oop);
- assert(compiler2_object(i) == nullptr, "Old one must be released!");
- _compiler2_objects[i] = thread_handle;
- }
-#endif
guarantee(compiler2_object(i) != nullptr, "Thread oop must exist");
JavaThread *ct = make_thread(compiler_t, compiler2_object(i), _c2_compile_queue, _compilers[1], THREAD);
if (ct == nullptr) break;
@@ -1276,42 +1153,6 @@ void CompileBroker::compile_method_base(const methodHandle& method,
return;
}
-#if INCLUDE_JVMCI
- if (UseJVMCICompiler && blocking) {
- // Don't allow blocking compiles for requests triggered by JVMCI.
- if (thread->is_Compiler_thread()) {
- blocking = false;
- }
-
- // In libjvmci, JVMCI initialization should not deadlock with other threads
- if (!UseJVMCINativeLibrary) {
- // Don't allow blocking compiles if inside a class initializer or while performing class loading
- vframeStream vfst(JavaThread::cast(thread));
- for (; !vfst.at_end(); vfst.next()) {
- if (vfst.method()->is_static_initializer() ||
- (vfst.method()->method_holder()->is_subclass_of(vmClasses::ClassLoader_klass()) &&
- vfst.method()->name() == vmSymbols::loadClass_name())) {
- blocking = false;
- break;
- }
- }
-
- // Don't allow blocking compilation requests to JVMCI
- // if JVMCI itself is not yet initialized
- if (!JVMCI::is_compiler_initialized() && compiler(comp_level)->is_jvmci()) {
- blocking = false;
- }
- }
-
- // Don't allow blocking compilation requests if we are in JVMCIRuntime::shutdown
- // to avoid deadlock between compiler thread(s) and threads run at shutdown
- // such as the DestroyJavaVM thread.
- if (JVMCI::in_shutdown()) {
- blocking = false;
- }
- }
-#endif // INCLUDE_JVMCI
-
// We will enter the compilation in the queue.
// 14012000: Note that this sets the queued_for_compile bits in
// the target method. We can now reason that a method cannot be
@@ -1375,14 +1216,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
AbstractCompiler *comp = CompileBroker::compiler(comp_level);
assert(comp != nullptr, "Ensure we have a compiler");
-#if INCLUDE_JVMCI
- if (comp->is_jvmci() && !JVMCI::can_initialize_JVMCI()) {
- // JVMCI compilation is not yet initializable.
- return nullptr;
- }
-#endif
-
- CompilerDirectiveMatcher matcher(method, comp);
+ CompilerDirectiveMatcher matcher(method, comp_level);
// CompileBroker::compile_method can trap and can have pending async exception.
nmethod* nm = CompileBroker::compile_method(method, osr_bci, comp_level, hot_count, compile_reason, matcher.directive_set(), THREAD);
return nm;
@@ -1430,7 +1264,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
assert(!HAS_PENDING_EXCEPTION, "No exception should be present");
// some prerequisites that are compiler specific
- if (comp->is_c2() || comp->is_jvmci()) {
+ if (comp->is_c2()) {
InternalOOMEMark iom(THREAD);
method->constants()->resolve_string_constants(CHECK_AND_CLEAR_NONASYNC_NULL);
// Resolve all classes seen in the signature of the method
@@ -1642,68 +1476,6 @@ CompileTask* CompileBroker::create_compile_task(CompileQueue* queue,
return new_task;
}
-#if INCLUDE_JVMCI
-// The number of milliseconds to wait before checking if
-// JVMCI compilation has made progress.
-static const long JVMCI_COMPILATION_PROGRESS_WAIT_TIMESLICE = 1000;
-
-// The number of JVMCI compilation progress checks that must fail
-// before unblocking a thread waiting for a blocking compilation.
-static const int JVMCI_COMPILATION_PROGRESS_WAIT_ATTEMPTS = 10;
-
-/**
- * Waits for a JVMCI compiler to complete a given task. This thread
- * waits until either the task completes or it sees no JVMCI compilation
- * progress for N consecutive milliseconds where N is
- * JVMCI_COMPILATION_PROGRESS_WAIT_TIMESLICE *
- * JVMCI_COMPILATION_PROGRESS_WAIT_ATTEMPTS.
- *
- * @return true if this thread needs to delete the task
- */
-bool CompileBroker::wait_for_jvmci_completion(JVMCICompiler* jvmci, CompileTask* task, JavaThread* thread) {
- assert(UseJVMCICompiler, "sanity");
- MonitorLocker ml(thread, CompileTaskWait_lock);
- int progress_wait_attempts = 0;
- jint thread_jvmci_compilation_ticks = 0;
- jint global_jvmci_compilation_ticks = jvmci->global_compilation_ticks();
- while (!task->is_complete() && !is_compilation_disabled_forever() &&
- ml.wait(JVMCI_COMPILATION_PROGRESS_WAIT_TIMESLICE)) {
- JVMCICompileState* jvmci_compile_state = task->blocking_jvmci_compile_state();
-
- bool progress;
- if (jvmci_compile_state != nullptr) {
- jint ticks = jvmci_compile_state->compilation_ticks();
- progress = (ticks - thread_jvmci_compilation_ticks) != 0;
- JVMCI_event_1("waiting on compilation %d [ticks=%d]", task->compile_id(), ticks);
- thread_jvmci_compilation_ticks = ticks;
- } else {
- // Still waiting on JVMCI compiler queue. This thread may be holding a lock
- // that all JVMCI compiler threads are blocked on. We use the global JVMCI
- // compilation ticks to determine whether JVMCI compilation
- // is still making progress through the JVMCI compiler queue.
- jint ticks = jvmci->global_compilation_ticks();
- progress = (ticks - global_jvmci_compilation_ticks) != 0;
- JVMCI_event_1("waiting on compilation %d to be queued [ticks=%d]", task->compile_id(), ticks);
- global_jvmci_compilation_ticks = ticks;
- }
-
- if (!progress) {
- if (++progress_wait_attempts == JVMCI_COMPILATION_PROGRESS_WAIT_ATTEMPTS) {
- if (PrintCompilation) {
- task->print(tty, "wait for blocking compilation timed out");
- }
- JVMCI_event_1("waiting on compilation %d timed out", task->compile_id());
- break;
- }
- } else {
- progress_wait_attempts = 0;
- }
- }
- task->clear_waiter();
- return task->is_complete();
-}
-#endif
-
/**
* Wait for the compilation task to complete.
*/
@@ -1718,19 +1490,8 @@ void CompileBroker::wait_for_completion(CompileTask* task) {
JavaThread* thread = JavaThread::current();
methodHandle method(thread, task->method());
- bool free_task;
-#if INCLUDE_JVMCI
- AbstractCompiler* comp = compiler(task->comp_level());
- if (!UseJVMCINativeLibrary && comp->is_jvmci() && !task->should_wait_for_compilation()) {
- // It may return before compilation is completed.
- // Note that libjvmci should not pre-emptively unblock
- // a thread waiting for a compilation as it does not call
- // Java code and so is not deadlock prone like jarjvmci.
- free_task = wait_for_jvmci_completion((JVMCICompiler*) comp, task, thread);
- } else
-#endif
+ bool free_task = true;
{
- free_task = true;
// Wait until the task is complete or compilation is shut down.
MonitorLocker ml(thread, CompileTaskWait_lock);
while (!task->is_complete() && !is_compilation_disabled_forever()) {
@@ -1968,9 +1729,6 @@ void CompileBroker::compiler_thread_loop() {
print_compiler_threads(msg);
}
- // Notify compiler that the compiler thread is about to stop
- thread->compiler()->stopping_compiler_thread(thread);
-
free_buffer_blob_if_allocated(thread);
return; // Stop this thread.
}
@@ -2244,69 +2002,6 @@ void CompileBroker::invoke_compiler_on_method(CompileTask* task) {
bool failure_reason_on_C_heap = false;
const char* retry_message = nullptr;
-#if INCLUDE_JVMCI
- if (UseJVMCICompiler && comp != nullptr && comp->is_jvmci()) {
- JVMCICompiler* jvmci = (JVMCICompiler*) comp;
-
- TraceTime t1("compilation", &time);
- EventCompilation event;
- JVMCICompileState compile_state(task, jvmci);
- JVMCIRuntime *runtime = nullptr;
-
- if (JVMCI::in_shutdown()) {
- failure_reason = "in JVMCI shutdown";
- retry_message = "not retryable";
- compilable = ciEnv::MethodCompilable_never;
- } else if (compile_state.target_method_is_old()) {
- // Skip redefined methods
- failure_reason = "redefined method";
- retry_message = "not retryable";
- compilable = ciEnv::MethodCompilable_never;
- } else {
- JVMCIEnv env(thread, &compile_state, __FILE__, __LINE__);
- if (env.init_error() != JNI_OK) {
- const char* msg = env.init_error_msg();
- failure_reason = os::strdup(err_msg("Error attaching to libjvmci (err: %d, %s)",
- env.init_error(), msg == nullptr ? "unknown" : msg), mtJVMCI);
- bool reason_on_C_heap = true;
- // In case of JNI_ENOMEM, there's a good chance a subsequent attempt to create libjvmci or attach to it
- // might succeed. Other errors most likely indicate a non-recoverable error in the JVMCI runtime.
- bool retryable = env.init_error() == JNI_ENOMEM;
- compile_state.set_failure(retryable, failure_reason, reason_on_C_heap);
- }
- if (failure_reason == nullptr) {
- if (WhiteBoxAPI && WhiteBox::compilation_locked) {
- // Must switch to native to block
- ThreadToNativeFromVM ttn(thread);
- whitebox_lock_compilation();
- }
- methodHandle method(thread, target_handle);
- runtime = env.runtime();
- runtime->compile_method(&env, jvmci, method, osr_bci);
-
- failure_reason = compile_state.failure_reason();
- failure_reason_on_C_heap = compile_state.failure_reason_on_C_heap();
- if (!compile_state.retryable()) {
- retry_message = "not retryable";
- compilable = ciEnv::MethodCompilable_not_at_tier;
- }
- if (!task->is_success()) {
- assert(failure_reason != nullptr, "must specify failure_reason");
- }
- }
- }
- if (!task->is_success() && !JVMCI::in_shutdown()) {
- handle_compile_error(thread, task, nullptr, compilable, failure_reason);
- }
- if (event.should_commit()) {
- post_compilation_event(event, task);
- }
-
- if (runtime != nullptr) {
- runtime->post_compile(thread);
- }
- } else
-#endif // INCLUDE_JVMCI
{
NoHandleMark nhm;
ThreadToNativeFromVM ttn(thread);
@@ -2767,15 +2462,6 @@ void CompileBroker::print_times(bool per_compiler, bool aggregate) {
tty->cr();
comp->print_timers();
}
-#if INCLUDE_JVMCI
- if (EnableJVMCI) {
- JVMCICompiler *jvmci_comp = JVMCICompiler::instance(false, JavaThread::current_or_null());
- if (jvmci_comp != nullptr && jvmci_comp != comp) {
- tty->cr();
- jvmci_comp->print_timers();
- }
- }
-#endif
tty->cr();
tty->print_cr(" Total compiled methods : %8u methods", total_compile_count);
diff --git a/src/hotspot/share/compiler/compileBroker.hpp b/src/hotspot/share/compiler/compileBroker.hpp
index cb657b7b601..f1d69c8c0bf 100644
--- a/src/hotspot/share/compiler/compileBroker.hpp
+++ b/src/hotspot/share/compiler/compileBroker.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,13 +33,10 @@
#include "runtime/atomicAccess.hpp"
#include "runtime/perfDataTypes.hpp"
#include "utilities/stack.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciCompiler.hpp"
-#endif
class nmethod;
-#if defined(ASSERT) && COMPILER2_OR_JVMCI
+#if defined(ASSERT) && defined(COMPILER2)
// Stress testing. Dedicated threads revert optimizations based on escape analysis concurrently to
// the running java application. Configured with vm options DeoptimizeObjectsALot*.
class DeoptimizeObjectsALotThread : public JavaThread {
@@ -53,7 +50,7 @@ public:
bool is_hidden_from_external_view() const { return true; }
};
-#endif
+#endif // defined(ASSERT) && defined(COMPILER2)
// CompilerCounters
//
@@ -273,10 +270,6 @@ class CompileBroker: AllStatic {
CompileTask::CompileReason compile_reason,
bool blocking);
static void wait_for_completion(CompileTask* task);
-#if INCLUDE_JVMCI
- static bool wait_for_jvmci_completion(JVMCICompiler* comp, CompileTask* task, JavaThread* thread);
-#endif
-
static void free_buffer_blob_if_allocated(CompilerThread* thread);
static void invoke_compiler_on_method(CompileTask* task);
diff --git a/src/hotspot/share/compiler/compileTask.cpp b/src/hotspot/share/compiler/compileTask.cpp
index 193770b66a0..6a5f706ec7a 100644
--- a/src/hotspot/share/compiler/compileTask.cpp
+++ b/src/hotspot/share/compiler/compileTask.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -57,9 +57,7 @@ CompileTask::CompileTask(int compile_id,
_nm_insts_size(0),
_comp_level(comp_level),
_compiler(CompileBroker::compiler(comp_level)),
- _comp_directive_matcher(method, _compiler),
- JVMCI_ONLY(_has_waiter(_compiler->is_jvmci()) COMMA)
- JVMCI_ONLY(_blocking_jvmci_compile_state(nullptr) COMMA)
+ _comp_directive_matcher(method, static_cast(comp_level)),
_num_inlined_bytecodes(0),
_next(nullptr),
_prev(nullptr),
diff --git a/src/hotspot/share/compiler/compileTask.hpp b/src/hotspot/share/compiler/compileTask.hpp
index 4b48ee63be5..155dc4d139b 100644
--- a/src/hotspot/share/compiler/compileTask.hpp
+++ b/src/hotspot/share/compiler/compileTask.hpp
@@ -35,8 +35,6 @@
class CompileTrainingData;
-JVMCI_ONLY(class JVMCICompileState;)
-
enum class InliningResult { SUCCESS, FAILURE };
inline InliningResult inlining_result_of(bool success) {
@@ -50,7 +48,6 @@ inline InliningResult inlining_result_of(bool success) {
class CompileTask : public CHeapObj {
friend class VMStructs;
- friend class JVMCIVMStructs;
public:
// Different reasons for a compilation
@@ -63,7 +60,6 @@ class CompileTask : public CHeapObj {
Reason_Replay, // ciReplay
Reason_Whitebox, // Whitebox API
Reason_MustBeCompiled, // Used for -Xcomp or AlwaysCompileLoopMethods (see CompilationPolicy::must_be_compiled())
- Reason_Bootstrap, // JVMCI bootstrap
Reason_Count
};
@@ -96,11 +92,6 @@ class CompileTask : public CHeapObj {
int _comp_level;
AbstractCompiler* _compiler;
CompilerDirectiveMatcher _comp_directive_matcher;
-#if INCLUDE_JVMCI
- bool _has_waiter;
- // Compilation state for a blocking JVMCI compilation
- JVMCICompileState* _blocking_jvmci_compile_state;
-#endif
int _num_inlined_bytecodes;
CompileTask* _next;
CompileTask* _prev;
@@ -131,6 +122,7 @@ class CompileTask : public CHeapObj {
bool is_blocking() const { return _is_blocking; }
bool is_success() const { return _is_success; }
DirectiveSet* directive() const { return _comp_directive_matcher.directive_set(); }
+ void transfer_directive(CompilerDirectiveMatcher& matcher) { _comp_directive_matcher.transfer_from(matcher); }
CompileReason compile_reason() const { return _compile_reason; }
CodeSection::csize_t nm_content_size() { return _nm_content_size; }
void set_nm_content_size(CodeSection::csize_t size) { _nm_content_size = size; }
@@ -148,26 +140,6 @@ class CompileTask : public CHeapObj {
return false;
}
}
-#if INCLUDE_JVMCI
- bool should_wait_for_compilation() const {
- // Wait for blocking compilation to finish.
- switch (_compile_reason) {
- case Reason_Replay:
- case Reason_Whitebox:
- case Reason_Bootstrap:
- return _is_blocking;
- default:
- return false;
- }
- }
-
- bool has_waiter() const { return _has_waiter; }
- void clear_waiter() { _has_waiter = false; }
- JVMCICompileState* blocking_jvmci_compile_state() const { return _blocking_jvmci_compile_state; }
- void set_blocking_jvmci_compile_state(JVMCICompileState* state) {
- _blocking_jvmci_compile_state = state;
- }
-#endif
void mark_complete() { _is_complete = true; }
void mark_success() { _is_success = true; }
diff --git a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp
index cf7744cfe03..69c9bc585f7 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.cpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.cpp
@@ -39,8 +39,7 @@
const char* compilertype2name_tab[compiler_number_of_types] = {
"",
"c1",
- "c2",
- "jvmci"
+ "c2"
};
CompilationModeFlag::Mode CompilationModeFlag::_mode = CompilationModeFlag::Mode::NORMAL;
@@ -63,17 +62,11 @@ bool CompilationModeFlag::initialize() {
_mode = Mode::QUICK_ONLY;
}
} else if (strcmp(CompilationMode, "high-only") == 0) {
- if (!CompilerConfig::has_c2() && !CompilerConfig::is_jvmci_compiler()) {
- print_mode_unavailable("high-only", "there is no c2 or jvmci compiler present");
+ if (!CompilerConfig::has_c2()) {
+ print_mode_unavailable("high-only", "there is no c2 compiler present");
} else {
_mode = Mode::HIGH_ONLY;
}
- } else if (strcmp(CompilationMode, "high-only-quick-internal") == 0) {
- if (!CompilerConfig::has_c1() || !CompilerConfig::is_jvmci_compiler()) {
- print_mode_unavailable("high-only-quick-internal", "there is no c1 and jvmci compiler present");
- } else {
- _mode = Mode::HIGH_ONLY_QUICK_INTERNAL;
- }
} else {
print_error();
return false;
@@ -84,12 +77,8 @@ bool CompilationModeFlag::initialize() {
if (normal()) {
if (CompilerConfig::is_c1_simple_only()) {
_mode = Mode::QUICK_ONLY;
- } else if (CompilerConfig::is_c2_or_jvmci_compiler_only()) {
+ } else if (CompilerConfig::is_c2_only()) {
_mode = Mode::HIGH_ONLY;
- } else if (CompilerConfig::is_jvmci_compiler_enabled() && CompilerConfig::is_c1_enabled() && !TieredCompilation) {
- warning("Disabling tiered compilation with non-native JVMCI compiler is not recommended, "
- "disabling intermediate compilation levels instead. ");
- _mode = Mode::HIGH_ONLY_QUICK_INTERNAL;
}
}
return true;
@@ -102,14 +91,10 @@ void CompilationModeFlag::print_error() {
jio_fprintf(defaultStream::error_stream(), "%s quick-only", comma ? "," : "");
comma = true;
}
- if (CompilerConfig::has_c2() || CompilerConfig::has_jvmci()) {
+ if (CompilerConfig::has_c2()) {
jio_fprintf(defaultStream::error_stream(), "%s high-only", comma ? "," : "");
comma = true;
}
- if (CompilerConfig::has_c1() && CompilerConfig::has_jvmci()) {
- jio_fprintf(defaultStream::error_stream(), "%s high-only-quick-internal", comma ? "," : "");
- comma = true;
- }
jio_fprintf(defaultStream::error_stream(), "\n");
}
@@ -206,7 +191,7 @@ void CompilerConfig::set_legacy_emulation_flags() {
if (!FLAG_IS_DEFAULT(CompileThreshold) ||
!FLAG_IS_DEFAULT(OnStackReplacePercentage) ||
!FLAG_IS_DEFAULT(InterpreterProfilePercentage)) {
- if (CompilerConfig::is_c1_only() || CompilerConfig::is_c2_or_jvmci_compiler_only()) {
+ if (CompilerConfig::is_c1_only() || CompilerConfig::is_c2_only()) {
// This function is called before these flags are validated. In order to not confuse the user with extraneous
// error messages, we check the validity of these flags here and bail out if any of them are invalid.
if (!check_legacy_flags()) {
@@ -238,7 +223,7 @@ void CompilerConfig::set_legacy_emulation_flags() {
FLAG_SET_ERGO(Tier3MinInvocationThreshold, threshold);
FLAG_SET_ERGO(Tier3CompileThreshold, threshold);
FLAG_SET_ERGO(Tier3BackEdgeThreshold, osr_threshold);
- if (CompilerConfig::is_c2_or_jvmci_compiler_only()) {
+ if (CompilerConfig::is_c2_only()) {
FLAG_SET_ERGO(Tier4InvocationThreshold, threshold);
FLAG_SET_ERGO(Tier4MinInvocationThreshold, threshold);
FLAG_SET_ERGO(Tier4CompileThreshold, threshold);
@@ -401,54 +386,6 @@ void CompilerConfig::set_compilation_policy_flags() {
}
-#if INCLUDE_JVMCI
-void CompilerConfig::set_jvmci_specific_flags() {
- if (UseJVMCICompiler) {
- if (FLAG_IS_DEFAULT(TypeProfileWidth)) {
- FLAG_SET_DEFAULT(TypeProfileWidth, 8);
- }
- if (FLAG_IS_DEFAULT(TypeProfileLevel)) {
- FLAG_SET_DEFAULT(TypeProfileLevel, 0);
- }
-
- if (UseJVMCINativeLibrary) {
- // SVM compiled code requires more stack space
- if (FLAG_IS_DEFAULT(CompilerThreadStackSize)) {
- // Duplicate logic in the implementations of os::create_thread
- // so that we can then double the computed stack size. Once
- // the stack size requirements of SVM are better understood,
- // this logic can be pushed down into os::create_thread.
- int stack_size = CompilerThreadStackSize;
- if (stack_size == 0) {
- stack_size = VMThreadStackSize;
- }
- if (stack_size != 0) {
- FLAG_SET_DEFAULT(CompilerThreadStackSize, stack_size * 2);
- }
- }
- } else {
- // JVMCI needs values not less than defaults
- if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) {
- FLAG_SET_DEFAULT(ReservedCodeCacheSize, MAX2(64*M, ReservedCodeCacheSize));
- }
- if (FLAG_IS_DEFAULT(InitialCodeCacheSize)) {
- FLAG_SET_DEFAULT(InitialCodeCacheSize, MAX2(16*M, InitialCodeCacheSize));
- }
- if (FLAG_IS_DEFAULT(Tier3DelayOn)) {
- // This effectively prevents the compile broker scheduling tier 2
- // (i.e., limited C1 profiling) compilations instead of tier 3
- // (i.e., full C1 profiling) compilations when the tier 4 queue
- // backs up (which is quite likely when using a non-AOT compiled JVMCI
- // compiler). The observation based on jargraal is that the downside
- // of skipping full profiling is much worse for performance than the
- // queue backing up.
- FLAG_SET_DEFAULT(Tier3DelayOn, 100000);
- }
- } // !UseJVMCINativeLibrary
- } // UseJVMCICompiler
-}
-#endif // INCLUDE_JVMCI
-
bool CompilerConfig::check_args_consistency(bool status) {
// Check lower bounds of the code cache
// Template Interpreter code is approximately 3X larger in debug builds.
@@ -512,19 +449,6 @@ bool CompilerConfig::check_args_consistency(bool status) {
warning("SegmentedCodeCache has no meaningful effect with -Xint");
FLAG_SET_DEFAULT(SegmentedCodeCache, false);
}
-#if INCLUDE_JVMCI
- if (EnableJVMCI || UseJVMCICompiler) {
- if (!FLAG_IS_DEFAULT(EnableJVMCI) || !FLAG_IS_DEFAULT(UseJVMCICompiler)) {
- warning("JVMCI Compiler disabled due to -Xint.");
- }
- FLAG_SET_CMDLINE(EnableJVMCI, false);
- FLAG_SET_CMDLINE(UseJVMCICompiler, false);
- }
-#endif
- } else {
-#if INCLUDE_JVMCI
- status = status && JVMCIGlobals::check_jvmci_flags_are_consistent();
-#endif
}
return status;
@@ -538,15 +462,6 @@ void CompilerConfig::ergo_initialize() {
set_legacy_emulation_flags();
set_compilation_policy_flags();
-#if INCLUDE_JVMCI
- // Check that JVMCI supports selected GC.
- // Should be done after GCConfig::initialize() was called.
- JVMCIGlobals::check_jvmci_supported_gc();
-
- // Do JVMCI specific settings
- set_jvmci_specific_flags();
-#endif
-
if (UseOnStackReplacement && !UseLoopCounter) {
warning("On-stack-replacement requires loop counters; enabling loop counters");
FLAG_SET_DEFAULT(UseLoopCounter, true);
diff --git a/src/hotspot/share/compiler/compilerDefinitions.hpp b/src/hotspot/share/compiler/compilerDefinitions.hpp
index e8ba977f705..e086fc8add6 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.hpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,7 +33,6 @@ enum CompilerType : u1 {
compiler_none,
compiler_c1,
compiler_c2,
- compiler_jvmci,
compiler_number_of_types
};
@@ -59,7 +58,7 @@ enum CompLevel : s1 {
CompLevel_simple = 1, // C1
CompLevel_limited_profile = 2, // C1, invocation & backedge counters
CompLevel_full_profile = 3, // C1, invocation & backedge counters + mdo
- CompLevel_full_optimization = 4, // C2 or JVMCI
+ CompLevel_full_optimization = 4, // C2
CompLevel_count = 5
};
@@ -67,24 +66,17 @@ class CompilationModeFlag : AllStatic {
enum class Mode {
NORMAL,
QUICK_ONLY,
- HIGH_ONLY,
- HIGH_ONLY_QUICK_INTERNAL
+ HIGH_ONLY
};
static Mode _mode;
static void print_error();
public:
static bool initialize();
- static bool normal() { return _mode == Mode::NORMAL; }
- static bool quick_only() { return _mode == Mode::QUICK_ONLY; }
- static bool high_only() { return _mode == Mode::HIGH_ONLY; }
- static bool high_only_quick_internal() { return _mode == Mode::HIGH_ONLY_QUICK_INTERNAL; }
+ static bool normal() { return _mode == Mode::NORMAL; }
+ static bool quick_only() { return _mode == Mode::QUICK_ONLY; }
+ static bool high_only() { return _mode == Mode::HIGH_ONLY; }
- static bool disable_intermediate() { return high_only() || high_only_quick_internal(); }
- static bool quick_internal() { return !high_only(); }
-
- static void set_high_only_quick_internal() { _mode = Mode::HIGH_ONLY_QUICK_INTERNAL; }
- static void set_quick_only() { _mode = Mode::QUICK_ONLY; }
- static void set_high_only() { _mode = Mode::HIGH_ONLY; }
+ static bool disable_intermediate() { return high_only(); }
};
inline bool is_c1_compile(int comp_level) {
@@ -119,41 +111,27 @@ public:
// Which compilers are baked in?
constexpr static bool has_c1() { return COMPILER1_PRESENT(true) NOT_COMPILER1(false); }
constexpr static bool has_c2() { return COMPILER2_PRESENT(true) NOT_COMPILER2(false); }
- constexpr static bool has_jvmci() { return JVMCI_ONLY(true) NOT_JVMCI(false); }
- constexpr static bool has_tiered() { return has_c1() && (has_c2() || has_jvmci()); }
+ constexpr static bool has_tiered() { return has_c1() && has_c2(); }
- inline static bool is_jvmci_compiler();
- inline static bool is_jvmci();
inline static bool is_interpreter_only();
// is_*_only() functions describe situations in which the JVM is in one way or another
- // forced to use a particular compiler or their combination. The constraint functions
- // deliberately ignore the fact that there may also be methods installed
- // through JVMCI (where the JVMCI compiler was invoked not through the broker). Be sure
- // to check for those (using is_jvmci()) in situations where it matters.
+ // forced to use a particular compiler or their combination.
inline static bool is_tiered();
inline static bool is_c1_enabled();
inline static bool is_c1_only();
inline static bool is_c1_simple_only();
- inline static bool is_c1_or_interpreter_only_no_jvmci();
- inline static bool is_c1_only_no_jvmci();
+ inline static bool is_c1_or_interpreter_only();
inline static bool is_c1_profiling();
- inline static bool is_jvmci_compiler_enabled();
- inline static bool is_jvmci_compiler_only();
inline static bool is_c2_only();
inline static bool is_c2_enabled();
- inline static bool is_c2_or_jvmci_compiler_only();
- inline static bool is_c2_or_jvmci_compiler_enabled();
-
- inline static CompilerType compiler_type();
private:
static void set_compilation_policy_flags();
- static void set_jvmci_specific_flags();
static void set_legacy_emulation_flags();
};
diff --git a/src/hotspot/share/compiler/compilerDefinitions.inline.hpp b/src/hotspot/share/compiler/compilerDefinitions.inline.hpp
index 8bf70477cb3..e9f164fa7f0 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.inline.hpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,20 +35,13 @@ inline bool CompilerConfig::is_interpreter_only() {
return Arguments::is_interpreter_only() || TieredStopAtLevel == CompLevel_none;
}
-inline bool CompilerConfig::is_jvmci_compiler() { return JVMCI_ONLY(has_jvmci() && UseJVMCICompiler) NOT_JVMCI(false); }
-inline bool CompilerConfig::is_jvmci() { return JVMCI_ONLY(has_jvmci() && EnableJVMCI ) NOT_JVMCI(false); }
-
// is_*_only() functions describe situations in which the JVM is in one way or another
-// forced to use a particular compiler or their combination. The constraint functions
-// deliberately ignore the fact that there may also be methods installed
-// through JVMCI (where the JVMCI compiler was invoked not through the broker). Be sure
-// to check for those (using is_jvmci()) in situations where it matters.
-//
+// forced to use a particular compiler or their combination.
// Is the JVM in a configuration that permits only c1-compiled methods (level 1,2,3)?
inline bool CompilerConfig::is_c1_only() {
if (!is_interpreter_only() && has_c1()) {
- const bool c1_only = !has_c2() && !is_jvmci_compiler();
+ const bool c1_only = !has_c2();
const bool tiered_degraded_to_c1_only = TieredCompilation && TieredStopAtLevel >= CompLevel_simple && TieredStopAtLevel < CompLevel_full_optimization;
const bool c1_only_compilation_mode = CompilationModeFlag::quick_only();
return c1_only || tiered_degraded_to_c1_only || c1_only_compilation_mode;
@@ -56,13 +49,8 @@ inline bool CompilerConfig::is_c1_only() {
return false;
}
-inline bool CompilerConfig::is_c1_or_interpreter_only_no_jvmci() {
- assert(!is_jvmci_compiler() || is_jvmci(), "JVMCI compiler implies enabled JVMCI");
- return !is_jvmci() && (is_interpreter_only() || is_c1_only());
-}
-
-inline bool CompilerConfig::is_c1_only_no_jvmci() {
- return is_c1_only() && !is_jvmci();
+inline bool CompilerConfig::is_c1_or_interpreter_only() {
+ return is_interpreter_only() || is_c1_only();
}
// Is the JVM in a configuration that permits only c1-compiled methods at level 1?
@@ -77,18 +65,14 @@ inline bool CompilerConfig::is_c1_simple_only() {
}
inline bool CompilerConfig::is_c2_enabled() {
- return has_c2() && !is_interpreter_only() && !is_c1_only() && !is_jvmci_compiler();
+ return has_c2() && !is_interpreter_only() && !is_c1_only();
}
-inline bool CompilerConfig::is_jvmci_compiler_enabled() {
- return is_jvmci_compiler() && !is_interpreter_only() && !is_c1_only();
-}
// Is the JVM in a configuration that permits only c2-compiled methods?
inline bool CompilerConfig::is_c2_only() {
if (is_c2_enabled()) {
const bool c2_only = !has_c1();
- // There is no JVMCI compiler to replace C2 in the broker, and the user (or ergonomics)
- // is forcing C1 off.
+ // The user (or ergonomics) is forcing C1 off.
const bool c2_only_compilation_mode = CompilationModeFlag::high_only();
const bool tiered_off = !TieredCompilation;
return c2_only || c2_only_compilation_mode || tiered_off;
@@ -96,30 +80,14 @@ inline bool CompilerConfig::is_c2_only() {
return false;
}
-// Is the JVM in a configuration that permits only jvmci-compiled methods?
-inline bool CompilerConfig::is_jvmci_compiler_only() {
- if (is_jvmci_compiler_enabled()) {
- const bool jvmci_compiler_only = !has_c1();
- // JVMCI compiler replaced C2 and the user (or ergonomics) is forcing C1 off.
- const bool jvmci_only_compilation_mode = CompilationModeFlag::high_only();
- const bool tiered_off = !TieredCompilation;
- return jvmci_compiler_only || jvmci_only_compilation_mode || tiered_off;
- }
- return false;
-}
-
-inline bool CompilerConfig::is_c2_or_jvmci_compiler_only() {
- return is_c2_only() || is_jvmci_compiler_only();
-}
-
-// Tiered is basically C1 & (C2 | JVMCI) minus all the odd cases with restrictions.
+// Tiered is basically C1 & C2 minus all the odd cases with restrictions.
inline bool CompilerConfig::is_tiered() {
assert(!is_c1_simple_only() || is_c1_only(), "c1 simple mode must imply c1-only mode");
- return has_tiered() && !is_interpreter_only() && !is_c1_only() && !is_c2_or_jvmci_compiler_only();
+ return has_tiered() && !is_interpreter_only() && !is_c1_only() && !is_c2_only();
}
inline bool CompilerConfig::is_c1_enabled() {
- return has_c1() && !is_interpreter_only() && !is_c2_or_jvmci_compiler_only();
+ return has_c1() && !is_interpreter_only() && !is_c2_only();
}
inline bool CompilerConfig::is_c1_profiling() {
@@ -128,21 +96,4 @@ inline bool CompilerConfig::is_c1_profiling() {
return c1_only_profiling || tiered;
}
-inline bool CompilerConfig::is_c2_or_jvmci_compiler_enabled() {
- return is_c2_enabled() || is_jvmci_compiler_enabled();
-}
-
-// Return type of most optimizing compiler which is used
-inline CompilerType CompilerConfig::compiler_type() {
- CompilerType compiler_type = CompilerType::compiler_none; // Interpreter only
- if (CompilerConfig::is_c2_enabled()) {
- compiler_type = CompilerType::compiler_c2;
- } else if (CompilerConfig::is_jvmci_compiler_enabled()) {
- compiler_type = CompilerType::compiler_jvmci;
- } else if (CompilerConfig::is_c1_enabled()) {
- compiler_type = CompilerType::compiler_c1;
- }
- return compiler_type;
-}
-
#endif // SHARE_COMPILER_COMPILERDEFINITIONS_INLINE_HPP
diff --git a/src/hotspot/share/compiler/compilerDirectives.cpp b/src/hotspot/share/compiler/compilerDirectives.cpp
index d0042d0e16c..1b0bdc99f08 100644
--- a/src/hotspot/share/compiler/compilerDirectives.cpp
+++ b/src/hotspot/share/compiler/compilerDirectives.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#include "ci/ciMethod.hpp"
#include "ci/ciUtilities.inline.hpp"
#include "compiler/abstractCompiler.hpp"
+#include "compiler/compileBroker.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "compiler/compilerDirectives.hpp"
#include "compiler/compilerOracle.hpp"
@@ -190,7 +191,7 @@ DirectiveSet* CompilerDirectives::get_for(AbstractCompiler *comp) {
return _c2_store;
} else {
// use c1_store as default
- assert(comp->is_c1() || comp->is_jvmci(), "");
+ assert(comp->is_c1(), "");
return _c1_store;
}
}
@@ -378,7 +379,7 @@ class DirectiveSetPtr {
// - if some option is changed we need to copy directiveset since it no longer can be shared
// - Need to free copy after use
// - Requires a modified bit so we don't overwrite options that is set by directives
-DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle& method) {
+DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle& method, int comp_level) {
// Early bail out - checking all options is expensive - we rely on them not being used
// Only set a flag if it has not been modified and value changes.
// Only copy set if a flag needs to be set
@@ -397,7 +398,7 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle
// All CompileCommands are not equal so this gets a bit verbose
// When CompileCommands have been refactored less clutter will remain.
- if (CompilerOracle::should_break_at(method)) {
+ if (CompilerOracle::should_break_at(method, static_cast<CompLevel>(comp_level))) {
// If the directives didn't have 'BreakAtCompile' or 'BreakAtExecute',
// the sub-command 'Break' of the 'CompileCommand' would become effective.
if (!_modified[BreakAtCompileIndex]) {
@@ -414,13 +415,13 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle
}
}
- if (CompilerOracle::should_print(method)) {
+ if (CompilerOracle::should_print(method, static_cast<CompLevel>(comp_level))) {
if (!_modified[PrintAssemblyIndex]) {
set.cloned()->PrintAssemblyOption = true;
}
}
// Exclude as in should not compile == Enabled
- if (CompilerOracle::should_exclude(method)) {
+ if (CompilerOracle::should_exclude(method, static_cast<CompLevel>(comp_level))) {
if (!_modified[ExcludeIndex]) {
set.cloned()->ExcludeOption = true;
}
@@ -547,7 +548,7 @@ bool DirectiveSet::should_inline(ciMethod* inlinee) {
return false;
}
-bool DirectiveSet::should_not_inline(ciMethod* inlinee) {
+bool DirectiveSet::should_not_inline(ciMethod* inlinee, int comp_level) {
inlinee->check_is_loaded();
VM_ENTRY_MARK;
methodHandle mh(THREAD, inlinee->get_Method());
@@ -556,7 +557,7 @@ bool DirectiveSet::should_not_inline(ciMethod* inlinee) {
return matches_inline(mh, InlineMatcher::dont_inline);
}
if (!CompilerDirectivesIgnoreCompileCommandsOption) {
- return CompilerOracle::should_not_inline(mh);
+ return CompilerOracle::should_not_inline(mh, static_cast<CompLevel>(comp_level));
}
return false;
}
@@ -670,9 +671,9 @@ void DirectivesStack::init() {
char str[] = "*.*";
const char* error_msg = nullptr;
_default_directives->add_match(str, error_msg);
-#if defined(COMPILER1) || INCLUDE_JVMCI
+#ifdef COMPILER1
_default_directives->_c1_store->EnableOption = true;
-#endif
+#endif // COMPILER1
#ifdef COMPILER2
if (CompilerConfig::is_c2_enabled()) {
_default_directives->_c2_store->EnableOption = true;
@@ -755,7 +756,7 @@ void DirectivesStack::release(DirectiveSet* set) {
assert(set != nullptr, "Never nullptr");
MutexLocker locker(DirectivesStack_lock, Mutex::_no_safepoint_check_flag);
if (set->is_exclusive_copy()) {
- // Old CompilecCmmands forced us to create an exclusive copy
+ // Old CompileCommands forced us to create an exclusive copy
delete set;
} else {
assert(set->directive() != nullptr, "Never nullptr");
@@ -772,8 +773,9 @@ void DirectivesStack::release(CompilerDirectives* dir) {
}
}
-DirectiveSet* DirectivesStack::getMatchingDirective(const methodHandle& method, AbstractCompiler *comp) {
+DirectiveSet* DirectivesStack::getMatchingDirective(const methodHandle& method, int comp_level) {
assert(_depth > 0, "Must never be empty");
+ AbstractCompiler* comp = CompileBroker::compiler(comp_level);
DirectiveSet* match = nullptr;
{
@@ -798,5 +800,5 @@ DirectiveSet* DirectivesStack::getMatchingDirective(const methodHandle& method,
guarantee(match != nullptr, "There should always be a default directive that matches");
// Check for legacy compile commands update, without DirectivesStack_lock
- return match->compilecommand_compatibility_init(method);
+ return match->compilecommand_compatibility_init(method, comp_level);
}
diff --git a/src/hotspot/share/compiler/compilerDirectives.hpp b/src/hotspot/share/compiler/compilerDirectives.hpp
index 04873aab664..ae814cdc491 100644
--- a/src/hotspot/share/compiler/compilerDirectives.hpp
+++ b/src/hotspot/share/compiler/compilerDirectives.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -115,7 +115,7 @@ private:
static int _depth;
static void pop_inner(); // no lock version of pop
- static DirectiveSet* getMatchingDirective(const methodHandle& mh, AbstractCompiler* comp);
+ static DirectiveSet* getMatchingDirective(const methodHandle& mh, int comp_level);
static DirectiveSet* getDefaultDirective(AbstractCompiler* comp);
static void release(DirectiveSet* set);
static void release(CompilerDirectives* dir);
@@ -145,10 +145,10 @@ public:
bool parse_and_add_inline(char* str, const char*& error_msg);
void append_inline(InlineMatcher* m);
bool should_inline(ciMethod* inlinee);
- bool should_not_inline(ciMethod* inlinee);
+ bool should_not_inline(ciMethod* inlinee, int comp_level);
bool should_delay_inline(ciMethod* inlinee);
void print_inline(outputStream* st);
- DirectiveSet* compilecommand_compatibility_init(const methodHandle& method);
+ DirectiveSet* compilecommand_compatibility_init(const methodHandle& method, int comp_level);
bool is_exclusive_copy() { return _directive == nullptr; }
bool matches_inline(const methodHandle& method, int inline_action);
static DirectiveSet* clone(DirectiveSet const* src);
@@ -335,21 +335,35 @@ public:
class CompilerDirectiveMatcher {
private:
DirectiveSet* _match;
+
+ void release_match() {
+ if (_match != nullptr) {
+ DirectivesStack::release(_match);
+ _match = nullptr;
+ }
+ }
+
public:
// Use this constructor to get default directive
CompilerDirectiveMatcher(AbstractCompiler* comp) {
_match = DirectivesStack::getDefaultDirective(comp);
}
- CompilerDirectiveMatcher(const methodHandle& mh, AbstractCompiler* comp) {
- _match = DirectivesStack::getMatchingDirective(mh, comp);
+ CompilerDirectiveMatcher(const methodHandle& mh, int comp_level) {
+ _match = DirectivesStack::getMatchingDirective(mh, comp_level);
}
~CompilerDirectiveMatcher() {
- DirectivesStack::release(_match);
+ release_match();
}
DirectiveSet* directive_set() const { return _match; }
+
+ void transfer_from(CompilerDirectiveMatcher& src) {
+ release_match();
+ _match = src._match;
+ src._match = nullptr;
+ }
};
#endif // SHARE_COMPILER_COMPILERDIRECTIVES_HPP
diff --git a/src/hotspot/share/compiler/compilerOracle.cpp b/src/hotspot/share/compiler/compilerOracle.cpp
index 5bcd01a4d09..7d2b4de1182 100644
--- a/src/hotspot/share/compiler/compilerOracle.cpp
+++ b/src/hotspot/share/compiler/compilerOracle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,37 @@ static const char* const default_compile_commands[] = {
#endif
nullptr };
+// CompLevel | -XX:CompileCommand bitmask
+// ----------------------------------------------------
+// 0 (interpreter) | N/A
+// 1 (C1) | 1
+// 2 (C1 + counters) | 10
+// 3 (C1 + counters + mdo) | 100
+// 4 (C2) | 1000
+// All C1 levels | 111
+// All levels | 1111
+
+static const int comp_level_bitmask[CompLevel_count] = {0, 1, 10, 100, 1000};
+static const int comp_level_bitmask_all_levels = 1111;
+static const intx default_comp_level_argument = comp_level_bitmask_all_levels;
+
+inline bool bitmask_applies_to_comp_level(int bitmask, int comp_level) {
+ assert(comp_level > CompLevel_none && comp_level < CompLevel_count, "CompLevel out of bounds");
+ return (bitmask / comp_level_bitmask[comp_level]) % 10 == 1;
+}
+
+static bool is_valid_comp_level_bitmask(intx bitmask) {
+ if (bitmask < 0 || bitmask > comp_level_bitmask_all_levels) {
+ return false;
+ }
+ for (; bitmask != 0; bitmask /= 10) {
+ if (bitmask % 10 > 1) {
+ return false;
+ }
+ }
+ return true;
+}
+
static const char* optiontype_names[] = {
#define enum_of_types(type, name) name,
OPTION_TYPES(enum_of_types)
@@ -456,36 +487,56 @@ template bool CompilerOracle::option_matches_type(CompileCommandEnum optio
template bool CompilerOracle::option_matches_type(CompileCommandEnum option, ccstr& value);
template bool CompilerOracle::option_matches_type(CompileCommandEnum option, double& value);
+bool CompilerOracle::applies_to_comp_level(const methodHandle& method, CompileCommandEnum command, CompLevel current_level) {
+ if (current_level == CompLevel_none) {
+ return false;
+ }
+
+ intx bitmask = 0;
+ if (!has_option_value(method, command, bitmask)) {
+ return false;
+ }
+
+ // Since we don't have bitmask for interpreter level (0), but still need to call CompilerOracle::should_print()
+ // from collect_profiled_methods() in java.cpp, a special value of CompLevel_any produces a match with any bitmask, even 0
+ return current_level == CompLevel_any
+ || bitmask_applies_to_comp_level(bitmask, current_level);
+}
+
bool CompilerOracle::has_option(const methodHandle& method, CompileCommandEnum option) {
bool value = false;
has_option_value(method, option, value);
return value;
}
-bool CompilerOracle::should_exclude(const methodHandle& method) {
- if (check_predicate(CompileCommandEnum::Exclude, method)) {
+bool CompilerOracle::should_exclude(const methodHandle& method, const CompLevel level) {
+ if (has_exclude(method, level)) {
return true;
}
if (has_command(CompileCommandEnum::CompileOnly)) {
- return !check_predicate(CompileCommandEnum::CompileOnly, method);
+ return !applies_to_comp_level(method, CompileCommandEnum::CompileOnly, level);
}
return false;
}
+bool CompilerOracle::has_exclude(const methodHandle& method, const CompLevel level) {
+ return applies_to_comp_level(method, CompileCommandEnum::Exclude, level);
+}
+
bool CompilerOracle::should_inline(const methodHandle& method) {
return (check_predicate(CompileCommandEnum::Inline, method));
}
-bool CompilerOracle::should_not_inline(const methodHandle& method) {
- return check_predicate(CompileCommandEnum::DontInline, method) || check_predicate(CompileCommandEnum::Exclude, method);
+bool CompilerOracle::should_not_inline(const methodHandle& method, const CompLevel level) {
+ return check_predicate(CompileCommandEnum::DontInline, method) || has_exclude(method, level);
}
bool CompilerOracle::should_delay_inline(const methodHandle& method) {
return (check_predicate(CompileCommandEnum::DelayInline, method));
}
-bool CompilerOracle::should_print(const methodHandle& method) {
- return check_predicate(CompileCommandEnum::Print, method);
+bool CompilerOracle::should_print(const methodHandle& method, const CompLevel level) {
+ return applies_to_comp_level(method, CompileCommandEnum::Print, level);
}
bool CompilerOracle::should_print_methods() {
@@ -505,8 +556,8 @@ bool CompilerOracle::should_log(const methodHandle& method) {
return (check_predicate(CompileCommandEnum::Log, method));
}
-bool CompilerOracle::should_break_at(const methodHandle& method) {
- return check_predicate(CompileCommandEnum::Break, method);
+bool CompilerOracle::should_break_at(const methodHandle& method, const CompLevel level) {
+ return applies_to_comp_level(method, CompileCommandEnum::Break, level);
}
void CompilerOracle::tag_blackhole_if_possible(const methodHandle& method) {
@@ -678,6 +729,19 @@ static void usage() {
tty->print_cr("from inlining, whereas the 'compileonly' command only excludes methods from");
tty->print_cr("top-level compilations (i.e. they can still be inlined into other compilation units).");
tty->cr();
+ tty->print_cr("Compilation levels can be specified in the 'compileonly', 'exclude', 'print',");
+ tty->print_cr("and 'break' commands using a binary bitmask as an optional value:");
+ tty->print_cr(" -XX:CompileCommand=exclude,java/*.*,1011 -XX:CompileCommand=print,java/*.*,100");
+ tty->cr();
+ tty->print_cr("The bitmask is calculated by summing the desired compilation level values:");
+ tty->print_cr(" C1 without profiling = 1");
+ tty->print_cr(" C1 with limited profiling = 10");
+ tty->print_cr(" C1 with full profiling = 100");
+ tty->print_cr(" C2 = 1000");
+ tty->cr();
+ tty->print_cr("Note: Excluding specific compilation levels may disrupt normal state transitions");
+ tty->print_cr("between the levels, as the VM will not automatically work around the excluded ones.");
+ tty->cr();
};
static int skip_whitespace(char* &line) {
@@ -712,7 +776,7 @@ static bool parseMemLimit(const char* line, intx& value, int& bytes_read, char*
size_t s = 0;
char* end;
if (!parse_integer(line, &end, &s)) {
- jio_snprintf(errorbuf, buf_size, "MemLimit: invalid value");
+ jio_snprintf(errorbuf, buf_size, ": invalid integer: '%.20s'", line);
return false;
}
bytes_read = (int)(end - line);
@@ -726,7 +790,7 @@ static bool parseMemLimit(const char* line, intx& value, int& bytes_read, char*
// ok, this is the default
bytes_read += 5;
} else {
- jio_snprintf(errorbuf, buf_size, "MemLimit: invalid option");
+ jio_snprintf(errorbuf, buf_size, ": invalid suffix: '%.6s'", end);
return false;
}
}
@@ -751,7 +815,7 @@ static bool parseMemStat(const char* line, uintx& value, int& bytes_read, char*
});
#undef IF_ENUM_STRING
- jio_snprintf(errorbuf, buf_size, "MemStat: invalid option");
+ jio_snprintf(errorbuf, buf_size, ": invalid option: '%.8s'", line);
return false;
}
@@ -763,21 +827,42 @@ static bool scan_value(enum OptionType type, char* line, int& total_bytes_read,
const char* type_str = optiontype2name(type);
int skipped = skip_whitespace(line);
total_bytes_read += skipped;
+ char parse_error_buf[80] = {};
+
if (type == OptionType::Intx) {
intx value;
bool success = false;
- if (option == CompileCommandEnum::MemLimit) {
- // Special parsing for MemLimit
- success = parseMemLimit(line, value, bytes_read, errorbuf, buf_size);
- } else {
- // Is it a raw number?
- success = sscanf(line, "%zd%n", &value, &bytes_read) == 1;
+ switch (option) {
+ case CompileCommandEnum::MemLimit:
+ // Special parsing for MemLimit
+ success = parseMemLimit(line, value, bytes_read, parse_error_buf, sizeof(parse_error_buf));
+ break;
+ case CompileCommandEnum::Break:
+ case CompileCommandEnum::CompileOnly:
+ case CompileCommandEnum::Exclude:
+ case CompileCommandEnum::Print:
+ // For the commands above, the parameter used to be a boolean. Now it is an int (a compilation level mask).
+ // For compatibility with previous versions it remains optional: if the user did not specify a mask, assume the default value.
+ if (*line == '\0') {
+ value = default_comp_level_argument;
+ success = true;
+ } else {
+ success = sscanf(line, "%zd%n", &value, &bytes_read) == 1;
+ if (success && !is_valid_comp_level_bitmask(value)) {
+ jio_snprintf(parse_error_buf, sizeof(parse_error_buf), ": invalid compilation level bitmask '%.*s'", bytes_read, line);
+ success = false;
+ }
+ }
+ break;
+ default:
+ // Is it a raw number?
+ success = sscanf(line, "%zd%n", &value, &bytes_read) == 1;
}
if (success) {
total_bytes_read += bytes_read;
return register_command(matcher, option, errorbuf, buf_size, value);
} else {
- jio_snprintf(errorbuf, buf_size, "Value cannot be read for option '%s' of type '%s'", ccname, type_str);
+ jio_snprintf(errorbuf, buf_size, "Value cannot be read for option '%s' of type '%s'%s", ccname, type_str, parse_error_buf);
return false;
}
} else if (type == OptionType::Uintx) {
@@ -785,7 +870,7 @@ static bool scan_value(enum OptionType type, char* line, int& total_bytes_read,
bool success = false;
if (option == CompileCommandEnum::MemStat) {
// Special parsing for MemStat
- success = parseMemStat(line, value, bytes_read, errorbuf, buf_size);
+ success = parseMemStat(line, value, bytes_read, parse_error_buf, sizeof(parse_error_buf));
} else {
// parse as raw number
success = sscanf(line, "%zu%n", &value, &bytes_read) == 1;
@@ -794,7 +879,7 @@ static bool scan_value(enum OptionType type, char* line, int& total_bytes_read,
total_bytes_read += bytes_read;
return register_command(matcher, option, errorbuf, buf_size, value);
} else {
- jio_snprintf(errorbuf, buf_size, "Value cannot be read for option '%s' of type '%s'", ccname, type_str);
+ jio_snprintf(errorbuf, buf_size, "Value cannot be read for option '%s' of type '%s'%s", ccname, type_str, parse_error_buf);
return false;
}
} else if (type == OptionType::Ccstr) {
@@ -1089,17 +1174,25 @@ bool CompilerOracle::parse_from_line(char* line) {
return false;
}
return true;
- } else if (option == CompileCommandEnum::MemStat) {
- // MemStat default action is to collect data but to not print
- if (!register_command(matcher, option, error_buf, sizeof(error_buf), (uintx)MemStatAction::collect)) {
+ }
+
+ switch (option) {
+ case CompileCommandEnum::Break:
+ case CompileCommandEnum::CompileOnly:
+ case CompileCommandEnum::Exclude:
+ case CompileCommandEnum::Print:
+ break;
+ case CompileCommandEnum::MemStat:
+ // MemStat default action is to collect data but to not print
+ if (!register_command(matcher, option, error_buf, sizeof(error_buf), (uintx)MemStatAction::collect)) {
+ print_parse_error(error_buf, original.get());
+ return false;
+ }
+ return true;
+ default:
+ jio_snprintf(error_buf, sizeof(error_buf), " Option '%s' is not followed by a value", option2name(option));
print_parse_error(error_buf, original.get());
return false;
- }
- return true;
- } else {
- jio_snprintf(error_buf, sizeof(error_buf), " Option '%s' is not followed by a value", option2name(option));
- print_parse_error(error_buf, original.get());
- return false;
}
}
if (!scan_value(type, line, bytes_read, matcher, option, error_buf, sizeof(error_buf))) {
@@ -1209,7 +1302,7 @@ bool CompilerOracle::parse_compile_only(char* line) {
if (method_pattern != nullptr) {
TypedMethodOptionMatcher* matcher = TypedMethodOptionMatcher::parse_method_pattern(method_pattern, error_buf, sizeof(error_buf));
if (matcher != nullptr) {
- if (register_command(matcher, CompileCommandEnum::CompileOnly, error_buf, sizeof(error_buf), true)) {
+ if (register_command(matcher, CompileCommandEnum::CompileOnly, error_buf, sizeof(error_buf), default_comp_level_argument)) {
continue;
}
}
diff --git a/src/hotspot/share/compiler/compilerOracle.hpp b/src/hotspot/share/compiler/compilerOracle.hpp
index 5615a2cf1fc..bfed52f12e7 100644
--- a/src/hotspot/share/compiler/compilerOracle.hpp
+++ b/src/hotspot/share/compiler/compilerOracle.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#ifndef SHARE_COMPILER_COMPILERORACLE_HPP
#define SHARE_COMPILER_COMPILERORACLE_HPP
+#include "compiler/compilerDirectives.hpp"
#include "memory/allStatic.hpp"
#include "oops/oopsHierarchy.hpp"
#include "utilities/istream.hpp"
@@ -49,14 +50,14 @@ class methodHandle;
option(Help, "help", Unknown) \
option(Quiet, "quiet", Unknown) \
option(Log, "log", Bool) \
- option(Print, "print", Bool) \
+ option(Print, "print", Intx) \
option(Inline, "inline", Bool) \
option(DelayInline, "delayinline", Bool) \
option(DontInline, "dontinline", Bool) \
option(Blackhole, "blackhole", Bool) \
- option(CompileOnly, "compileonly", Bool)\
- option(Exclude, "exclude", Bool) \
- option(Break, "break", Bool) \
+ option(CompileOnly, "compileonly", Intx) \
+ option(Exclude, "exclude", Intx) \
+ option(Break, "break", Intx) \
option(BreakAtExecute, "BreakAtExecute", Bool) \
option(BreakAtCompile, "BreakAtCompile", Bool) \
option(MemLimit, "MemLimit", Intx) \
@@ -135,6 +136,9 @@ class CompilerOracle : AllStatic {
static bool parse_from_input(inputStream::Input* input,
parse_from_line_fn_t* parse_from_line);
+ static bool has_exclude(const methodHandle& method, CompLevel level);
+ static bool applies_to_comp_level(const methodHandle& method, CompileCommandEnum command, CompLevel current_level);
+
public:
// True if the command file has been specified or is implicit
static bool has_command_file();
@@ -143,14 +147,15 @@ class CompilerOracle : AllStatic {
static bool parse_from_file();
// Tells whether we to exclude compilation of method
- static bool should_exclude(const methodHandle& method);
+ static bool should_exclude(const methodHandle & method, CompLevel level);
+
static bool be_quiet() { return _quiet; }
// Tells whether we want to inline this method
static bool should_inline(const methodHandle& method);
// Tells whether we want to disallow inlining of this method
- static bool should_not_inline(const methodHandle& method);
+ static bool should_not_inline(const methodHandle& method, CompLevel level);
// Tells whether we want to delay inlining of this method
static bool should_delay_inline(const methodHandle& method);
@@ -159,13 +164,14 @@ class CompilerOracle : AllStatic {
static bool changes_current_thread(const methodHandle& method);
// Tells whether we should print the assembly for this method
- static bool should_print(const methodHandle& method);
+ // If level == CompLevel_any, returns true if there is a print command with any mask; CompLevel_none never matches.
+ static bool should_print(const methodHandle& method, CompLevel level);
// Tells whether we should log the compilation data for this method
static bool should_log(const methodHandle& method);
// Tells whether to break when compiling method
- static bool should_break_at(const methodHandle& method);
+ static bool should_break_at(const methodHandle& method, CompLevel level);
// Tells whether there are any methods to print for print_method_statistics()
static bool should_print_methods();
diff --git a/src/hotspot/share/compiler/compilerThread.cpp b/src/hotspot/share/compiler/compilerThread.cpp
index 7cf494aad56..0139f864e12 100644
--- a/src/hotspot/share/compiler/compilerThread.cpp
+++ b/src/hotspot/share/compiler/compilerThread.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,6 @@ CompilerThread::CompilerThread(CompileQueue* queue,
_queue = queue;
_counters = counters;
_buffer_blob = nullptr;
- _can_call_java = false;
_compiler = nullptr;
_arena_stat = nullptr;
_timeout = nullptr;
@@ -56,11 +55,6 @@ CompilerThread::~CompilerThread() {
}
void CompilerThread::set_compiler(AbstractCompiler* c) {
- /*
- * Compiler threads need to make Java upcalls to the jargraal compiler.
- * Java upcalls are also needed by the InterpreterRuntime when using jargraal.
- */
- _can_call_java = c != nullptr && c->is_jvmci() JVMCI_ONLY(&& !UseJVMCINativeLibrary);
_compiler = c;
}
@@ -68,8 +62,3 @@ void CompilerThread::thread_entry(JavaThread* thread, TRAPS) {
assert(thread->is_Compiler_thread(), "must be compiler thread");
CompileBroker::compiler_thread_loop();
}
-
-// Hide native compiler threads from external view.
-bool CompilerThread::is_hidden_from_external_view() const {
- return _compiler == nullptr || _compiler->is_hidden_from_external_view();
-}
diff --git a/src/hotspot/share/compiler/compilerThread.hpp b/src/hotspot/share/compiler/compilerThread.hpp
index e5b14560872..255b1dc0a37 100644
--- a/src/hotspot/share/compiler/compilerThread.hpp
+++ b/src/hotspot/share/compiler/compilerThread.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -59,7 +59,6 @@ class CompilerThreadTimeoutGeneric : public CHeapObj {
// A thread used for Compilation.
class CompilerThread : public JavaThread {
friend class VMStructs;
- JVMCI_ONLY(friend class CompilerThreadCanCallJava;)
#ifdef LINUX
typedef CompilerThreadTimeoutLinux Timeout;
@@ -75,7 +74,6 @@ class CompilerThread : public JavaThread {
CompileTask* volatile _task; // print_threads_compiling can read this concurrently.
CompileQueue* _queue;
BufferBlob* _buffer_blob;
- bool _can_call_java;
AbstractCompiler* _compiler;
TimeStamp _idle_time;
@@ -99,11 +97,9 @@ class CompilerThread : public JavaThread {
bool is_Compiler_thread() const { return true; }
- virtual bool can_call_java() const { return _can_call_java; }
-
- // Returns true if this CompilerThread is hidden from JVMTI and FlightRecorder. C1 and C2 are
- // always hidden but JVMCI compiler threads might be hidden.
- virtual bool is_hidden_from_external_view() const;
+ // Compiler threads are hidden by default.
+ virtual bool is_hidden_from_external_view() const { return true; }
+ virtual bool can_call_java() const { return false; }
void set_compiler(AbstractCompiler* c);
AbstractCompiler* compiler() const { return _compiler; }
diff --git a/src/hotspot/share/compiler/compiler_globals.hpp b/src/hotspot/share/compiler/compiler_globals.hpp
index 98590abc15f..e1f8d9f8922 100644
--- a/src/hotspot/share/compiler/compiler_globals.hpp
+++ b/src/hotspot/share/compiler/compiler_globals.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,9 +33,6 @@
#ifdef COMPILER2
#include "opto/c2_globals.hpp"
#endif // COMPILER2
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci_globals.hpp"
-#endif
// TODO -- currently, even if all JIT compilers are disabled, the following flags
// are still available in HotSpot. This should eventually be fixed ...
@@ -279,9 +276,7 @@
"Compilation modes: " \
"default: normal tiered compilation; " \
"quick-only: C1-only mode; " \
- "high-only: C2/JVMCI-only mode; " \
- "high-only-quick-internal: C2/JVMCI-only mode, " \
- "with JVMCI compiler compiled with C1.") \
+ "high-only: C2-only mode.") \
\
product(bool, PrintTieredEvents, false, \
"Print tiered events notifications") \
diff --git a/src/hotspot/share/compiler/compiler_globals_pd.hpp b/src/hotspot/share/compiler/compiler_globals_pd.hpp
index 8ac4b53d6cd..55a3855f7f0 100644
--- a/src/hotspot/share/compiler/compiler_globals_pd.hpp
+++ b/src/hotspot/share/compiler/compiler_globals_pd.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,12 +39,7 @@
#include "opto/c2_globals.hpp"
#endif // COMPILER2
-// JVMCI has no platform-specific global definitions
-//#if INCLUDE_JVMCI
-//#include "jvmci/jvmci_globals_pd.hpp"
-//#endif
-
-#if !defined(COMPILER1) && !defined(COMPILER2) && !INCLUDE_JVMCI
+#if !defined(COMPILER1) && !defined(COMPILER2)
define_pd_global(bool, BackgroundCompilation, false);
define_pd_global(bool, CICompileOSR, false);
define_pd_global(bool, UseTypeProfile, false);
@@ -72,11 +67,11 @@ define_pd_global(size_t, CodeCacheMinimumUseSpace, 200*K);
#define CI_COMPILER_COUNT 0
#else
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
#define CI_COMPILER_COUNT 2
#else
#define CI_COMPILER_COUNT 1
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
#endif // no compilers
diff --git a/src/hotspot/share/compiler/oopMap.cpp b/src/hotspot/share/compiler/oopMap.cpp
index c8d0c5d22ba..a447f59b2cd 100644
--- a/src/hotspot/share/compiler/oopMap.cpp
+++ b/src/hotspot/share/compiler/oopMap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -47,9 +47,6 @@
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci_globals.hpp"
-#endif
static_assert(sizeof(oop) == sizeof(intptr_t), "Derived pointer sanity check");
@@ -395,9 +392,9 @@ class AddDerivedOop : public DerivedOopClosure {
};
virtual void do_derived_oop(derived_base* base, derived_pointer* derived) {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::add(derived, base);
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
};
@@ -874,7 +871,7 @@ void ImmutableOopMapSet::operator delete(void* p) {
//------------------------------DerivedPointerTable---------------------------
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
class DerivedPointerTable::Entry : public CHeapObj {
derived_pointer* _location; // Location of derived pointer, also pointing to base
@@ -973,4 +970,4 @@ void DerivedPointerTable::update_pointers() {
_active = false;
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
diff --git a/src/hotspot/share/compiler/oopMap.hpp b/src/hotspot/share/compiler/oopMap.hpp
index 0d9dbc188e8..04abcc54fc6 100644
--- a/src/hotspot/share/compiler/oopMap.hpp
+++ b/src/hotspot/share/compiler/oopMap.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -479,7 +479,7 @@ private:
// oops, it is filled in with references to all locations that contains a
// derived oop (assumed to be very few). When the GC is complete, the derived
// pointers are updated based on their base pointers new value and an offset.
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
class DerivedPointerTable : public AllStatic {
private:
class Entry;
@@ -515,6 +515,6 @@ class DerivedPointerTableDeactivate: public StackObj {
}
}
};
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
#endif // SHARE_COMPILER_OOPMAP_HPP
diff --git a/src/hotspot/share/compiler/oopMap.inline.hpp b/src/hotspot/share/compiler/oopMap.inline.hpp
index 327c2ebaa3f..1542eb2571d 100644
--- a/src/hotspot/share/compiler/oopMap.inline.hpp
+++ b/src/hotspot/share/compiler/oopMap.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,10 +32,6 @@
#include "runtime/globals.hpp"
#include "utilities/ostream.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci_globals.hpp"
-#endif
-
inline const ImmutableOopMap* ImmutableOopMapSet::find_map_at_slot(int slot, int pc_offset) const {
assert(slot >= 0 && slot < _count, "bounds count: %d slot: %d", _count, slot);
ImmutableOopMapPair* pairs = get_pairs();
@@ -66,13 +62,6 @@ void OopMapDo::iterate_oops_do(const frame
if (omv.type() != OopMapValue::derived_oop_value)
continue;
- #ifndef COMPILER2
- #if INCLUDE_JVMCI
- if (!EnableJVMCI)
- #endif
- ShouldNotReachHere();
- #endif // !COMPILER2
-
address loc = fr->oopmapreg_to_location(omv.reg(), reg_map);
DEBUG_ONLY(if (loc == nullptr && reg_map->should_skip_missing()) continue;)
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
index 3bf26bf46c9..5398a95b957 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
@@ -503,7 +503,7 @@ int G1BarrierSetC2::get_store_barrier(C2Access& access) const {
return barriers;
}
-void G1BarrierSetC2::elide_dominated_barrier(MachNode* mach) const {
+void G1BarrierSetC2::elide_dominated_barrier(MachNode* mach, MachNode* dominator) const {
uint8_t barrier_data = mach->barrier_data();
barrier_data &= ~G1C2BarrierPre;
if (CardTableBarrierSetC2::use_ReduceInitialCardMarks()) {
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
index e8a0e797dfa..454e76fd94b 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -103,7 +103,7 @@ public:
ArrayCopyNode* ac) const;
virtual void* create_barrier_state(Arena* comp_arena) const;
virtual void emit_stubs(CodeBuffer& cb) const;
- virtual void elide_dominated_barrier(MachNode* mach) const;
+ virtual void elide_dominated_barrier(MachNode* mach, MachNode* dominator) const;
virtual void late_barrier_analysis() const;
#ifndef PRODUCT
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
index 0fb2e65f1b9..d0e549c9b11 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
@@ -2489,9 +2489,9 @@ void G1CollectedHeap::gc_epilogue(bool full) {
increment_old_marking_cycles_completed(false /* concurrent */, true /* liveness_completed */);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
assert(DerivedPointerTable::is_empty(), "derived pointer present");
-#endif
+#endif // COMPILER2
// We have just completed a GC. Update the soft reference
// policy with the new heap occupancy
@@ -2650,6 +2650,7 @@ void G1CollectedHeap::verify_after_young_collection(G1HeapVerifier::G1VerifyType
if (collector_state()->is_in_concurrent_start_gc()) {
log_debug(gc, verify)("Marking state");
_verifier->verify_marking_state();
+ _verifier->verify_bitmap_clear(true /* above_tams_only */);
}
_verifier->verify_free_regions_card_tables_clean();
@@ -2733,12 +2734,12 @@ void G1CollectedHeap::do_collection_pause_at_safepoint(size_t allocation_word_si
// Perform the collection.
G1YoungCollector collector(gc_cause(), allocation_word_size);
collector.collect();
-
+ // Update collector state.
+ _collector_state = collector.next_state();
// It should now be safe to tell the concurrent mark thread to start
// without its logging output interfering with the logging output
// that came from the pause.
if (should_start_concurrent_mark_operation) {
- verifier()->verify_bitmap_clear(true /* above_tams_only */);
// CAUTION: after the start_concurrent_cycle() call below, the concurrent marking
// thread(s) could be running concurrently with us. Make sure that anything
// after this point does not assume that we are the only GC thread running.
diff --git a/src/hotspot/share/gc/g1/g1CollectorState.hpp b/src/hotspot/share/gc/g1/g1CollectorState.hpp
index 42aaeab03b2..762ddb1fc8f 100644
--- a/src/hotspot/share/gc/g1/g1CollectorState.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectorState.hpp
@@ -111,11 +111,6 @@ public:
// Pause kind queries
inline static void assert_is_young_pause(Pause type);
- inline static bool is_young_only_pause(Pause type);
- inline static bool is_concurrent_start_pause(Pause type);
- inline static bool is_prepare_mixed_pause(Pause type);
- inline static bool is_mixed_pause(Pause type);
-
inline static bool is_concurrent_cycle_pause(Pause type);
};
diff --git a/src/hotspot/share/gc/g1/g1CollectorState.inline.hpp b/src/hotspot/share/gc/g1/g1CollectorState.inline.hpp
index 0c6c9c879c3..b2d3dfcc489 100644
--- a/src/hotspot/share/gc/g1/g1CollectorState.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectorState.inline.hpp
@@ -98,29 +98,6 @@ inline void G1CollectorState::assert_is_young_pause(Pause type) {
assert(type != Pause::Cleanup, "must be");
}
-inline bool G1CollectorState::is_young_only_pause(Pause type) {
- assert_is_young_pause(type);
- return type == Pause::ConcurrentStartUndo ||
- type == Pause::ConcurrentStartFull ||
- type == Pause::PrepareMixed ||
- type == Pause::Normal;
-}
-
-inline bool G1CollectorState::is_mixed_pause(Pause type) {
- assert_is_young_pause(type);
- return type == Pause::Mixed;
-}
-
-inline bool G1CollectorState::is_prepare_mixed_pause(Pause type) {
- assert_is_young_pause(type);
- return type == Pause::PrepareMixed;
-}
-
-inline bool G1CollectorState::is_concurrent_start_pause(Pause type) {
- assert_is_young_pause(type);
- return type == Pause::ConcurrentStartFull || type == Pause::ConcurrentStartUndo;
-}
-
inline bool G1CollectorState::is_concurrent_cycle_pause(Pause type) {
return type == Pause::Cleanup || type == Pause::Remark;
}
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.inline.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.inline.hpp
index 8cb7881e000..64441ccac65 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.inline.hpp
@@ -32,7 +32,7 @@
// Total virtual time so far.
inline double G1ConcurrentMarkThread::total_mark_cpu_time_s() {
- return static_cast<double>(os::thread_cpu_time(this)) + worker_threads_cpu_time_s();
+ return static_cast<double>(os::thread_cpu_time(this)) / NANOSECS_PER_SEC + worker_threads_cpu_time_s(); // Scale this thread's CPU time by NANOSECS_PER_SEC before adding the workers' seconds.
}
// Marking virtual time so far
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp
index 7da0066e2f1..4fa32b388bd 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.cpp
@@ -25,18 +25,15 @@
#include "gc/g1/g1ConcurrentRefineStats.inline.hpp"
#include "runtime/timer.hpp"
-void G1ConcurrentRefineStats::add_atomic(G1ConcurrentRefineStats* other) {
- _sweep_duration.add_then_fetch(other->_sweep_duration.load_relaxed(), memory_order_relaxed);
- _yield_during_sweep_duration.add_then_fetch(other->yield_during_sweep_duration(), memory_order_relaxed);
+void G1ConcurrentRefineStats::add_atomic(const G1LocalRefineStats* other) { // Adds every field of a G1LocalRefineStats to the matching member with a relaxed add_then_fetch.
+ _cards_scanned.add_then_fetch(other->_cards_scanned, memory_order_relaxed); // Sweep and yield durations are not merged here; G1LocalRefineStats has no such fields.
+ _cards_clean.add_then_fetch(other->_cards_clean, memory_order_relaxed);
+ _cards_not_parsable.add_then_fetch(other->_cards_not_parsable, memory_order_relaxed);
+ _cards_already_refer_to_cset.add_then_fetch(other->_cards_already_refer_to_cset, memory_order_relaxed);
+ _cards_refer_to_cset.add_then_fetch(other->_cards_refer_to_cset, memory_order_relaxed);
+ _cards_no_cross_region.add_then_fetch(other->_cards_no_cross_region, memory_order_relaxed);
- _cards_scanned.add_then_fetch(other->cards_scanned(), memory_order_relaxed);
- _cards_clean.add_then_fetch(other->cards_clean(), memory_order_relaxed);
- _cards_not_parsable.add_then_fetch(other->cards_not_parsable(), memory_order_relaxed);
- _cards_already_refer_to_cset.add_then_fetch(other->cards_already_refer_to_cset(), memory_order_relaxed);
- _cards_refer_to_cset.add_then_fetch(other->cards_refer_to_cset(), memory_order_relaxed);
- _cards_no_cross_region.add_then_fetch(other->cards_no_cross_region(), memory_order_relaxed);
-
- _refine_duration.add_then_fetch(other->refine_duration(), memory_order_relaxed);
+ _refine_duration.add_then_fetch(other->_refine_duration, memory_order_relaxed);
}
void G1ConcurrentRefineStats::reset() {
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp
index a91ad0eb2e4..6f4af71081b 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.hpp
@@ -29,9 +29,27 @@
#include "runtime/atomic.hpp"
#include "utilities/globalDefinitions.hpp"
-// Collection of statistics for concurrent refinement processing.
-// Used for collecting per-thread statistics and for summaries over a
-// collection of threads.
+// Thread-local refinement statistics.
+struct G1LocalRefineStats { // Plain (non-atomic) counters; G1ConcurrentRefineStats::add_atomic() folds them into the shared statistics.
+ size_t _cards_scanned; // Grows by claim.size() for each claim the sweep closure processes.
+ size_t _cards_clean; // Grows by claim.size() - num_dirty_cards for each processed claim.
+ size_t _cards_not_parsable; // Incremented when refinement returns G1RemSet::CouldNotParse.
+ size_t _cards_already_refer_to_cset; // Incremented when refinement returns G1RemSet::AlreadyToCSet.
+ size_t _cards_refer_to_cset; // Incremented when refinement returns G1RemSet::HasRefToCSet.
+ size_t _cards_no_cross_region; // Incremented when refinement returns G1RemSet::NoCrossRegion.
+ jlong _refine_duration; // Sum of os::elapsed_counter() deltas measured around refining dirty card ranges.
+
+ G1LocalRefineStats() : // Starts every counter at zero.
+ _cards_scanned(0),
+ _cards_clean(0),
+ _cards_not_parsable(0),
+ _cards_already_refer_to_cset(0),
+ _cards_refer_to_cset(0),
+ _cards_no_cross_region(0),
+ _refine_duration(0) {}
+};
+
+// Global statistics for concurrent refinement processing.
class G1ConcurrentRefineStats : public CHeapObj {
Atomic _sweep_duration; // Time spent sweeping the table finding non-clean cards
// and refining them.
@@ -69,18 +87,10 @@ public:
inline size_t cards_to_cset() const;
- inline void inc_sweep_time(jlong t);
+ void add_atomic(const G1LocalRefineStats* other);
+
+ inline void inc_sweep_duration(jlong t);
inline void inc_yield_during_sweep_duration(jlong t);
- inline void inc_refine_duration(jlong t);
-
- inline void inc_cards_scanned(size_t increment);
- inline void inc_cards_clean(size_t increment);
- inline void inc_cards_not_parsable();
- inline void inc_cards_already_refer_to_cset();
- inline void inc_cards_refer_to_cset();
- inline void inc_cards_no_cross_region();
-
- void add_atomic(G1ConcurrentRefineStats* other);
void reset();
};
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.inline.hpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.inline.hpp
index e1a296c6494..2ef35caab08 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineStats.inline.hpp
@@ -79,40 +79,12 @@ inline size_t G1ConcurrentRefineStats::cards_to_cset() const {
return cards_already_refer_to_cset() + cards_refer_to_cset();
}
-inline void G1ConcurrentRefineStats::inc_sweep_time(jlong t) {
- _sweep_duration.store_relaxed(_sweep_duration.load_relaxed() + t);
+inline void G1ConcurrentRefineStats::inc_sweep_duration(jlong t) { // Adds t to the accumulated sweep duration.
+ _sweep_duration.fetch_then_add(t, memory_order_relaxed); // One relaxed fetch_then_add replaces the former separate load_relaxed + store_relaxed; the returned value is unused.
}
inline void G1ConcurrentRefineStats::inc_yield_during_sweep_duration(jlong t) {
- _yield_during_sweep_duration.store_relaxed(yield_during_sweep_duration() + t);
-}
-
-inline void G1ConcurrentRefineStats::inc_refine_duration(jlong t) {
- _refine_duration.store_relaxed(refine_duration() + t);
-}
-
-inline void G1ConcurrentRefineStats::inc_cards_scanned(size_t increment) {
- _cards_scanned.store_relaxed(cards_scanned() + increment);
-}
-
-inline void G1ConcurrentRefineStats::inc_cards_clean(size_t increment) {
- _cards_clean.store_relaxed(cards_clean() + increment);
-}
-
-inline void G1ConcurrentRefineStats::inc_cards_not_parsable() {
- _cards_not_parsable.store_relaxed(cards_not_parsable() + 1);
-}
-
-inline void G1ConcurrentRefineStats::inc_cards_already_refer_to_cset() {
- _cards_already_refer_to_cset.store_relaxed(cards_already_refer_to_cset() + 1);
-}
-
-inline void G1ConcurrentRefineStats::inc_cards_refer_to_cset() {
- _cards_refer_to_cset.store_relaxed(cards_refer_to_cset() + 1);
-}
-
-inline void G1ConcurrentRefineStats::inc_cards_no_cross_region() {
- _cards_no_cross_region.store_relaxed(cards_no_cross_region() + 1);
+ _yield_during_sweep_duration.fetch_then_add(t, memory_order_relaxed); // Adds t with one relaxed fetch_then_add instead of the former getter read + store_relaxed; the returned value is unused.
}
#endif // SHARE_GC_G1_G1CONCURRENTREFINESTATS_INLINE_HPP
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp b/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp
index e522163f980..2f99611bb99 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp
@@ -60,22 +60,22 @@ class G1RefineRegionClosure : public G1HeapRegionClosure {
switch (res) {
case G1RemSet::HasRefToCSet: {
*dest_card = G1CardTable::g1_to_cset_card;
- _refine_stats.inc_cards_refer_to_cset();
+ _per_worker_refine_data._cards_refer_to_cset++;
break;
}
case G1RemSet::AlreadyToCSet: {
*dest_card = G1CardTable::g1_to_cset_card;
- _refine_stats.inc_cards_already_refer_to_cset();
+ _per_worker_refine_data._cards_already_refer_to_cset++;
break;
}
case G1RemSet::NoCrossRegion: {
- _refine_stats.inc_cards_no_cross_region();
+ _per_worker_refine_data._cards_no_cross_region++;
break;
}
case G1RemSet::CouldNotParse: {
// Could not refine - redirty with the original value.
*dest_card = *source_card;
- _refine_stats.inc_cards_not_parsable();
+ _per_worker_refine_data._cards_not_parsable++;
break;
}
case G1RemSet::HasRefToOld : break; // Nothing special to do.
@@ -92,7 +92,7 @@ class G1RefineRegionClosure : public G1HeapRegionClosure {
public:
bool _completed;
- G1ConcurrentRefineStats _refine_stats;
+ G1LocalRefineStats _per_worker_refine_data;
G1RefineRegionClosure(uint worker_id, G1CardTableClaimTable* scan_state) :
G1HeapRegionClosure(),
@@ -100,7 +100,7 @@ public:
_scan_state(scan_state),
_worker_id(worker_id),
_completed(true),
- _refine_stats() { }
+ _per_worker_refine_data() { }
bool do_heap_region(G1HeapRegion* r) override {
@@ -141,7 +141,7 @@ public:
do_claimed_block(dirty_l, dirty_r, dest_card + pointer_delta(dirty_l, start_card, sizeof(CardValue)));
num_dirty_cards += pointer_delta(dirty_r, dirty_l, sizeof(CardValue));
- _refine_stats.inc_refine_duration(os::elapsed_counter() - refine_start);
+ _per_worker_refine_data._refine_duration += os::elapsed_counter() - refine_start;
});
if (VerifyDuringGC) {
@@ -150,8 +150,8 @@ public:
}
}
- _refine_stats.inc_cards_scanned(claim.size());
- _refine_stats.inc_cards_clean(claim.size() - num_dirty_cards);
+ _per_worker_refine_data._cards_scanned += claim.size();
+ _per_worker_refine_data._cards_clean += claim.size() - num_dirty_cards;
if (SuspendibleThreadSet::should_yield()) {
_completed = false;
@@ -183,8 +183,8 @@ void G1ConcurrentRefineSweepTask::work(uint worker_id) {
_sweep_completed = false;
}
- sweep_cl._refine_stats.inc_sweep_time(os::elapsed_counter() - start);
- _stats->add_atomic(&sweep_cl._refine_stats);
+ _stats->inc_sweep_duration(os::elapsed_counter() - start);
+ _stats->add_atomic(&sweep_cl._per_worker_refine_data);
}
bool G1ConcurrentRefineSweepTask::sweep_completed() const { return _sweep_completed; }
diff --git a/src/hotspot/share/gc/g1/g1FullCollector.cpp b/src/hotspot/share/gc/g1/g1FullCollector.cpp
index 8b38509d1d8..cf153226920 100644
--- a/src/hotspot/share/gc/g1/g1FullCollector.cpp
+++ b/src/hotspot/share/gc/g1/g1FullCollector.cpp
@@ -48,21 +48,21 @@
#include "utilities/debug.hpp"
static void clear_and_activate_derived_pointers() {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2 // Without COMPILER2 this helper has an empty body.
DerivedPointerTable::clear();
-#endif
+#endif // COMPILER2
}
static void deactivate_derived_pointers() {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2 // Without COMPILER2 there is no table to deactivate, so the body is empty.
DerivedPointerTable::set_active(false);
-#endif
+#endif // COMPILER2
}
static void update_derived_pointers() {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2 // Without COMPILER2 no derived pointers are updated; the body is empty.
DerivedPointerTable::update_pointers();
-#endif
+#endif // COMPILER2
}
G1CMBitMap* G1FullCollector::mark_bitmap() {
@@ -542,9 +542,9 @@ void G1FullCollector::verify_after_marking() {
return;
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTableDeactivate dpt_deact;
-#endif
+#endif // COMPILER2
_heap->prepare_for_verify();
// Note: we can verify only the heap here. When an object is
// marked, the previous value of the mark word (including
diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
index e13b9d91bc5..a04b50ec1e7 100644
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -99,9 +99,9 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[RemoveSelfForwards] = new WorkerDataArray("RemoveSelfForwards", "Remove Self Forwards (ms):", max_gc_threads);
_gc_par_phases[ClearCardTable] = new WorkerDataArray("ClearPendingCards", "Clear Pending Cards (ms):", max_gc_threads);
_gc_par_phases[RecalculateUsed] = new WorkerDataArray("RecalculateUsed", "Recalculate Used Memory (ms):", max_gc_threads);
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
_gc_par_phases[UpdateDerivedPointers] = new WorkerDataArray("UpdateDerivedPointers", "Update Derived Pointers (ms):", max_gc_threads);
-#endif
+#endif // COMPILER2
_gc_par_phases[EagerlyReclaimHumongousObjects] = new WorkerDataArray("EagerlyReclaimHumongousObjects", "Eagerly Reclaim Humongous Objects (ms):", max_gc_threads);
_gc_par_phases[ResetPartialArrayStateManager] = new WorkerDataArray("ResetPartialArrayStateManager", "Reset Partial Array State Manager (ms):", max_gc_threads);
_gc_par_phases[ProcessEvacuationFailedRegions] = new WorkerDataArray("ProcessEvacuationFailedRegions", "Process Evacuation Failed Regions (ms):", max_gc_threads);
@@ -508,9 +508,9 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
debug_phase(_gc_par_phases[RecalculateUsed], 1);
debug_phase(_gc_par_phases[ProcessEvacuationFailedRegions], 1);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
debug_phase(_gc_par_phases[UpdateDerivedPointers], 1);
-#endif
+#endif // COMPILER2
debug_phase(_gc_par_phases[EagerlyReclaimHumongousObjects], 1);
trace_phase(_gc_par_phases[ResetPartialArrayStateManager]);
diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
index eb51b340da3..31bfd38ddb9 100644
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -81,9 +81,9 @@ class G1GCPhaseTimes : public CHeapObj {
RemoveSelfForwards,
ClearCardTable,
RecalculateUsed,
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
UpdateDerivedPointers,
-#endif
+#endif // COMPILER2
EagerlyReclaimHumongousObjects,
ResetPartialArrayStateManager,
ProcessEvacuationFailedRegions,
diff --git a/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp b/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp
index e3eabff5a50..80e26a3411e 100644
--- a/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp
+++ b/src/hotspot/share/gc/g1/g1ParallelCleaning.cpp
@@ -24,45 +24,16 @@
#include "gc/g1/g1ParallelCleaning.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
-
-#if INCLUDE_JVMCI
-JVMCICleaningTask::JVMCICleaningTask() :
- _cleaning_claimed(false) {
-}
-
-bool JVMCICleaningTask::claim_cleaning_task() {
- if (_cleaning_claimed.load_relaxed()) {
- return false;
- }
-
- return _cleaning_claimed.compare_set(false, true);
-}
-
-void JVMCICleaningTask::work(bool unloading_occurred) {
- // One worker will clean JVMCI metadata handles.
- if (unloading_occurred && EnableJVMCI && claim_cleaning_task()) {
- JVMCI::do_unloading(unloading_occurred);
- }
-}
-#endif // INCLUDE_JVMCI
G1ParallelCleaningTask::G1ParallelCleaningTask(bool unloading_occurred) :
WorkerTask("G1 Parallel Cleaning"),
_unloading_occurred(unloading_occurred),
_code_cache_task(unloading_occurred),
- JVMCI_ONLY(_jvmci_cleaning_task() COMMA)
_klass_cleaning_task() {
}
// The parallel work done by all worker threads.
void G1ParallelCleaningTask::work(uint worker_id) {
- // Clean JVMCI metadata handles.
- // Execute this task first because it is serial task.
- JVMCI_ONLY(_jvmci_cleaning_task.work(_unloading_occurred);)
-
// Do first pass of code cache cleaning.
_code_cache_task.work(worker_id);
diff --git a/src/hotspot/share/gc/g1/g1ParallelCleaning.hpp b/src/hotspot/share/gc/g1/g1ParallelCleaning.hpp
index 815b0883e16..a0d60306147 100644
--- a/src/hotspot/share/gc/g1/g1ParallelCleaning.hpp
+++ b/src/hotspot/share/gc/g1/g1ParallelCleaning.hpp
@@ -26,23 +26,6 @@
#define SHARE_GC_G1_G1PARALLELCLEANING_HPP
#include "gc/shared/parallelCleaning.hpp"
-#if INCLUDE_JVMCI
-#include "runtime/atomic.hpp"
-#endif
-
-#if INCLUDE_JVMCI
-class JVMCICleaningTask : public StackObj {
- Atomic _cleaning_claimed;
-
-public:
- JVMCICleaningTask();
- // Clean JVMCI metadata handles.
- void work(bool unloading_occurred);
-
-private:
- bool claim_cleaning_task();
-};
-#endif
// Do cleanup of some weakly held data in the same parallel task.
// Assumes a non-moving context.
@@ -50,9 +33,6 @@ class G1ParallelCleaningTask : public WorkerTask {
private:
bool _unloading_occurred;
CodeCacheUnloadingTask _code_cache_task;
-#if INCLUDE_JVMCI
- JVMCICleaningTask _jvmci_cleaning_task;
-#endif
KlassCleaningTask _klass_cleaning_task;
public:
diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp
index 01afb6a5c77..71a482e2505 100644
--- a/src/hotspot/share/gc/g1/g1Policy.cpp
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp
@@ -160,7 +160,7 @@ void G1Policy::record_new_heap_size(uint new_number_of_regions) {
double reserve_regions_d = (double) new_number_of_regions * _reserve_factor;
// We use ceiling so that if reserve_regions_d is > 0.0 (but
// smaller than 1.0) we'll get 1.
- _reserve_regions = (uint) ceil(reserve_regions_d);
+ _reserve_regions.store_relaxed((uint) ceil(reserve_regions_d));
_young_gen_sizer.heap_size_changed(new_number_of_regions);
@@ -186,8 +186,22 @@ void G1Policy::update_young_length_bounds() {
void G1Policy::update_young_length_bounds(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length) {
uint old_young_list_target_length = young_list_target_length();
- uint new_young_list_desired_length = calculate_young_desired_length(pending_cards, card_rs_length, code_root_rs_length);
- uint new_young_list_target_length = calculate_young_target_length(new_young_list_desired_length);
+ uint min_young_length_by_sizer = _young_gen_sizer.min_desired_young_length();
+ uint max_young_length_by_sizer = _young_gen_sizer.max_desired_young_length();
+
+ if (max_young_length_by_sizer < min_young_length_by_sizer) {
+ // This can happen due to races with heap_size_changed() at mutator time. Do not update the young gen
+ // lengths. Will be updated on the next regular call anyway.
+ assert(!SafepointSynchronize::is_at_safepoint(), "must be");
+ return;
+ }
+
+ uint new_young_list_desired_length = calculate_young_desired_length(pending_cards,
+ card_rs_length,
+ code_root_rs_length,
+ min_young_length_by_sizer,
+ max_young_length_by_sizer);
+ uint new_young_list_target_length = calculate_young_target_length(new_young_list_desired_length, min_young_length_by_sizer);
log_trace(gc, ergo, heap)("Young list length update: pending cards %zu card_rs_length %zu old target %u desired: %u target: %u",
pending_cards,
@@ -224,9 +238,9 @@ void G1Policy::update_young_length_bounds(size_t pending_cards, size_t card_rs_l
//
uint G1Policy::calculate_young_desired_length(size_t pending_cards,
size_t card_rs_length,
- size_t code_root_rs_length) const {
- uint min_young_length_by_sizer = _young_gen_sizer.min_desired_young_length();
- uint max_young_length_by_sizer = _young_gen_sizer.max_desired_young_length();
+ size_t code_root_rs_length,
+ uint min_young_length_by_sizer,
+ uint max_young_length_by_sizer) const {
assert(min_young_length_by_sizer >= 1, "invariant");
assert(max_young_length_by_sizer >= min_young_length_by_sizer, "invariant");
@@ -302,7 +316,7 @@ uint G1Policy::calculate_young_desired_length(size_t pending_cards,
// Limit the desired (wished) young length by current free regions. If the request
// can be satisfied without using up reserve regions, do so, otherwise eat into
// the reserve, giving away at most what the heap sizer allows.
-uint G1Policy::calculate_young_target_length(uint desired_young_length) const {
+uint G1Policy::calculate_young_target_length(uint desired_young_length, uint min_young_length_by_sizer) const {
uint allocated_young_length = _g1h->young_regions_count();
uint receiving_additional_eden;
@@ -319,8 +333,14 @@ uint G1Policy::calculate_young_target_length(uint desired_young_length) const {
// do, we at most eat the sizer's minimum regions into the reserve or half the
// reserve rounded up (if possible; this is an arbitrary value).
- uint max_to_eat_into_reserve = MIN2(_young_gen_sizer.min_desired_young_length(),
- (_reserve_regions + 1) / 2);
+ // The heap reserve needs to be snapshotted for consistent use in the following.
+ // It can be concurrently modified by the mutator as it expands the heap. It can
+ // only increase at that time, so this is a conservative snapshot. So at worst this
+ // method will return a too small young gen length in that case.
+ uint reserve_regions = _reserve_regions.load_relaxed();
+
+ uint max_to_eat_into_reserve = MIN2(min_young_length_by_sizer,
+ (reserve_regions + 1) / 2);
log_trace(gc, ergo, heap)("Young target length: Common "
"free regions at end of collection %u "
@@ -329,14 +349,14 @@ uint G1Policy::calculate_young_target_length(uint desired_young_length) const {
"max to eat into reserve %u",
_free_regions_at_end_of_collection,
desired_young_length,
- _reserve_regions,
+ reserve_regions,
max_to_eat_into_reserve);
uint survivor_regions_count = _g1h->survivor_regions_count();
uint desired_eden_length = desired_young_length - survivor_regions_count;
uint allocated_eden_length = allocated_young_length - survivor_regions_count;
- if (_free_regions_at_end_of_collection <= _reserve_regions) {
+ if (_free_regions_at_end_of_collection <= reserve_regions) {
// Fully eat (or already eating) into the reserve, hand back at most absolute_min_length regions.
uint receiving_eden = MIN3(_free_regions_at_end_of_collection,
desired_eden_length,
@@ -351,9 +371,9 @@ uint G1Policy::calculate_young_target_length(uint desired_young_length) const {
log_trace(gc, ergo, heap)("Young target length: Fully eat into reserve "
"receiving eden %u receiving additional eden %u",
receiving_eden, receiving_additional_eden);
- } else if (_free_regions_at_end_of_collection < (desired_eden_length + _reserve_regions)) {
+ } else if (_free_regions_at_end_of_collection < (desired_eden_length + reserve_regions)) {
// Partially eat into the reserve, at most max_to_eat_into_reserve regions.
- uint free_outside_reserve = _free_regions_at_end_of_collection - _reserve_regions;
+ uint free_outside_reserve = _free_regions_at_end_of_collection - reserve_regions;
assert(free_outside_reserve < desired_eden_length,
"must be %u %u",
free_outside_reserve, desired_eden_length);
@@ -732,13 +752,16 @@ bool G1Policy::need_to_start_conc_mark(const char* source, size_t allocation_wor
if (about_to_start_mixed_phase()) {
return false;
}
+ return need_to_start_conc_mark(source, *collector_state(), allocation_word_size);
+}
+bool G1Policy::need_to_start_conc_mark(const char* source, const G1CollectorState& state, size_t allocation_word_size) const {
size_t marking_initiating_old_gen_threshold = _ihop_control->old_gen_threshold_for_conc_mark_start();
size_t non_young_occupancy = _g1h->non_young_occupancy_after_allocation(allocation_word_size);
bool result = false;
if (non_young_occupancy > marking_initiating_old_gen_threshold) {
- result = collector_state()->is_in_young_only_phase();
+ result = state.is_in_young_only_phase();
log_debug(gc, ergo, ihop)("%s non-young occupancy: %zuB allocation request: %zuB threshold: %zuB (%1.2f) source: %s",
result ? "Request concurrent cycle initiation (occupancy higher than threshold)" : "Do not request concurrent cycle initiation (still doing mixed collections)",
non_young_occupancy, allocation_word_size * HeapWordSize, marking_initiating_old_gen_threshold, (double) marking_initiating_old_gen_threshold / _g1h->capacity() * 100, source);
@@ -777,23 +800,24 @@ double G1Policy::pending_cards_processing_time() const {
// Anything below that is considered to be zero
#define MIN_TIMER_GRANULARITY 0.0000001
-void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mark,
- bool allocation_failure,
- size_t allocation_word_size) {
+G1CollectorState G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mark,
+ bool allocation_failure,
+ size_t allocation_word_size) {
G1GCPhaseTimes* p = phase_times();
double start_time_sec = cur_pause_start_sec();
double end_time_sec = Ticks::now().seconds();
double pause_time_ms = (end_time_sec - start_time_sec) * 1000.0;
- Pause this_pause = collector_state()->gc_pause_type(concurrent_operation_is_full_mark);
- bool is_young_only_pause = G1CollectorState::is_young_only_pause(this_pause);
+ G1CollectorState next_state = *collector_state();
- if (G1CollectorState::is_concurrent_start_pause(this_pause)) {
+ bool is_young_only_pause = collector_state()->is_in_young_only_phase();
+
+ if (collector_state()->is_in_concurrent_start_gc()) {
assert(!collector_state()->initiate_conc_mark_if_possible(), "we should have cleared it by now");
- collector_state()->set_in_normal_young_gc();
- } else {
- maybe_start_marking(allocation_word_size);
+ next_state.set_in_normal_young_gc();
+ } else if (!about_to_start_mixed_phase() && need_to_start_conc_mark("end of GC", next_state, allocation_word_size)) {
+ next_state.set_initiate_conc_mark_if_possible(true);
}
double app_time_ms = (start_time_sec * 1000.0 - _analytics->prev_collection_pause_end_ms());
@@ -930,26 +954,28 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSToYoungGenCards));
}
- record_pause(this_pause, start_time_sec, end_time_sec);
+ record_pause(collector_state()->gc_pause_type(concurrent_operation_is_full_mark), start_time_sec, end_time_sec);
- if (G1CollectorState::is_prepare_mixed_pause(this_pause)) {
- assert(!G1CollectorState::is_concurrent_start_pause(this_pause),
+ if (collector_state()->is_in_prepare_mixed_gc()) {
+ assert(!collector_state()->is_in_concurrent_start_gc(),
"The young GC before mixed is not allowed to be concurrent start GC");
// This has been the young GC before we start doing mixed GCs. We already
// decided to start mixed GCs much earlier, so there is nothing to do except
// advancing the state.
- collector_state()->set_in_space_reclamation_phase();
- } else if (G1CollectorState::is_mixed_pause(this_pause)) {
+ next_state.set_in_space_reclamation_phase();
+ } else if (collector_state()->is_in_mixed_phase()) {
// This is a mixed GC. Here we decide whether to continue doing more
// mixed GCs or not.
if (!next_gc_should_be_mixed()) {
log_debug(gc, ergo)("do not continue mixed GCs (candidate old regions not available)");
- collector_state()->set_in_normal_young_gc();
+ next_state.set_in_normal_young_gc();
assert(!candidates()->has_more_marking_candidates(),
"only end mixed if all candidates from marking were processed");
- maybe_start_marking(allocation_word_size);
+ if (need_to_start_conc_mark("end of GC", next_state, allocation_word_size)) {
+ next_state.set_initiate_conc_mark_if_possible(true);
+ }
}
} else {
assert(is_young_only_pause, "must be");
@@ -957,7 +983,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
_eden_surv_rate_group->start_adding_regions();
- assert(!(G1CollectorState::is_concurrent_start_pause(this_pause) && collector_state()->is_in_concurrent_cycle()),
+ assert(!(collector_state()->is_in_concurrent_start_gc() && collector_state()->is_in_concurrent_cycle()),
"If the last pause has been concurrent start, we should not have been in the marking cycle");
_free_regions_at_end_of_collection = _g1h->num_free_regions();
@@ -1002,6 +1028,8 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
cr->adjust_after_gc(pending_cards_time_ms,
pending_cards,
pending_cards_time_goal_ms);
+
+ return next_state;
}
G1IHOPControl* G1Policy::create_ihop_control(const G1OldGenAllocationTracker* old_gen_alloc_tracker,
@@ -1346,15 +1374,6 @@ void G1Policy::abandon_collection_set_candidates() {
_collection_set->abandon_all_candidates();
}
-void G1Policy::maybe_start_marking(size_t allocation_word_size) {
- if (need_to_start_conc_mark("end of GC", allocation_word_size)) {
- // Note: this might have already been set, if during the last
- // pause we decided to start a cycle but at the beginning of
- // this pause we decided to postpone it. That's OK.
- collector_state()->set_initiate_conc_mark_if_possible(true);
- }
-}
-
void G1Policy::update_gc_pause_time_ratios(Pause gc_type, double start_time_sec, double end_time_sec) {
double pause_time_sec = end_time_sec - start_time_sec;
diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp
index 0aa15be9cae..0a472dd0527 100644
--- a/src/hotspot/share/gc/g1/g1Policy.hpp
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp
@@ -91,9 +91,11 @@ class G1Policy: public CHeapObj {
G1SurvRateGroup* _survivor_surv_rate_group;
double _reserve_factor;
- // This will be set when the heap is expanded
- // for the first time during initialization.
- uint _reserve_regions;
+ // The allocation reserve in number of regions that we try to keep free.
+ // G1 allocation of new regions for eden is restrained when allocating into that reserve.
+ // This intentionally slows down the allocation when the heap is close to full to allow
+ // concurrent marking to finish and hopefully avoid a Full GC.
+ Atomic<uint> _reserve_regions;
G1YoungGenSizer _young_gen_sizer;
@@ -224,9 +226,13 @@ private:
// Calculate desired young length based on current situation without taking actually
// available free regions into account.
- uint calculate_young_desired_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length) const;
+ uint calculate_young_desired_length(size_t pending_cards,
+ size_t card_rs_length,
+ size_t code_root_rs_length,
+ uint min_young_length_by_sizer,
+ uint max_young_length_by_sizer) const;
// Limit the given desired young length to available free regions.
- uint calculate_young_target_length(uint desired_young_length) const;
+ uint calculate_young_target_length(uint desired_young_length, uint min_young_length_by_sizer) const;
double predict_survivor_regions_evac_time() const;
double predict_retained_regions_evac_time() const;
@@ -258,8 +264,6 @@ public:
private:
void abandon_collection_set_candidates();
- // Sets up marking if proper conditions are met.
- void maybe_start_marking(size_t allocation_word_size);
// Manage time-to-mixed tracking.
void update_time_to_mixed_tracking(Pause gc_type, double start, double end);
// Record the given STW pause with the given start and end times (in s).
@@ -297,6 +301,7 @@ public:
void record_young_gc_pause_end(bool evacuation_failed);
bool need_to_start_conc_mark(const char* source, size_t allocation_word_size) const;
+ bool need_to_start_conc_mark(const char* source, const G1CollectorState& state, size_t allocation_word_size) const;
bool concurrent_operation_is_full_mark(const char* msg, size_t allocation_word_size);
@@ -305,9 +310,10 @@ public:
// Record the start and end of the actual collection part of the evacuation pause.
void record_pause_start_time();
void record_young_collection_start();
- void record_young_collection_end(bool concurrent_operation_is_full_mark,
- bool allocation_failure,
- size_t allocation_word_size);
+ // Returns the next CollectorState based on current state without modifying the latter.
+ G1CollectorState record_young_collection_end(bool concurrent_operation_is_full_mark,
+ bool allocation_failure,
+ size_t allocation_word_size);
// Record the start and end of a full collection.
void record_full_collection_start();
diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.cpp b/src/hotspot/share/gc/g1/g1YoungCollector.cpp
index d26bcc23c08..810b54ec587 100644
--- a/src/hotspot/share/gc/g1/g1YoungCollector.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungCollector.cpp
@@ -70,7 +70,6 @@
class G1YoungGCTraceTime {
G1YoungCollector* _collector;
- G1CollectorState::Pause _pause_type;
GCCause::Cause _pause_cause;
static const uint MaxYoungGCNameLength = 128;
@@ -90,10 +89,11 @@ class G1YoungGCTraceTime {
_collector->evacuation_alloc_failed() && _collector->evacuation_pinned() ? " / " : "",
_collector->evacuation_pinned() ? "Pinned" : "");
}
+ G1CollectorState::Pause pause = _collector->collector_state()->gc_pause_type(_collector->concurrent_operation_is_full_mark());
os::snprintf_checked(_young_gc_name_data,
MaxYoungGCNameLength,
"Pause Young (%s) (%s)%s",
- G1CollectorState::to_string(_pause_type),
+ G1CollectorState::to_string(pause),
GCCause::to_string(_pause_cause),
evacuation_failed_string);
return _young_gc_name_data;
@@ -102,10 +102,6 @@ class G1YoungGCTraceTime {
public:
G1YoungGCTraceTime(G1YoungCollector* collector, GCCause::Cause cause) :
_collector(collector),
- // Take snapshot of current pause type at start as it may be modified during gc.
- // The strings for all Concurrent Start pauses are the same, so the parameter
- // does not matter here.
- _pause_type(_collector->collector_state()->gc_pause_type(false /* concurrent_operation_is_full_mark */)),
_pause_cause(cause),
// Fake a "no cause" and manually add the correct string in update_young_gc_name()
// to make the string look more natural.
@@ -131,24 +127,23 @@ public:
};
class G1YoungGCJFRTracerMark : public G1JFRTracerMark {
+ G1YoungCollector* _young_collector;
G1EvacInfo _evacuation_info;
G1NewTracer* tracer() const { return (G1NewTracer*)_tracer; }
public:
-
G1EvacInfo* evacuation_info() { return &_evacuation_info; }
- G1YoungGCJFRTracerMark(STWGCTimer* gc_timer_stw, G1NewTracer* gc_tracer_stw, GCCause::Cause cause) :
- G1JFRTracerMark(gc_timer_stw, gc_tracer_stw), _evacuation_info() { }
-
- void report_pause_type(G1CollectorState::Pause type) {
- tracer()->report_young_gc_pause(type);
- }
+ G1YoungGCJFRTracerMark(G1YoungCollector* young_collector) :
+ G1JFRTracerMark(young_collector->gc_timer_stw(), young_collector->gc_tracer_stw()),
+ _young_collector(young_collector),
+ _evacuation_info() { }
~G1YoungGCJFRTracerMark() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ tracer()->report_young_gc_pause(g1h->collector_state()->gc_pause_type(_young_collector->concurrent_operation_is_full_mark()));
tracer()->report_evacuation_info(&_evacuation_info);
tracer()->report_tenuring_threshold(g1h->policy()->tenuring_threshold());
}
@@ -563,9 +558,9 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info)
assert(_g1h->verifier()->check_region_attr_table(), "Inconsistency in the region attributes table.");
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::clear();
-#endif
+#endif // COMPILER2
allocation_failure_injector()->arm_if_needed();
}
@@ -1107,6 +1102,7 @@ G1YoungCollector::G1YoungCollector(GCCause::Cause gc_cause,
_g1h(G1CollectedHeap::heap()),
_gc_cause(gc_cause),
_allocation_word_size(allocation_word_size),
+ _next_state(),
_concurrent_operation_is_full_mark(false),
_evac_failure_regions()
{
@@ -1122,7 +1118,7 @@ void G1YoungCollector::collect() {
G1YoungGCTraceTime tm(this, _gc_cause);
// JFR
- G1YoungGCJFRTracerMark jtm(gc_timer_stw(), gc_tracer_stw(), _gc_cause);
+ G1YoungGCJFRTracerMark jtm(this);
// JStat/MXBeans
G1YoungGCMonitoringScope ms(monitoring_support(),
!collection_set()->candidates()->is_empty() /* all_memory_pools_affected */);
@@ -1168,10 +1164,6 @@ void G1YoungCollector::collect() {
// evacuation, eventually aborting it.
_concurrent_operation_is_full_mark = policy()->concurrent_operation_is_full_mark("Revise IHOP", _allocation_word_size);
- // Need to report the collection pause now since record_collection_pause_end()
- // modifies it to the next state.
- jtm.report_pause_type(collector_state()->gc_pause_type(_concurrent_operation_is_full_mark));
-
- policy()->record_young_collection_end(_concurrent_operation_is_full_mark, evacuation_alloc_failed(), _allocation_word_size);
+ _next_state = policy()->record_young_collection_end(_concurrent_operation_is_full_mark, evacuation_alloc_failed(), _allocation_word_size);
}
}
diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.hpp b/src/hotspot/share/gc/g1/g1YoungCollector.hpp
index ab32ca770a4..7415bc83827 100644
--- a/src/hotspot/share/gc/g1/g1YoungCollector.hpp
+++ b/src/hotspot/share/gc/g1/g1YoungCollector.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#ifndef SHARE_GC_G1_G1YOUNGCOLLECTOR_HPP
#define SHARE_GC_G1_G1YOUNGCOLLECTOR_HPP
+#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1EvacFailureRegions.hpp"
#include "gc/g1/g1YoungGCAllocationFailureInjector.hpp"
#include "gc/shared/gcCause.hpp"
@@ -54,6 +55,7 @@ class WorkerThreads;
class outputStream;
class G1YoungCollector {
+ friend class G1YoungGCJFRTracerMark;
friend class G1YoungGCNotifyPauseMark;
friend class G1YoungGCTraceTime;
friend class G1YoungGCVerifierMark;
@@ -80,6 +82,7 @@ class G1YoungCollector {
GCCause::Cause _gc_cause;
size_t _allocation_word_size;
+ G1CollectorState _next_state;
bool _concurrent_operation_is_full_mark;
// Evacuation failure tracking.
@@ -141,6 +144,7 @@ public:
size_t allocation_word_size);
void collect();
+ // Collector state returned by G1Policy::record_young_collection_end() during collect();
+ // default-constructed until collect() has stored that result.
+ G1CollectorState next_state() const { return _next_state; }
bool concurrent_operation_is_full_mark() const { return _concurrent_operation_is_full_mark; }
};
diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
index 11da3cb8263..d0c843aa5d6 100644
--- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
@@ -444,7 +444,7 @@ public:
}
};
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
class G1PostEvacuateCollectionSetCleanupTask2::UpdateDerivedPointersTask : public G1AbstractSubTask {
public:
UpdateDerivedPointersTask() : G1AbstractSubTask(G1GCPhaseTimes::UpdateDerivedPointers) { }
@@ -452,7 +452,7 @@ public:
double worker_cost() const override { return 1.0; }
void do_work(uint worker_id) override { DerivedPointerTable::update_pointers(); }
};
-#endif
+#endif // COMPILER2
class G1PostEvacuateCollectionSetCleanupTask2::EagerlyReclaimHumongousObjectsTask : public G1AbstractSubTask {
uint _humongous_regions_reclaimed;
@@ -888,9 +888,9 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
G1EvacFailureRegions* evac_failure_regions) :
G1BatchedTask("Post Evacuate Cleanup 2", G1CollectedHeap::heap()->phase_times())
{
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
add_serial_task(new UpdateDerivedPointersTask());
-#endif
+#endif // COMPILER2
if (G1CollectedHeap::heap()->has_humongous_reclaim_candidates()) {
add_serial_task(new EagerlyReclaimHumongousObjectsTask());
}
diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
index bc3a08e2080..557ce454c78 100644
--- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
+++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,9 +60,9 @@ public:
// - Reset the reusable PartialArrayStateManager.
class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
class EagerlyReclaimHumongousObjectsTask;
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
class UpdateDerivedPointersTask;
-#endif
+#endif // COMPILER2
class ProcessEvacuationFailedRegionsTask;
class FreeCollectionSetTask;
diff --git a/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp b/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp
index ffa573c68cc..60c79ec28df 100644
--- a/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,7 +30,7 @@
#include "runtime/globals_extension.hpp"
G1YoungGenSizer::G1YoungGenSizer() : _sizer_kind(SizerDefaults),
- _use_adaptive_sizing(true), _min_desired_young_length(0), _max_desired_young_length(0) {
+ _use_adaptive_sizing(true), _min_desired_young_length(), _max_desired_young_length(0) {
precond(!FLAG_IS_ERGO(NewRatio));
precond(!FLAG_IS_ERGO(NewSize));
@@ -100,16 +100,16 @@ G1YoungGenSizer::G1YoungGenSizer() : _sizer_kind(SizerDefaults),
}
if (user_specified_NewSize) {
- _min_desired_young_length = MAX2((uint)(NewSize / G1HeapRegion::GrainBytes), 1U);
+ _min_desired_young_length.store_relaxed(MAX2((uint)(NewSize / G1HeapRegion::GrainBytes), 1U));
}
if (user_specified_MaxNewSize) {
- _max_desired_young_length = MAX2((uint)(MaxNewSize / G1HeapRegion::GrainBytes), 1U);
+ _max_desired_young_length.store_relaxed(MAX2((uint)(MaxNewSize / G1HeapRegion::GrainBytes), 1U));
}
if (user_specified_NewSize && user_specified_MaxNewSize) {
_sizer_kind = SizerMaxAndNewSize;
- _use_adaptive_sizing = _min_desired_young_length != _max_desired_young_length;
+ _use_adaptive_sizing = min_desired_young_length() != max_desired_young_length();
} else if (user_specified_NewSize) {
_sizer_kind = SizerNewSizeOnly;
} else {
@@ -159,20 +159,22 @@ void G1YoungGenSizer::recalculate_min_max_young_length(uint number_of_heap_regio
}
+// Recomputes the maximum desired young length for the given number of heap regions and
+// ergonomically updates MaxNewSize if the resulting size in bytes differs from it.
+// The sizer's own stored min/max lengths are not modified here.
void G1YoungGenSizer::adjust_max_new_size(uint number_of_heap_regions) {
-
// We need to pass the desired values because recalculation may not update these
// values in some cases.
- uint temp = _min_desired_young_length;
- uint result = _max_desired_young_length;
- recalculate_min_max_young_length(number_of_heap_regions, &temp, &result);
+ uint unused_new_min = min_desired_young_length();
+ uint new_max = max_desired_young_length();
+ recalculate_min_max_young_length(number_of_heap_regions, &unused_new_min, &new_max);
- size_t max_young_size = result * G1HeapRegion::GrainBytes;
+ size_t max_young_size = new_max * G1HeapRegion::GrainBytes;
if (max_young_size != MaxNewSize) {
FLAG_SET_ERGO(MaxNewSize, max_young_size);
}
}
+// Recalculates the desired min/max young lengths for the new number of heap regions.
void G1YoungGenSizer::heap_size_changed(uint new_number_of_heap_regions) {
- recalculate_min_max_young_length(new_number_of_heap_regions, &_min_desired_young_length,
- &_max_desired_young_length);
+ // The recalculation works on plain uint locals seeded with the current values;
+ // both results are then written back with relaxed stores.
+ uint min = min_desired_young_length();
+ uint max = max_desired_young_length();
+ recalculate_min_max_young_length(new_number_of_heap_regions, &min, &max);
+ _min_desired_young_length.store_relaxed(min);
+ _max_desired_young_length.store_relaxed(max);
}
diff --git a/src/hotspot/share/gc/g1/g1YoungGenSizer.hpp b/src/hotspot/share/gc/g1/g1YoungGenSizer.hpp
index 138989d4d94..c60c3c373a9 100644
--- a/src/hotspot/share/gc/g1/g1YoungGenSizer.hpp
+++ b/src/hotspot/share/gc/g1/g1YoungGenSizer.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#ifndef SHARE_GC_G1_G1YOUNGGENSIZER_HPP
#define SHARE_GC_G1_G1YOUNGGENSIZER_HPP
+#include "runtime/atomic.hpp"
#include "utilities/globalDefinitions.hpp"
// There are three command line options related to the young gen size:
@@ -78,8 +79,8 @@ private:
// true otherwise.
bool _use_adaptive_sizing;
- uint _min_desired_young_length;
- uint _max_desired_young_length;
+ // Desired young gen length bounds in regions; accessed with relaxed atomic loads and stores.
+ Atomic<uint> _min_desired_young_length;
+ Atomic<uint> _max_desired_young_length;
uint calculate_default_min_length(uint new_number_of_heap_regions);
uint calculate_default_max_length(uint new_number_of_heap_regions);
@@ -96,10 +97,10 @@ public:
virtual void heap_size_changed(uint new_number_of_heap_regions);
uint min_desired_young_length() const {
- return _min_desired_young_length;
+ return _min_desired_young_length.load_relaxed();
}
uint max_desired_young_length() const {
- return _max_desired_young_length;
+ return _max_desired_young_length.load_relaxed();
}
bool use_adaptive_young_list_length() const {
diff --git a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp
index ff7a0aee088..f59733d2019 100644
--- a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp
+++ b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp
@@ -41,7 +41,8 @@ PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t space_alignment,
AdaptiveSizePolicy(gc_pause_goal_sec),
_avg_promoted(new AdaptivePaddedNoZeroDevAverage(AdaptiveSizePolicyWeight, PromotedPadding)),
_space_alignment(space_alignment),
- _young_gen_size_increment_supplement(YoungGenerationSizeSupplement) {}
+ _young_gen_size_increment_supplement(YoungGenerationSizeSupplement),
+ _tenuring_threshold_gc_count(0) {}
void PSAdaptiveSizePolicy::major_collection_begin() {
_major_timer.reset();
@@ -223,36 +224,63 @@ size_t PSAdaptiveSizePolicy::eden_decrement_aligned_down(size_t cur_eden) {
return align_down(eden_heap_delta, _space_alignment);
}
-uint PSAdaptiveSizePolicy::compute_tenuring_threshold(bool is_survivor_overflowing,
+// Returns a fixed, human-readable name for the given young gen sizing state.
+// Any value other than balanced, constrained or surplus is treated as a programming error.
+static const char* sizing_state_to_string(PSYoungGen::SizingState sizing_state) {
+ switch (sizing_state) {
+ case PSYoungGen::SizingState::balanced:
+ return "balanced";
+ case PSYoungGen::SizingState::constrained:
+ return "constrained";
+ case PSYoungGen::SizingState::surplus:
+ return "surplus";
+ default:
+ ShouldNotReachHere();
+ return "unknown";
+ }
+}
+
+// Computes the next tenuring threshold from the current one and the young gen sizing state,
+// logs the decision, and returns the new value:
+// - constrained: lower the threshold by one, but never below 1, and reset the count.
+// - surplus: count the GC; once the count has reached its limit of 5 and the threshold is
+// below MaxTenuringThreshold, raise the threshold by one and reset the count.
+// - balanced: keep the threshold and reset the count.
+// With AlwaysTenure or NeverTenure the threshold is returned unchanged and nothing is logged.
+uint PSAdaptiveSizePolicy::compute_tenuring_threshold(PSYoungGen::SizingState sizing_state,
uint tenuring_threshold) {
- if (!young_gen_policy_is_ready()) {
+ if (AlwaysTenure || NeverTenure) {
return tenuring_threshold;
}
- if (is_survivor_overflowing) {
- return tenuring_threshold;
+ const uint original_threshold = tenuring_threshold;
+ constexpr uint min_tenuring_threshold = 1;
+ constexpr uint tenuring_threshold_gc_limit = 5;
+
+ switch (sizing_state) {
+ case PSYoungGen::SizingState::constrained:
+ _tenuring_threshold_gc_count = 0;
+ if (tenuring_threshold > min_tenuring_threshold) {
+ tenuring_threshold--;
+ }
+ break;
+ case PSYoungGen::SizingState::surplus:
+ // The count saturates at the limit, so it stays bounded while the threshold
+ // is already at MaxTenuringThreshold and no reset happens.
+ if (_tenuring_threshold_gc_count < tenuring_threshold_gc_limit) {
+ _tenuring_threshold_gc_count++;
+ }
+
+ if (_tenuring_threshold_gc_count >= tenuring_threshold_gc_limit &&
+ tenuring_threshold < MaxTenuringThreshold) {
+ tenuring_threshold++;
+ _tenuring_threshold_gc_count = 0;
+ }
+ break;
+ case PSYoungGen::SizingState::balanced:
+ _tenuring_threshold_gc_count = 0;
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
}
- bool incr_tenuring_threshold = false;
-
- const double major_cost = major_gc_time_sum();
- const double minor_cost = minor_gc_time_sum();
-
- if (minor_cost > major_cost * _threshold_tolerance_percent) {
- // nothing; we prefer young-gc over full-gc
- } else if (major_cost > minor_cost * _threshold_tolerance_percent) {
- // Major times are too long, so we want less promotion.
- incr_tenuring_threshold = true;
- }
-
- // Finally, increment or decrement the tenuring threshold, as decided above.
- // We test for decrementing first, as we might have hit the target size
- // limit.
- if (!(AlwaysTenure || NeverTenure)) {
- if (incr_tenuring_threshold && tenuring_threshold < MaxTenuringThreshold) {
- tenuring_threshold++;
- }
- }
+ log_debug(gc, age)("Adaptive tenuring threshold %u -> %u (max %u, young gen state: %s, increase count: %u/%u)",
+ original_threshold,
+ tenuring_threshold,
+ MaxTenuringThreshold,
+ sizing_state_to_string(sizing_state),
+ _tenuring_threshold_gc_count,
+ tenuring_threshold_gc_limit);
return tenuring_threshold;
}
diff --git a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp
index 596ac231a97..54d7185b062 100644
--- a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp
+++ b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp
@@ -25,6 +25,7 @@
#ifndef SHARE_GC_PARALLEL_PSADAPTIVESIZEPOLICY_HPP
#define SHARE_GC_PARALLEL_PSADAPTIVESIZEPOLICY_HPP
+#include "gc/parallel/psYoungGen.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/gcUtil.hpp"
#include "utilities/align.hpp"
@@ -46,6 +47,10 @@ class PSAdaptiveSizePolicy : public AdaptiveSizePolicy {
// with increasing collections.
uint _young_gen_size_increment_supplement;
+ // Number of young GCs counted in the surplus sizing state (where eden is not squeezed
+ // by survivors) since the count was last reset. The tenuring threshold is only raised
+ // once this count has reached its limit.
+ uint _tenuring_threshold_gc_count;
+
size_t decrease_eden_for_minor_pause_time(size_t current_eden_size);
size_t increase_eden(size_t current_eden_size);
@@ -85,11 +90,11 @@ class PSAdaptiveSizePolicy : public AdaptiveSizePolicy {
size_t compute_desired_survivor_size(size_t current_survivor_size, size_t max_gen_size);
- size_t compute_old_gen_shrink_bytes(size_t old_gen_free_bytes, size_t max_shrink_bytes);
-
- uint compute_tenuring_threshold(bool is_survivor_overflowing,
+ uint compute_tenuring_threshold(PSYoungGen::SizingState sizing_state,
uint tenuring_threshold);
+ size_t compute_old_gen_shrink_bytes(size_t old_gen_free_bytes, size_t max_shrink_bytes);
+
// Return the maximum size of a survivor space if the young generation were of
// size gen_size.
size_t max_survivor_size(size_t gen_size) {
diff --git a/src/hotspot/share/gc/parallel/psCardTable.cpp b/src/hotspot/share/gc/parallel/psCardTable.cpp
index 6429766309a..226ccf72a68 100644
--- a/src/hotspot/share/gc/parallel/psCardTable.cpp
+++ b/src/hotspot/share/gc/parallel/psCardTable.cpp
@@ -269,7 +269,7 @@ void PSCardTable::process_range(Func&& object_start,
}
// Finished a dirty chunk.
- pm->drain_stacks_cond_depth();
+ pm->trim_stacks();
}
}
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index ca1fd2c120b..a4a2bfe72c2 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -96,9 +96,6 @@
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#include "utilities/stack.inline.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
#include
@@ -280,7 +277,6 @@ void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
assert(beg_region <= _region_count, "beg_region out of range");
assert(end_region <= _region_count, "end_region out of range");
- const size_t region_cnt = end_region - beg_region;
for (size_t i = beg_region; i < end_region; i++) {
::new (&_region_data[i]) RegionData{};
}
@@ -702,9 +698,9 @@ void PSParallelCompact::post_compact()
heap->prune_scavengable_nmethods();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::update_pointers();
-#endif
+#endif // COMPILER2
// Signal that we have completed a visit to all live objects.
Universe::heap()->record_whole_heap_examined_timestamp();
@@ -978,9 +974,9 @@ bool PSParallelCompact::invoke(bool clear_all_soft_refs, bool should_do_max_comp
// Let the size policy know we're starting
size_policy->major_collection_begin();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::clear();
-#endif
+#endif // COMPILER2
ref_processor()->start_discovery(clear_all_soft_refs);
@@ -988,10 +984,10 @@ bool PSParallelCompact::invoke(bool clear_all_soft_refs, bool should_do_max_comp
summary_phase(should_do_max_compaction);
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
assert(DerivedPointerTable::is_active(), "Sanity");
DerivedPointerTable::set_active(false);
-#endif
+#endif // COMPILER2
forward_to_new_addr();
@@ -1172,14 +1168,6 @@ public:
_klass_cleaning_task() {}
void work(uint worker_id) {
-#if INCLUDE_JVMCI
- if (EnableJVMCI && worker_id == 0) {
- // Serial work; only first worker.
- // Clean JVMCI metadata handles.
- JVMCI::do_unloading(_unloading_occurred);
- }
-#endif
-
// Do first pass of code cache cleaning.
_code_cache_task.work(worker_id);
@@ -1378,11 +1366,9 @@ public:
_weak_proc_task(nworkers),
_oop_storage_iter(),
_nworkers(nworkers),
- _code_cache_claimed(false) {
+ _code_cache_claimed(false),
+ _claim_counters{} {
- for (unsigned int i = PSParallelCompact::old_space_id; i < PSParallelCompact::last_space_id; ++i) {
- ::new (&_claim_counters[i]) Atomic{};
- }
ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_stw_fullgc_adjust);
}
diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.cpp b/src/hotspot/share/gc/parallel/psPromotionManager.cpp
index 6a0905e82f3..4c8319a3c07 100644
--- a/src/hotspot/share/gc/parallel/psPromotionManager.cpp
+++ b/src/hotspot/share/gc/parallel/psPromotionManager.cpp
@@ -29,6 +29,7 @@
#include "gc/parallel/psPromotionManager.inline.hpp"
#include "gc/parallel/psScavenge.hpp"
#include "gc/shared/continuationGCSupport.inline.hpp"
+#include "gc/shared/gc_globals.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/partialArraySplitter.inline.hpp"
#include "gc/shared/partialArrayState.hpp"
@@ -163,12 +164,6 @@ PSPromotionManager::PSPromotionManager()
// We set the old lab's start array.
_old_lab.set_start_array(old_gen()->start_array());
- if (ParallelGCThreads == 1) {
- _target_stack_size = 0;
- } else {
- _target_stack_size = GCDrainStackTargetSize;
- }
-
// let's choose 1.5x the chunk size
_min_array_size_for_chunking = (3 * ParGCArrayScanChunk / 2);
@@ -204,10 +199,7 @@ void PSPromotionManager::restore_preserved_marks() {
_preserved_marks_set->restore(&ParallelScavengeHeap::heap()->workers());
}
-void PSPromotionManager::drain_stacks(bool totally_drain) {
- const uint threshold = totally_drain ? 0
- : _target_stack_size;
-
+void PSPromotionManager::trim_stacks_to_threshold(uint threshold) {
PSScannerTasksQueue* const tq = claimed_stack_depth();
do {
ScannerTask task;
@@ -225,11 +217,24 @@ void PSPromotionManager::drain_stacks(bool totally_drain) {
}
} while (!tq->overflow_empty());
- assert(!totally_drain || tq->taskqueue_empty(), "Sanity");
- assert(totally_drain || tq->size() <= _target_stack_size, "Sanity");
+ assert(tq->size() <= threshold, "Sanity");
assert(tq->overflow_empty(), "Sanity");
}
+// Bounds the amount of pending local work without fully draining it. Nothing is done
+// unless the overflow stack is non-empty or the task queue holds more than
+// 2 * GCDrainStackTargetSize + 1 entries; in that case trim_stacks_to_threshold() is
+// called with GCDrainStackTargetSize as the threshold.
+void PSPromotionManager::trim_stacks() {
+ const uint target_stack_size = GCDrainStackTargetSize;
+ const uint max_stack_size = target_stack_size * 2 + 1;
+
+ PSScannerTasksQueue* const tq = claimed_stack_depth();
+ if (!tq->overflow_empty() || tq->size() > max_stack_size) {
+ trim_stacks_to_threshold(target_stack_size);
+ }
+}
+
+// Fully drains the local stacks by trimming them to a threshold of zero entries.
+void PSPromotionManager::drain_stacks() {
+ trim_stacks_to_threshold(0);
+}
+
void PSPromotionManager::flush_labs() {
assert(stacks_empty(), "Attempt to flush lab with live stack");
diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.hpp
index cd59fa578d1..edce4861d4d 100644
--- a/src/hotspot/share/gc/parallel/psPromotionManager.hpp
+++ b/src/hotspot/share/gc/parallel/psPromotionManager.hpp
@@ -80,8 +80,6 @@ class PSPromotionManager {
PSScannerTasksQueue _claimed_stack_depth;
- uint _target_stack_size;
-
static PartialArrayStateManager* _partial_array_state_manager;
PartialArraySplitter _partial_array_splitter;
uint _min_array_size_for_chunking;
@@ -97,6 +95,8 @@ class PSPromotionManager {
inline static PSPromotionManager* manager_array(uint index);
+ void trim_stacks_to_threshold(uint threshold);
+
void process_array_chunk(PartialArrayState* state, bool stolen);
void process_array_chunk(objArrayOop obj, size_t start, size_t end);
void push_objArray(oop old_obj, oop new_obj);
@@ -147,12 +147,8 @@ class PSPromotionManager {
void flush_labs();
void flush_string_dedup_requests() { _string_dedup_requests.flush(); }
- void drain_stacks_cond_depth() {
- if (claimed_stack_depth()->size() > _target_stack_size) {
- drain_stacks(false);
- }
- }
- void drain_stacks(bool totally_drain);
+ void trim_stacks();
+ void drain_stacks();
bool stacks_empty() {
return claimed_stack_depth()->is_empty();
diff --git a/src/hotspot/share/gc/parallel/psScavenge.cpp b/src/hotspot/share/gc/parallel/psScavenge.cpp
index d1d595df529..8dbd2485e76 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -110,7 +110,7 @@ static void scavenge_roots_work(ParallelRootType::Value root_type, uint worker_i
}
// Do the real work
- pm->drain_stacks(false);
+ pm->trim_stacks();
}
static void steal_work(TaskTerminator& terminator, uint worker_id) {
@@ -118,15 +118,13 @@ static void steal_work(TaskTerminator& terminator, uint worker_id) {
PSPromotionManager* pm =
PSPromotionManager::gc_thread_promotion_manager(worker_id);
- pm->drain_stacks(true);
- guarantee(pm->stacks_empty(),
- "stacks should be empty at this point");
+ guarantee(pm->stacks_empty(), "precondition");
while (true) {
ScannerTask task;
if (PSPromotionManager::steal_depth(worker_id, task)) {
pm->process_popped_location_depth(task, true);
- pm->drain_stacks(true);
+ pm->drain_stacks();
} else {
if (terminator.offer_termination()) {
break;
@@ -181,9 +179,7 @@ class PSEvacuateFollowersClosure: public VoidClosure {
virtual void do_void() {
assert(_promotion_manager != nullptr, "Sanity");
- _promotion_manager->drain_stacks(true);
- guarantee(_promotion_manager->stacks_empty(),
- "stacks should be empty at this point");
+ _promotion_manager->drain_stacks();
if (_terminator != nullptr) {
steal_work(*_terminator, _worker_id);
@@ -227,7 +223,7 @@ public:
thread->oops_do(&roots_closure, nullptr);
// Do the real work
- _pm->drain_stacks(false);
+ _pm->trim_stacks();
}
};
@@ -278,7 +274,7 @@ public:
_active_workers);
// Do the real work
- pm->drain_stacks(false);
+ pm->trim_stacks();
}
}
@@ -295,14 +291,11 @@ public:
_oop_storage_strong_par_state.oops_do(&root_closure);
// Do the real work
- pm->drain_stacks(false);
+ pm->trim_stacks();
}
- // If active_workers can exceed 1, add a steal_work().
- // PSPromotionManager::drain_stacks_depth() does not fully drain its
- // stacks and expects a steal_work() to complete the draining if
- // ParallelGCThreads is > 1.
-
+ // Drain worker local stacks and perform work stealing if more than one worker.
+ pm->drain_stacks();
if (_active_workers > 1) {
steal_work(_terminator, worker_id);
}
@@ -340,9 +333,6 @@ bool PSScavenge::invoke(bool clear_soft_refs) {
heap->print_before_gc();
heap->trace_heap_before_gc(&_gc_tracer);
- assert(!NeverTenure || _tenuring_threshold == markWord::max_age + 1, "Sanity");
- assert(!AlwaysTenure || _tenuring_threshold == 0, "Sanity");
-
// Fill in TLABs
heap->ensure_parsability(true); // retire TLABs
@@ -365,9 +355,9 @@ bool PSScavenge::invoke(bool clear_soft_refs) {
// Let the size policy know we're starting
size_policy->minor_collection_begin();
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::clear();
-#endif
+#endif // COMPILER2
reference_processor()->start_discovery(clear_soft_refs);
@@ -437,14 +427,11 @@ bool PSScavenge::invoke(bool clear_soft_refs) {
size_policy->sample_old_gen_used_bytes(old_gen->used_in_bytes());
if (UseAdaptiveSizePolicy) {
- _tenuring_threshold = size_policy->compute_tenuring_threshold(_survivor_overflow,
- _tenuring_threshold);
-
- log_debug(gc, age)("New threshold %u (max threshold %u)", _tenuring_threshold, MaxTenuringThreshold);
-
if (young_gen->is_from_to_layout()) {
size_policy->print_stats(_survivor_overflow);
heap->resize_after_young_gc(_survivor_overflow);
+ _tenuring_threshold = size_policy->compute_tenuring_threshold(young_gen->sizing_state(),
+ _tenuring_threshold);
}
if (UsePerfData) {
@@ -475,9 +462,9 @@ bool PSScavenge::invoke(bool clear_soft_refs) {
heap->gc_epilogue(false);
}
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::update_pointers();
-#endif
+#endif // COMPILER2
size_policy->record_gc_pause_end_instant();
@@ -529,9 +516,9 @@ void PSScavenge::initialize() {
"MaxTenuringThreshold should be 0 or markWord::max_age + 1, but is %d", (int) MaxTenuringThreshold);
_tenuring_threshold = MaxTenuringThreshold;
} else {
- // We want to smooth out our startup times for the AdaptiveSizePolicy
- _tenuring_threshold = (UseAdaptiveSizePolicy) ? InitialTenuringThreshold :
- MaxTenuringThreshold;
+ // We want to smooth out startup times for AdaptiveSizePolicy.
+ _tenuring_threshold = UseAdaptiveSizePolicy ? InitialTenuringThreshold
+ : MaxTenuringThreshold;
}
ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
diff --git a/src/hotspot/share/gc/parallel/psYoungGen.cpp b/src/hotspot/share/gc/parallel/psYoungGen.cpp
index 870e912f51e..297fdde09ae 100644
--- a/src/hotspot/share/gc/parallel/psYoungGen.cpp
+++ b/src/hotspot/share/gc/parallel/psYoungGen.cpp
@@ -43,6 +43,7 @@ PSYoungGen::PSYoungGen(ReservedSpace rs, size_t initial_size, size_t min_size, s
_to_space(nullptr),
_min_gen_size(min_size),
_max_gen_size(max_size),
+ _sizing_state(SizingState::balanced),
_gen_counters(nullptr),
_eden_counters(nullptr),
_from_counters(nullptr),
@@ -352,37 +353,92 @@ void PSYoungGen::compute_desired_sizes(bool is_survivor_overflowing,
eden_size = align_up(eden_size, SpaceAlignment);
assert(eden_size >= SpaceAlignment, "inv");
+ // from-space; survivor
+ const size_t survivor_used = from_space()->used_in_bytes();
+ // When survivor usage is below this ratio, consider survivor space sparse.
+ constexpr double survivor_sparse_threshold = 0.8;
+
survivor_size = size_policy->compute_desired_survivor_size(current_survivor_size, max_gen_size());
survivor_size = MAX3(survivor_size,
- from_space()->used_in_bytes(),
+ survivor_used,
SpaceAlignment);
survivor_size = align_up(survivor_size, SpaceAlignment);
- log_debug(gc, ergo)("Desired size eden: %zu K, survivor: %zu K", eden_size/K, survivor_size/K);
+ log_debug(gc, ergo)("Desired size eden: %zu K, survivor: %zu K",
+ eden_size / K,
+ survivor_size / K);
+
+ _sizing_state = SizingState::balanced;
+
+ if (max_gen_size() < eden_size + 2 * survivor_size) {
+ log_info(gc, ergo)("Requested sizes exceed MaxNewSize (K): %zu vs %zu",
+ (eden_size + 2 * survivor_size) / K,
+ max_gen_size() / K);
+ // Must reduce eden/survivor to satisfy the max_gen_size constraint. Prioritize survivor_space to reduce promotion.
+ // Check if survivor is actually using its requested size.
+ if (!is_survivor_overflowing && survivor_used < survivor_sparse_threshold * survivor_size) {
+ // When survivor usage is sparse, trim survivor reservation and keep more room for eden.
+ size_t target_survivor_size = survivor_used + survivor_used / 4;
+ target_survivor_size = align_up(target_survivor_size, SpaceAlignment);
+ target_survivor_size = MAX2(target_survivor_size, SpaceAlignment);
+
+ if (target_survivor_size < survivor_size) {
+ // Decrease survivor gradually to avoid abrupt sizing swings.
+ // Ignoring alignment: new_survivor_size = survivor_size / 2 + 5 * survivor_used / 8.
+ const size_t survivor_delta = survivor_size - target_survivor_size;
+ const size_t survivor_decrement = align_up(survivor_delta / 2, SpaceAlignment);
+ survivor_size = MAX2(target_survivor_size, survivor_size - survivor_decrement);
+ log_debug(gc, ergo)("Trim survivor under MaxNewSize pressure (used: %zu K, target: %zu K, new: %zu K)",
+ survivor_used / K,
+ target_survivor_size / K,
+ survivor_size / K);
+ }
+ }
+
+ // Recheck after potential survivor_size adjustment.
+ if (max_gen_size() < eden_size + 2 * survivor_size) {
+ if (2 * survivor_size >= max_gen_size()) {
+ // If requested survivor size is too large
+ survivor_size = align_down((max_gen_size() - SpaceAlignment) / 2, SpaceAlignment);
+ }
- const size_t new_gen_size = eden_size + 2 * survivor_size;
- if (new_gen_size < min_gen_size()) {
- // Keep survivor and adjust eden to meet min-gen-size
- eden_size = min_gen_size() - 2 * survivor_size;
- } else if (max_gen_size() < new_gen_size) {
- log_info(gc, ergo)("Requested sizes exceeds MaxNewSize (K): %zu vs %zu", new_gen_size/K, max_gen_size()/K);
- // New capacity would exceed max; need to revise these desired sizes.
- // Favor survivor over eden in order to reduce promotion (overflow).
- if (2 * survivor_size >= max_gen_size()) {
- // If requested survivor size is too large
- survivor_size = align_down((max_gen_size() - SpaceAlignment) / 2, SpaceAlignment);
- eden_size = max_gen_size() - 2 * survivor_size;
- } else {
// Respect survivor size and reduce eden
eden_size = max_gen_size() - 2 * survivor_size;
+
+ _sizing_state = SizingState::constrained;
}
}
- assert(eden_size >= SpaceAlignment, "inv");
- assert(survivor_size >= SpaceAlignment, "inv");
+ if (eden_size + 2 * survivor_size < min_gen_size()) {
+ // Keep survivor and adjust eden to meet min-gen-size.
+ eden_size = min_gen_size() - 2 * survivor_size;
- assert(is_aligned(eden_size, SpaceAlignment), "inv");
- assert(is_aligned(survivor_size, SpaceAlignment), "inv");
+ _sizing_state = SizingState::surplus;
+ }
+
+ const size_t final_gen_size = eden_size + 2 * survivor_size;
+ // A balanced result fills max_gen_size; otherwise there is surplus young-gen headroom.
+ if (_sizing_state == SizingState::balanced) {
+ if (final_gen_size < max_gen_size()) {
+ _sizing_state = SizingState::surplus;
+ }
+ }
+
+#ifdef ASSERT
+ {
+ assert(eden_size >= SpaceAlignment, "inv");
+ assert(survivor_size >= SpaceAlignment, "inv");
+
+ assert(is_aligned(eden_size, SpaceAlignment), "inv");
+ assert(is_aligned(survivor_size, SpaceAlignment), "inv");
+
+ assert(final_gen_size >= min_gen_size(), "inv");
+ assert(final_gen_size <= max_gen_size(), "inv");
+ if (final_gen_size < max_gen_size()) {
+ assert(_sizing_state == SizingState::surplus, "inv");
+ }
+ }
+#endif
}
void PSYoungGen::resize_inner(size_t desired_eden_size,
diff --git a/src/hotspot/share/gc/parallel/psYoungGen.hpp b/src/hotspot/share/gc/parallel/psYoungGen.hpp
index ed10806ac99..d2c483f638a 100644
--- a/src/hotspot/share/gc/parallel/psYoungGen.hpp
+++ b/src/hotspot/share/gc/parallel/psYoungGen.hpp
@@ -37,6 +37,22 @@ class PSYoungGen : public CHeapObj {
friend class VMStructs;
friend class ParallelScavengeHeap;
+ public:
+ // Young generation sizing state from the latest sizing pass. It records how
+ // the desired eden/survivor sizes relate to the young-gen size bounds.
+ // Consumers such as the tenuring-threshold heuristic can use this as sizing
+ // feedback.
+ enum class SizingState : int {
+ // Desired young-gen size means "eden + 2 * survivor".
+ // Its relation to max_gen_size is:
+ // Desired young-gen size is exactly equal to max_gen_size.
+ balanced = 0,
+ // Desired young-gen size is greater than max_gen_size.
+ constrained,
+ // Desired young-gen size is less than max_gen_size.
+ surplus
+ };
+
private:
MemRegion _reserved;
PSVirtualSpace* _virtual_space;
@@ -50,6 +66,9 @@ class PSYoungGen : public CHeapObj {
const size_t _min_gen_size;
const size_t _max_gen_size;
+ // Current young-gen sizing state, updated by compute_desired_sizes().
+ SizingState _sizing_state;
+
// Performance counters
GenerationCounters* _gen_counters;
HSpaceCounters* _eden_counters;
@@ -127,6 +146,8 @@ class PSYoungGen : public CHeapObj {
size_t min_gen_size() const { return _min_gen_size; }
size_t max_gen_size() const { return _max_gen_size; }
+ SizingState sizing_state() const { return _sizing_state; }
+
// Allocation
HeapWord* cas_allocate(size_t word_size) {
HeapWord* result = eden_space()->cas_allocate(word_size);
diff --git a/src/hotspot/share/gc/serial/serialFullGC.cpp b/src/hotspot/share/gc/serial/serialFullGC.cpp
index 0c8ca51fc99..13532dea07d 100644
--- a/src/hotspot/share/gc/serial/serialFullGC.cpp
+++ b/src/hotspot/share/gc/serial/serialFullGC.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -70,9 +70,6 @@
#include "utilities/copy.hpp"
#include "utilities/events.hpp"
#include "utilities/stack.inline.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
Stack SerialFullGC::_marking_stack;
Stack SerialFullGC::_objarray_stack;
@@ -553,9 +550,6 @@ void SerialFullGC::phase1_mark(bool clear_all_softrefs) {
// Prune dead klasses from subklass/sibling/implementor lists.
Klass::clean_weak_klass_links(unloading_occurred);
-
- // Clean JVMCI metadata handles.
- JVMCI_ONLY(JVMCI::do_unloading(unloading_occurred));
}
{
@@ -726,10 +720,10 @@ void SerialFullGC::invoke_at_safepoint(bool clear_all_softrefs) {
}
// Don't add any more derived pointers during phase3
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
assert(DerivedPointerTable::is_active(), "Sanity");
DerivedPointerTable::set_active(false);
-#endif
+#endif // COMPILER2
{
// Adjust the pointers to reflect the new locations
diff --git a/src/hotspot/share/gc/serial/serialHeap.cpp b/src/hotspot/share/gc/serial/serialHeap.cpp
index 8eafdfdcc82..5d068ff67e0 100644
--- a/src/hotspot/share/gc/serial/serialHeap.cpp
+++ b/src/hotspot/share/gc/serial/serialHeap.cpp
@@ -80,9 +80,6 @@
#include "utilities/macros.hpp"
#include "utilities/stack.inline.hpp"
#include "utilities/vmError.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
SerialHeap* SerialHeap::heap() {
return named_heap(CollectedHeap::Serial);
@@ -394,13 +391,13 @@ bool SerialHeap::do_young_collection(bool clear_soft_refs) {
Universe::verify("Before GC");
}
gc_prologue();
- COMPILER2_OR_JVMCI_PRESENT(DerivedPointerTable::clear());
+ COMPILER2_PRESENT(DerivedPointerTable::clear());
save_marks();
bool result = _young_gen->collect(clear_soft_refs);
- COMPILER2_OR_JVMCI_PRESENT(DerivedPointerTable::update_pointers());
+ COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
// Only update stats for successful young-gc
if (result) {
@@ -573,7 +570,7 @@ void SerialHeap::do_full_collection(bool clear_all_soft_refs) {
}
gc_prologue();
- COMPILER2_OR_JVMCI_PRESENT(DerivedPointerTable::clear());
+ COMPILER2_PRESENT(DerivedPointerTable::clear());
CodeCache::on_gc_marking_cycle_start();
STWGCTimer* gc_timer = SerialFullGC::gc_timer();
@@ -593,7 +590,7 @@ void SerialHeap::do_full_collection(bool clear_all_soft_refs) {
gc_tracer->report_gc_end(gc_timer->gc_end(), gc_timer->time_partitions());
CodeCache::on_gc_marking_cycle_finish();
CodeCache::arm_all_nmethods();
- COMPILER2_OR_JVMCI_PRESENT(DerivedPointerTable::update_pointers());
+ COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
// Adjust generation sizes.
_old_gen->compute_new_size();
@@ -784,9 +781,9 @@ void SerialHeap::gc_prologue() {
};
void SerialHeap::gc_epilogue(bool full) {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
assert(DerivedPointerTable::is_empty(), "derived pointer present");
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
resize_all_tlabs();
diff --git a/src/hotspot/share/gc/shared/adaptiveSizePolicy.cpp b/src/hotspot/share/gc/shared/adaptiveSizePolicy.cpp
index b048c7dd79a..db616686d4b 100644
--- a/src/hotspot/share/gc/shared/adaptiveSizePolicy.cpp
+++ b/src/hotspot/share/gc/shared/adaptiveSizePolicy.cpp
@@ -43,8 +43,7 @@ AdaptiveSizePolicy::AdaptiveSizePolicy(double gc_pause_goal_sec) :
_peak_old_used_bytes_seq(seq_default_alpha_value),
_minor_pause_young_estimator(new LinearLeastSquareFit(AdaptiveSizePolicyWeight)),
_threshold_tolerance_percent(1.0 + ThresholdTolerance/100.0),
- _gc_pause_goal_sec(gc_pause_goal_sec),
- _young_gen_policy_is_ready(false) {}
+ _gc_pause_goal_sec(gc_pause_goal_sec) {}
void AdaptiveSizePolicy::minor_collection_begin() {
_minor_timer.reset();
@@ -61,12 +60,6 @@ void AdaptiveSizePolicy::minor_collection_end(size_t eden_capacity_in_bytes) {
record_gc_duration(minor_pause_in_seconds);
_trimmed_minor_gc_time_seconds.add(minor_pause_in_seconds);
- if (!_young_gen_policy_is_ready) {
- // The policy does not have enough data until at least some
- // young collections have been done.
- _young_gen_policy_is_ready = GCId::current() >= AdaptiveSizePolicyReadyThreshold;
- }
-
{
double eden_size_in_mbytes = ((double)eden_capacity_in_bytes)/((double)M);
_minor_pause_young_estimator->update(eden_size_in_mbytes, minor_pause_in_ms);
diff --git a/src/hotspot/share/gc/shared/adaptiveSizePolicy.hpp b/src/hotspot/share/gc/shared/adaptiveSizePolicy.hpp
index 280c406faa7..0860b28ba39 100644
--- a/src/hotspot/share/gc/shared/adaptiveSizePolicy.hpp
+++ b/src/hotspot/share/gc/shared/adaptiveSizePolicy.hpp
@@ -133,9 +133,6 @@ class AdaptiveSizePolicy : public CHeapObj {
const double _gc_pause_goal_sec; // Goal for maximum GC pause
- // Flag indicating that the adaptive policy is ready to use
- bool _young_gen_policy_is_ready;
-
// Accessors
double gc_pause_goal_sec() const { return _gc_pause_goal_sec; }
@@ -160,8 +157,6 @@ class AdaptiveSizePolicy : public CHeapObj {
return gc_percent;
}
- bool young_gen_policy_is_ready() { return _young_gen_policy_is_ready; }
-
size_t eden_increment(size_t cur_eden);
size_t eden_increment(size_t cur_eden, uint percent_change);
diff --git a/src/hotspot/share/gc/shared/barrierSetNMethod.cpp b/src/hotspot/share/gc/shared/barrierSetNMethod.cpp
index a1f03a4bf50..2f7b79beab0 100644
--- a/src/hotspot/share/gc/shared/barrierSetNMethod.cpp
+++ b/src/hotspot/share/gc/shared/barrierSetNMethod.cpp
@@ -39,9 +39,6 @@
#include "runtime/threads.hpp"
#include "runtime/threadWXSetters.inline.hpp"
#include "utilities/debug.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciRuntime.hpp"
-#endif
int BarrierSetNMethod::disarmed_guard_value() const {
return *disarmed_guard_value_address();
@@ -65,7 +62,7 @@ bool BarrierSetNMethod::supports_entry_barrier(nmethod* nm) {
return false;
}
- if (nm->is_native_method() || nm->is_compiled_by_c2() || nm->is_compiled_by_c1() || nm->is_compiled_by_jvmci()) {
+ if (nm->is_native_method() || nm->is_compiled_by_c2() || nm->is_compiled_by_c1()) {
return true;
}
diff --git a/src/hotspot/share/gc/shared/barrierSetNMethod.hpp b/src/hotspot/share/gc/shared/barrierSetNMethod.hpp
index 88bae4d5c1c..812763e429d 100644
--- a/src/hotspot/share/gc/shared/barrierSetNMethod.hpp
+++ b/src/hotspot/share/gc/shared/barrierSetNMethod.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -73,10 +73,6 @@ public:
virtual oop oop_load_no_keepalive(const nmethod* nm, int index);
virtual oop oop_load_phantom(const nmethod* nm, int index);
-
-#if INCLUDE_JVMCI
- bool verify_barrier(nmethod* nm, FormatBuffer<>& msg);
-#endif
};
diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
index 239cce16aa3..4b6a9ed3f33 100644
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
@@ -129,6 +129,12 @@ void BarrierStubC2::dont_preserve(Register r) {
} while (vm_reg->is_Register() && !vm_reg->is_concrete());
}
+bool BarrierStubC2::is_preserved(Register r) const {
+ const VMReg vm_reg = r->as_VMReg();
+ assert(vm_reg->is_Register(), "r must be a general-purpose register");
+ return _preserve.member(OptoReg::as_OptoReg(vm_reg));
+}
+
const RegMask& BarrierStubC2::preserve_set() const {
return _preserve;
}
@@ -1111,7 +1117,7 @@ void BarrierSetC2::elide_dominated_barriers(Node_List& accesses, Node_List& acce
if (access_block == mem_block) {
// Earlier accesses in the same block
if (mem_index < access_index && !block_has_safepoint(mem_block, mem_index + 1, access_index)) {
- elide_dominated_barrier(access);
+ elide_dominated_barrier(access, mem->is_Mach() ? mem->as_Mach() : nullptr);
}
} else if (mem_block->dominates(access_block)) {
// Dominating block? Look around for safepoints
@@ -1141,7 +1147,7 @@ void BarrierSetC2::elide_dominated_barriers(Node_List& accesses, Node_List& acce
}
if (!safepoint_found) {
- elide_dominated_barrier(access);
+ elide_dominated_barrier(access, mem->is_Mach() ? mem->as_Mach() : nullptr);
}
}
}
diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
index a486a88c48f..635d63ff4c6 100644
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -261,6 +261,8 @@ public:
void preserve(Register reg);
// Do not preserve the value in reg across runtime calls in this barrier.
void dont_preserve(Register reg);
+ // Check if register is in preserved set.
+ bool is_preserved(Register reg) const;
// Set of registers whose value needs to be preserved across runtime calls in this barrier.
const RegMask& preserve_set() const;
};
@@ -339,6 +341,7 @@ public:
// If the BarrierSetC2 state has barrier nodes in its compilation
// unit state to be expanded later, then now is the time to do so.
virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const { return false; }
+ virtual void final_refinement(Compile* C) const { }
virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const { return false; }
virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return false; }
virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; }
@@ -370,7 +373,7 @@ public:
// Whether the given phi node joins OOPs from fast and slow allocation paths.
static bool is_allocation(const Node* node);
// Elide GC barriers from a Mach node according to elide_dominated_barriers().
- virtual void elide_dominated_barrier(MachNode* mach) const { }
+ virtual void elide_dominated_barrier(MachNode* mach, MachNode* dominator) const { }
// Elide GC barriers from instructions in 'accesses' if they are dominated by
// instructions in 'access_dominators' (according to elide_mach_barrier()) and
// there is no safepoint poll in between.
diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
index f7445ff254f..381a9f65295 100644
--- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
+++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -126,7 +126,7 @@ Node* CardTableBarrierSetC2::byte_map_base_node(IdealKit* kit) const {
#endif
CardTable::CardValue* card_table_base = ci_card_table_address_const();
if (card_table_base != nullptr) {
- return kit->makecon(TypeRawPtr::make((address)card_table_base));
+ return kit->makecon(TypeRawPtr::make((address)card_table_base, relocInfo::none));
} else {
return kit->makecon(Type::get_zero_type(T_ADDRESS));
}
diff --git a/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp b/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp
index d6541198858..28bdaa80a27 100644
--- a/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp
+++ b/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp
@@ -112,7 +112,7 @@ void CardTableBarrierSet::print_on(outputStream* st) const {
// that specific collector in mind, and the documentation above suitably
// extended and updated.
void CardTableBarrierSet::on_slowpath_allocation_exit(JavaThread* thread, oop new_obj) {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (!ReduceInitialCardMarks) {
return;
}
@@ -124,5 +124,5 @@ void CardTableBarrierSet::on_slowpath_allocation_exit(JavaThread* thread, oop ne
// Do the card mark
write_region(mr);
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp
index 100866bb528..0cf47b5fafb 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.hpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.hpp
@@ -91,7 +91,6 @@ public:
class CollectedHeap : public CHeapObj {
friend class CPUTimeUsage::GC;
friend class VMStructs;
- friend class JVMCIVMStructs;
friend class IsSTWGCActiveMark; // Block structured external access to _is_stw_gc_active
friend class MemAllocator;
diff --git a/src/hotspot/share/gc/shared/gcConfig.cpp b/src/hotspot/share/gc/shared/gcConfig.cpp
index 402bd0caacd..c31bce35354 100644
--- a/src/hotspot/share/gc/shared/gcConfig.cpp
+++ b/src/hotspot/share/gc/shared/gcConfig.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -95,10 +95,11 @@ void GCConfig::fail_if_non_included_gc_is_selected() {
}
void GCConfig::select_gc_ergonomically() {
- if (os::is_server_class_machine()) {
#if INCLUDE_G1GC
- FLAG_SET_ERGO_IF_DEFAULT(UseG1GC, true);
-#elif INCLUDE_PARALLELGC
+ FLAG_SET_ERGO_IF_DEFAULT(UseG1GC, true);
+#else
+ if (os::is_server_class_machine()) {
+#if INCLUDE_PARALLELGC
FLAG_SET_ERGO_IF_DEFAULT(UseParallelGC, true);
#elif INCLUDE_SERIALGC
FLAG_SET_ERGO_IF_DEFAULT(UseSerialGC, true);
@@ -108,6 +109,7 @@ void GCConfig::select_gc_ergonomically() {
FLAG_SET_ERGO_IF_DEFAULT(UseSerialGC, true);
#endif
}
+#endif
}
bool GCConfig::is_no_gc_selected() {
diff --git a/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp b/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp
index 096bee9e4a0..68d3da32d35 100644
--- a/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp
+++ b/src/hotspot/share/gc/shared/jvmFlagConstraintsGC.cpp
@@ -28,13 +28,17 @@
#include "gc/shared/gcConfig.hpp"
#include "gc/shared/jvmFlagConstraintsGC.hpp"
#include "gc/shared/plab.hpp"
+#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/threadLocalAllocBuffer.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "runtime/arguments.hpp"
+#include "runtime/flags/jvmFlagLimit.hpp"
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/javaThread.hpp"
#include "utilities/align.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/integerCast.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"
#if INCLUDE_G1GC
@@ -120,16 +124,28 @@ JVMFlag::Error MaxHeapFreeRatioConstraintFunc(uintx value, bool verbose) {
}
static JVMFlag::Error CheckMaxHeapSizeAndSoftRefLRUPolicyMSPerMB(size_t maxHeap, intx softRef, bool verbose) {
- if ((softRef > 0) && ((maxHeap / M) > (max_uintx / softRef))) {
+ // SoftRefLRUPolicyMSPerMB option's range constraint
+ precond(softRef >= 0);
+
+ if (softRef == 0) {
+ // SoftRefLRUPolicyMSPerMB == 0 is always valid
+ return JVMFlag::SUCCESS;
+ }
+
+ // These are constraints to avoid overflows in the AbstractLRUReferencePolicy arithmetic
+ const size_t maxHeap_in_mb = maxHeap / M;
+ const uint64_t size_in_mb_for_maximum_max_interval = AbstractLRUReferencePolicy::MaximumMaxInterval / integer_cast(softRef);
+
+ if (maxHeap_in_mb > size_in_mb_for_maximum_max_interval) {
JVMFlag::printError(verbose,
"Desired lifetime of SoftReferences cannot be expressed correctly. "
"MaxHeapSize (%zu) or SoftRefLRUPolicyMSPerMB "
"(%zd) is too large\n",
maxHeap, softRef);
return JVMFlag::VIOLATES_CONSTRAINT;
- } else {
- return JVMFlag::SUCCESS;
}
+
+ return JVMFlag::SUCCESS;
}
JVMFlag::Error SoftRefLRUPolicyMSPerMBConstraintFunc(intx value, bool verbose) {
@@ -265,12 +281,12 @@ JVMFlag::Error InitialHeapSizeConstraintFunc(size_t value, bool verbose) {
}
JVMFlag::Error MaxHeapSizeConstraintFunc(size_t value, bool verbose) {
- JVMFlag::Error status = MaxSizeForHeapAlignment("MaxHeapSize", value, verbose);
-
- if (status == JVMFlag::SUCCESS) {
- status = CheckMaxHeapSizeAndSoftRefLRUPolicyMSPerMB(value, SoftRefLRUPolicyMSPerMB, verbose);
+ if (JVMFlagLimit::validating_phase() >= JVMFlagConstraintPhase::AfterMemoryInit) {
+ JVMFlag::printError(verbose, "MaxHeapSize must not change after memory initialization\n");
+ return JVMFlag::VIOLATES_CONSTRAINT;
}
- return status;
+
+ return MaxSizeForHeapAlignment("MaxHeapSize", value, verbose);
}
JVMFlag::Error SoftMaxHeapSizeConstraintFunc(size_t value, bool verbose) {
diff --git a/src/hotspot/share/gc/shared/referencePolicy.cpp b/src/hotspot/share/gc/shared/referencePolicy.cpp
index 6c5f459ebb4..86fd4b37771 100644
--- a/src/hotspot/share/gc/shared/referencePolicy.cpp
+++ b/src/hotspot/share/gc/shared/referencePolicy.cpp
@@ -28,19 +28,19 @@
#include "gc/shared/referencePolicy.hpp"
#include "memory/universe.hpp"
#include "runtime/globals.hpp"
+#include "utilities/globalDefinitions.hpp"
#include "utilities/integerCast.hpp"
-void AbstractLRUReferencePolicy::set_max_interval(jlong max_interval) {
- assert(max_interval >= 0, "Sanity check");
+void AbstractLRUReferencePolicy::set_max_interval(uint64_t max_interval) {
+ assert(max_interval <= MaximumMaxInterval, "Sanity check");
_max_interval = max_interval;
}
// The oop passed in is the SoftReference object, and not
// the object the SoftReference points to.
bool AbstractLRUReferencePolicy::should_clear_reference(oop p, jlong timestamp_clock) {
- assert(_max_interval >= 0, "Forgot to call setup");
- jlong interval = timestamp_clock - java_lang_ref_SoftReference::timestamp(p);
- assert(interval >= 0, "Sanity check");
+ assert(_max_interval <= MaximumMaxInterval, "Forgot to call setup");
+ const uint64_t interval = integer_cast(java_subtract(timestamp_clock, java_lang_ref_SoftReference::timestamp(p)));
// The interval will be zero if the ref was accessed since the last scavenge/gc.
if(interval <= _max_interval) {
@@ -52,14 +52,16 @@ bool AbstractLRUReferencePolicy::should_clear_reference(oop p, jlong timestamp_c
// Capture state (of-the-VM) information needed to evaluate the policy
void LRUCurrentHeapPolicy::setup() {
- set_max_interval(integer_cast(Universe::heap()->free_at_last_gc() / M) * SoftRefLRUPolicyMSPerMB);
+ // How much of the current heap was not used at the last gc
+ const uint64_t current_heap = Universe::heap()->free_at_last_gc() / M;
+
+ set_max_interval(current_heap * integer_cast(SoftRefLRUPolicyMSPerMB));
}
// Capture state (of-the-VM) information needed to evaluate the policy
void LRUMaxHeapPolicy::setup() {
- size_t max_heap = MaxHeapSize;
- max_heap -= Universe::heap()->used_at_last_gc();
- max_heap /= M;
+ // How much of the max heap was not used at the last gc
+ const uint64_t max_heap = (MaxHeapSize - Universe::heap()->used_at_last_gc()) / M;
- set_max_interval(integer_cast(max_heap) * SoftRefLRUPolicyMSPerMB);
+ set_max_interval(max_heap * integer_cast(SoftRefLRUPolicyMSPerMB));
}
diff --git a/src/hotspot/share/gc/shared/referencePolicy.hpp b/src/hotspot/share/gc/shared/referencePolicy.hpp
index 0fd918fa723..33d9bf2bb31 100644
--- a/src/hotspot/share/gc/shared/referencePolicy.hpp
+++ b/src/hotspot/share/gc/shared/referencePolicy.hpp
@@ -25,7 +25,12 @@
#ifndef SHARE_GC_SHARED_REFERENCEPOLICY_HPP
#define SHARE_GC_SHARED_REFERENCEPOLICY_HPP
+#include "cppstdlib/limits.hpp"
+#include "memory/allocation.hpp"
+#include "nmt/memTag.hpp"
#include "oops/oopsHierarchy.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/integerCast.hpp"
// referencePolicy is used to determine when soft reference objects
// should be cleared.
@@ -58,14 +63,18 @@ class AlwaysClearPolicy : public ReferencePolicy {
class AbstractLRUReferencePolicy : public ReferencePolicy {
private:
- jlong _max_interval = -1;
+ static constexpr uint64_t UninitializedMaxInterval = std::numeric_limits::max();
+ uint64_t _max_interval = UninitializedMaxInterval;
protected:
- void set_max_interval(jlong max_interval);
+ void set_max_interval(uint64_t max_interval);
public:
bool should_clear_reference(oop p, jlong timestamp_clock) final;
void setup() override = 0;
+
+ static constexpr uint64_t MaximumMaxInterval = integer_cast(max_jlong);
+ static_assert(UninitializedMaxInterval > MaximumMaxInterval, "Used to catch uninitialized _max_interval");
};
class LRUCurrentHeapPolicy : public AbstractLRUReferencePolicy {
diff --git a/src/hotspot/share/gc/shared/referenceProcessor.cpp b/src/hotspot/share/gc/shared/referenceProcessor.cpp
index b82db08ecbd..342fd16d588 100644
--- a/src/hotspot/share/gc/shared/referenceProcessor.cpp
+++ b/src/hotspot/share/gc/shared/referenceProcessor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,7 +60,7 @@ void ReferenceProcessor::init_statics() {
java_lang_ref_SoftReference::set_clock(_soft_ref_timestamp_clock);
_always_clear_soft_ref_policy = new AlwaysClearPolicy();
- if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) {
+ if (CompilerConfig::is_c2_enabled()) {
_default_soft_ref_policy = new LRUMaxHeapPolicy();
} else {
_default_soft_ref_policy = new LRUCurrentHeapPolicy();
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
index 59d7befb32d..fae0365719a 100644
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
@@ -35,6 +35,7 @@
#include "runtime/perfData.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/copy.hpp"
+#include "utilities/integerCast.hpp"
size_t ThreadLocalAllocBuffer::_max_size = 0;
unsigned int ThreadLocalAllocBuffer::_target_num_refills = 0;
@@ -75,43 +76,54 @@ size_t ThreadLocalAllocBuffer::remaining() {
}
void ThreadLocalAllocBuffer::accumulate_and_reset_statistics(ThreadLocalAllocStats* stats) {
- size_t capacity = Universe::heap()->tlab_capacity();
- size_t used = Universe::heap()->tlab_used();
-
_gc_waste += (unsigned)remaining();
- uint64_t total_allocated = thread()->allocated_bytes();
- uint64_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
- _allocated_before_last_gc = total_allocated;
+ const uint64_t allocated_bytes = thread()->allocated_bytes();
- print_stats("gc");
+ const size_t allocated_since_last_gc = integer_cast_permit_tautology<size_t>(allocated_bytes - _allocated_before_last_gc);
+ _allocated_before_last_gc = allocated_bytes;
- if (_num_refills > 0) {
- // Update allocation history if a reasonable amount of eden was allocated.
- bool update_allocation_history = used > 0.5 * capacity;
-
- if (update_allocation_history) {
- // Average the fraction of eden allocated in a tlab by this
- // thread for use in the next resize operation.
- // _gc_waste is not subtracted because it's included in
- // "used".
- // The result can be larger than 1.0 due to direct to old allocations.
- // These allocations should ideally not be counted but since it is not possible
- // to filter them out here we just cap the fraction to be at most 1.0.
- // Keep alloc_frac as float and not double to avoid the double to float conversion
- float alloc_frac = MIN2(1.0f, allocated_since_last_gc / (float) used);
- _allocation_fraction.sample(alloc_frac);
+ if (allocated_since_last_gc > 0) {
+ const size_t tlab_capacity = Universe::heap()->tlab_capacity();
+ const size_t tlab_used = Universe::heap()->tlab_used();
+ if (tlab_used > 0.5 * tlab_capacity) {
+ // To avoid divide-by-zero
+ const size_t effective_tlab_capacity = MAX2(tlab_capacity, size_t(1));
+ const float alloc_frac = (float)allocated_since_last_gc / effective_tlab_capacity;
+ _allocation_fraction.sample(MIN2(alloc_frac, 1.0f));
}
-
- stats->update_fast_allocations(_num_refills,
- _allocated_size,
- _gc_waste,
- _refill_waste);
+ stats->update_current_thread_stats(_num_refills,
+ allocated_since_last_gc,
+ _allocated_size,
+ _gc_waste,
+ _refill_waste,
+ _num_slow_allocations);
} else {
- assert(_num_refills == 0 && _refill_waste == 0 && _gc_waste == 0,
+ assert(_num_refills == 0 && _refill_waste == 0
+ && _gc_waste == 0 && _num_slow_allocations == 0,
"tlab stats == 0");
}
- stats->update_num_slow_allocations(_num_slow_allocations);
+ {
+ Log(gc, tlab) log;
+ if (log.is_trace()) {
+ Thread* thrd = thread();
+ size_t waste = _gc_waste + _refill_waste;
+ double waste_percent = percent_of(waste, _allocated_size);
+ log.trace("TLAB GC: thread: " PTR_FORMAT " [id: %2d]"
+ " desired: %zuK"
+ " allocated: %zuK"
+ " slow allocs: %d refill waste: %zuB"
+ " refills: %d waste %4.1f%% gc: %dB"
+ " slow: %dB",
+ p2i(thrd), thrd->osthread()->thread_id(),
+ _desired_size*HeapWordSize/K,
+ allocated_since_last_gc/K,
+ _num_slow_allocations, _refill_waste_limit * HeapWordSize,
+ _num_refills, waste_percent,
+ _gc_waste * HeapWordSize,
+ _refill_waste * HeapWordSize);
+ }
+ }
reset_statistics();
}
@@ -147,20 +159,27 @@ void ThreadLocalAllocBuffer::record_refill_waste() {
}
void ThreadLocalAllocBuffer::resize() {
- // Compute the next tlab size using expected allocation amount
assert(ResizeTLAB, "Should not call this otherwise");
- size_t alloc = (size_t)(_allocation_fraction.average() *
- (Universe::heap()->tlab_capacity() / HeapWordSize));
+ size_t capacity_in_words = Universe::heap()->tlab_capacity() / HeapWordSize;
+ float alloc_fraction = _allocation_fraction.average();
+ if (alloc_fraction == 0.0) {
+ // No samples, use global alloc fraction as an approximation.
+ const float total_frac = ThreadLocalAllocStats::total_requested_size_fraction_avg();
+ const uint num_threads = ThreadLocalAllocStats::num_allocating_threads_avg();
+ alloc_fraction = total_frac / num_threads;
+ }
+ size_t alloc = (size_t)(alloc_fraction * capacity_in_words);
size_t new_size = alloc / _target_num_refills;
new_size = clamp(new_size, min_size(), max_size());
size_t aligned_new_size = align_object_size(new_size);
- log_trace(gc, tlab)("TLAB new size: thread: " PTR_FORMAT " [id: %2d]"
- " refills %d alloc: %8.6f desired_size: %zu -> %zu",
+ log_trace(gc, tlab)("TLAB resize: thread: " PTR_FORMAT " [id: %2d]"
+ " alloc-fraction: %.3f desired_size: %zuK -> %zuK",
p2i(thread()), thread()->osthread()->thread_id(),
- _target_num_refills, _allocation_fraction.average(), desired_size(), aligned_new_size);
+ alloc_fraction,
+ desired_size() * HeapWordSize/K, aligned_new_size * HeapWordSize/K);
set_desired_size(aligned_new_size);
set_refill_waste_limit(initial_refill_waste_limit());
@@ -179,11 +198,24 @@ void ThreadLocalAllocBuffer::fill(HeapWord* start,
size_t new_size) {
_num_refills++;
_allocated_size += new_size;
- print_stats("fill");
+
assert(top <= start + new_size - alignment_reserve(), "size too small");
initialize(start, top, start + new_size - alignment_reserve());
-
+ {
+ Log(gc, tlab) log;
+ if (log.is_trace()) {
+ Thread* thrd = thread();
+ log.trace("TLAB fill: thread: " PTR_FORMAT " [id: %2d]"
+ " capacity: %zuK"
+ " slow allocs: %d "
+ " refills: %d",
+ p2i(thrd), thrd->osthread()->thread_id(),
+ pointer_delta(_end, _start, sizeof(char)) / K,
+ _num_slow_allocations,
+ _num_refills);
+ }
+ }
// Reset amount of internal fragmentation
set_refill_waste_limit(initial_refill_waste_limit());
}
@@ -206,13 +238,6 @@ void ThreadLocalAllocBuffer::initialize() {
set_desired_size(initial_desired_size());
- size_t capacity = Universe::heap()->tlab_capacity() / HeapWordSize;
- if (capacity > 0) {
- // Keep alloc_frac as float and not double to avoid the double to float conversion
- float alloc_frac = desired_size() * target_num_refills() / (float)capacity;
- _allocation_fraction.sample(alloc_frac);
- }
-
set_refill_waste_limit(initial_refill_waste_limit());
reset_statistics();
@@ -243,11 +268,11 @@ size_t ThreadLocalAllocBuffer::initial_desired_size() {
if (TLABSize > 0) {
init_sz = TLABSize / HeapWordSize;
} else {
- // Initial size is a function of the average number of allocating threads.
- unsigned int num_threads = ThreadLocalAllocStats::num_allocating_threads_avg();
-
- init_sz = (Universe::heap()->tlab_capacity() / HeapWordSize) /
- (num_threads * target_num_refills());
+ const size_t predicted_total_requested_size = (size_t)(ThreadLocalAllocStats::total_requested_size_fraction_avg() * Universe::heap()->tlab_capacity());
+ const uint num_threads = ThreadLocalAllocStats::num_allocating_threads_avg();
+ const size_t per_thread_requested_size = predicted_total_requested_size / num_threads;
+ const size_t tlab_size = per_thread_requested_size / _target_num_refills;
+ init_sz = tlab_size / HeapWordSize;
init_sz = align_object_size(init_sz);
}
// We can't use clamp() between min_size() and max_size() here because some
@@ -258,32 +283,7 @@ size_t ThreadLocalAllocBuffer::initial_desired_size() {
return init_sz;
}
-void ThreadLocalAllocBuffer::print_stats(const char* tag) {
- Log(gc, tlab) log;
- if (!log.is_trace()) {
- return;
- }
-
- Thread* thrd = thread();
- size_t waste = _gc_waste + _refill_waste;
- double waste_percent = percent_of(waste, _allocated_size);
- size_t tlab_used = Universe::heap()->tlab_used();
- log.trace("TLAB: %s thread: " PTR_FORMAT " [id: %2d]"
- " desired_size: %zuKB"
- " slow allocs: %d refill waste: %zuB"
- " alloc:%8.5f %8.0fKB refills: %d waste %4.1f%% gc: %dB"
- " slow: %dB",
- tag, p2i(thrd), thrd->osthread()->thread_id(),
- _desired_size / (K / HeapWordSize),
- _num_slow_allocations, _refill_waste_limit * HeapWordSize,
- _allocation_fraction.average(),
- _allocation_fraction.average() * tlab_used / K,
- _num_refills, waste_percent,
- _gc_waste * HeapWordSize,
- _refill_waste * HeapWordSize);
-}
-
-Thread* ThreadLocalAllocBuffer::thread() {
+Thread* ThreadLocalAllocBuffer::thread() const {
return (Thread*)(((char*)this) + in_bytes(start_offset()) - in_bytes(Thread::tlab_start_offset()));
}
@@ -314,6 +314,7 @@ PerfVariable* ThreadLocalAllocStats::_perf_max_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_num_slow_allocations;
PerfVariable* ThreadLocalAllocStats::_perf_max_num_slow_allocations;
AdaptiveWeightedAverage ThreadLocalAllocStats::_num_allocating_threads_avg(0);
+AdaptiveWeightedAverage ThreadLocalAllocStats::_total_requested_size_fraction(0);
static PerfVariable* create_perf_variable(const char* name, PerfData::Units unit, TRAPS) {
ResourceMark rm;
@@ -324,6 +325,9 @@ void ThreadLocalAllocStats::initialize() {
_num_allocating_threads_avg = AdaptiveWeightedAverage(TLABAllocationWeight);
_num_allocating_threads_avg.sample(1); // One allocating thread at startup
+ _total_requested_size_fraction = AdaptiveWeightedAverage(TLABAllocationWeight);
+ _total_requested_size_fraction.sample(0.10f); // 10%
+
if (UsePerfData) {
EXCEPTION_MARK;
_perf_num_allocating_threads = create_perf_variable("allocThreads", PerfData::U_None, CHECK);
@@ -344,6 +348,7 @@ ThreadLocalAllocStats::ThreadLocalAllocStats() :
_total_num_refills(0),
_max_num_refills(0),
_total_allocated_size(0),
+ _total_requested_bytes(0),
_total_gc_waste(0),
_max_gc_waste(0),
_total_refill_waste(0),
@@ -355,21 +360,25 @@ unsigned int ThreadLocalAllocStats::num_allocating_threads_avg() {
return MAX2((unsigned int)(_num_allocating_threads_avg.average() + 0.5), 1U);
}
-void ThreadLocalAllocStats::update_fast_allocations(unsigned int num_refills,
- size_t allocated_size,
- size_t gc_waste,
- size_t refill_waste) {
- _num_allocating_threads += 1;
- _total_num_refills += num_refills;
- _max_num_refills = MAX2(_max_num_refills, num_refills);
- _total_allocated_size += allocated_size;
- _total_gc_waste += gc_waste;
- _max_gc_waste = MAX2(_max_gc_waste, gc_waste);
- _total_refill_waste += refill_waste;
- _max_refill_waste = MAX2(_max_refill_waste, refill_waste);
+float ThreadLocalAllocStats::total_requested_size_fraction_avg() {
+ return _total_requested_size_fraction.average();
}
-void ThreadLocalAllocStats::update_num_slow_allocations(unsigned int num_slow_allocations) {
+void ThreadLocalAllocStats::update_current_thread_stats(unsigned int num_refills,
+ size_t requested_bytes,
+ size_t alloc_size_for_tlab,
+ size_t gc_waste,
+ size_t refill_waste,
+ unsigned int num_slow_allocations) {
+ _num_allocating_threads += 1;
+ _total_num_refills += num_refills;
+ _max_num_refills = MAX2(_max_num_refills, num_refills);
+ _total_allocated_size += alloc_size_for_tlab;
+ _total_requested_bytes += requested_bytes;
+ _total_gc_waste += gc_waste;
+ _max_gc_waste = MAX2(_max_gc_waste, gc_waste);
+ _total_refill_waste += refill_waste;
+ _max_refill_waste = MAX2(_max_refill_waste, refill_waste);
_total_num_slow_allocations += num_slow_allocations;
_max_num_slow_allocations = MAX2(_max_num_slow_allocations, num_slow_allocations);
}
@@ -379,6 +388,7 @@ void ThreadLocalAllocStats::update(const ThreadLocalAllocStats& other) {
_total_num_refills += other._total_num_refills;
_max_num_refills = MAX2(_max_num_refills, other._max_num_refills);
_total_allocated_size += other._total_allocated_size;
+ _total_requested_bytes += other._total_requested_bytes;
_total_gc_waste += other._total_gc_waste;
_max_gc_waste = MAX2(_max_gc_waste, other._max_gc_waste);
_total_refill_waste += other._total_refill_waste;
@@ -392,6 +402,7 @@ void ThreadLocalAllocStats::reset() {
_total_num_refills = 0;
_max_num_refills = 0;
_total_allocated_size = 0;
+ _total_requested_bytes = 0;
_total_gc_waste = 0;
_max_gc_waste = 0;
_total_refill_waste = 0;
@@ -401,22 +412,37 @@ void ThreadLocalAllocStats::reset() {
}
void ThreadLocalAllocStats::publish() {
- if (_total_allocated_size == 0) {
+ if (_total_requested_bytes == 0) {
return;
}
_num_allocating_threads_avg.sample(_num_allocating_threads);
+ {
+ const size_t tlab_capacity = Universe::heap()->tlab_capacity();
+ const size_t tlab_used = Universe::heap()->tlab_used();
+ if (tlab_used > 0.5 * tlab_capacity) {
+ // To avoid divide-by-zero
+ const size_t effective_tlab_capacity = MAX2(tlab_capacity, size_t(1));
+ const float requested_size_fraction = (float)_total_requested_bytes / effective_tlab_capacity;
+ _total_requested_size_fraction.sample(MIN2(requested_size_fraction, 1.0f));
+ }
+ }
+
const size_t waste = _total_gc_waste + _total_refill_waste;
const double waste_percent = percent_of(waste, _total_allocated_size);
- log_debug(gc, tlab)("TLAB totals: thrds: %d refills: %d max: %d"
- " slow allocs: %d max %d waste: %4.1f%%"
- " gc: %zuB max: %zuB"
- " slow: %zuB max: %zuB",
- _num_allocating_threads, _total_num_refills, _max_num_refills,
+
+ const double gc_waste_pct = percent_of(_total_gc_waste, _total_allocated_size);
+ const double refill_waste_pct = percent_of(_total_refill_waste, _total_allocated_size);
+
+ log_debug(gc, tlab)("TLAB totals: thrds: %d alloc-frac: %.1f%% refills: %d max: %d"
+ " slow allocs: %d max %d waste: %.1f%%"
+ " gc: %zuB(%.1f%%) max: %zuB"
+ " refill: %zuB(%.1f%%) max: %zuB",
+ _num_allocating_threads, _total_requested_size_fraction.average() * 100, _total_num_refills, _max_num_refills,
_total_num_slow_allocations, _max_num_slow_allocations, waste_percent,
- _total_gc_waste * HeapWordSize, _max_gc_waste * HeapWordSize,
- _total_refill_waste * HeapWordSize, _max_refill_waste * HeapWordSize);
+ _total_gc_waste * HeapWordSize, gc_waste_pct, _max_gc_waste * HeapWordSize,
+ _total_refill_waste * HeapWordSize, refill_waste_pct, _max_refill_waste * HeapWordSize);
if (UsePerfData) {
_perf_num_allocating_threads ->set_value(_num_allocating_threads);
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
index 67bc149013e..f69ceb00b52 100644
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
@@ -44,7 +44,6 @@ class ThreadLocalAllocStats;
// trip the return and sample the allocation.
class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
friend class VMStructs;
- friend class JVMCIVMStructs;
private:
HeapWord* _start; // address of TLAB
HeapWord* _top; // address after last allocation
@@ -53,6 +52,10 @@ private:
HeapWord* _allocation_end; // end for allocations (actual TLAB end, excluding alignment_reserve)
size_t _desired_size; // desired size (including alignment_reserve)
+
+ // If too many slow allocations (outside TLAB) happen, we increase
+ // _refill_waste_limit. This reduces outside-TLAB allocations at
+ // the expense of wasting more memory, i.e., the TLAB is discarded sooner.
size_t _refill_waste_limit; // hold onto tlab if free() is larger than this
uint64_t _allocated_before_last_gc; // total bytes allocated up until the last gc
@@ -60,12 +63,24 @@ private:
static unsigned _target_num_refills; // expected number of refills between GCs
unsigned _num_refills;
+ // TLAB retirement is invoked in two main contexts:
+ // 1. TLAB refill:
+ // The current TLAB is insufficient to satisfy a pending allocation
+ // request, triggering a refill. The remaining space in the current
+ // TLAB is treated as waste and tracked in _refill_waste.
+ // 2. Before GC:
+ // Invoked at the start of a GC cycle to ensure heap parsability.
+ // The unused space in the current TLAB is treated as waste and
+ // tracked in _gc_waste.
unsigned _refill_waste;
unsigned _gc_waste;
unsigned _num_slow_allocations;
+
+ // Allocated size for filling TLAB in HeapWords
size_t _allocated_size;
- AdaptiveWeightedAverage _allocation_fraction; // fraction of eden allocated in tlabs
+ // Fraction of eden allocated by this thread, used for sizing its TLAB.
+ AdaptiveWeightedAverage _allocation_fraction;
void reset_statistics();
@@ -78,8 +93,6 @@ private:
void set_refill_waste_limit(size_t waste) { _refill_waste_limit = waste; }
size_t initial_refill_waste_limit();
-
- static int target_num_refills() { return _target_num_refills; }
size_t initial_desired_size();
size_t remaining();
@@ -92,9 +105,7 @@ private:
void accumulate_and_reset_statistics(ThreadLocalAllocStats* stats);
- void print_stats(const char* tag);
-
- Thread* thread();
+ Thread* thread() const;
// statistics
@@ -191,11 +202,13 @@ private:
static PerfVariable* _perf_max_num_slow_allocations;
static AdaptiveWeightedAverage _num_allocating_threads_avg;
+ static AdaptiveWeightedAverage _total_requested_size_fraction;
unsigned int _num_allocating_threads;
unsigned int _total_num_refills;
unsigned int _max_num_refills;
size_t _total_allocated_size;
+ size_t _total_requested_bytes;
size_t _total_gc_waste;
size_t _max_gc_waste;
size_t _total_refill_waste;
@@ -206,14 +219,16 @@ private:
public:
static void initialize();
static unsigned int num_allocating_threads_avg();
+ static float total_requested_size_fraction_avg();
ThreadLocalAllocStats();
- void update_fast_allocations(unsigned int num_refills,
- size_t allocated_size,
- size_t gc_waste,
- size_t refill_waste);
- void update_num_slow_allocations(unsigned int num_slow_allocations);
+ void update_current_thread_stats(unsigned int num_refills,
+ size_t requested_bytes,
+ size_t alloc_size_for_tlab,
+ size_t gc_waste,
+ size_t refill_waste,
+ unsigned int num_slow_allocations);
void update(const ThreadLocalAllocStats& other);
void reset();
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.inline.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.inline.hpp
index 727467f98d0..ee1cca56b9e 100644
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.inline.hpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.inline.hpp
@@ -52,10 +52,22 @@ inline HeapWord* ThreadLocalAllocBuffer::allocate(size_t size) {
}
inline size_t ThreadLocalAllocBuffer::compute_size(size_t obj_size) {
- // Compute the size for the new TLAB.
- // The "last" tlab may be smaller to reduce fragmentation.
const size_t available_size = Universe::heap()->unsafe_max_tlab_alloc() / HeapWordSize;
- size_t new_tlab_size = MIN3(available_size, desired_size() + align_object_size(obj_size), max_size());
+ size_t scaled_desired_size = desired_size();
+ if (ResizeTLAB) {
+ // Extra boost if too many refills; 16X at most.
+ if (_num_refills > _target_num_refills) {
+ const uint excess = _num_refills - _target_num_refills;
+ const uint steps = MIN2(excess / 8, 4U);
+ // Cap before shifting to avoid overflow.
+ if (scaled_desired_size > (max_size() >> steps)) {
+ scaled_desired_size = max_size();
+ } else {
+ scaled_desired_size <<= steps;
+ }
+ }
+ }
+ size_t new_tlab_size = MIN3(available_size, scaled_desired_size + align_object_size(obj_size), max_size());
// Make sure there's enough room for object and filler int[].
if (new_tlab_size < compute_min_size(obj_size)) {
diff --git a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp
index ad1dca47503..637ed6e6407 100644
--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp
+++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp
@@ -133,7 +133,6 @@ LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, L
addr = ensure_in_register(gen, addr, T_ADDRESS);
assert(addr->is_register(), "must be a register at this point");
LIR_Opr result = gen->result_register_for(obj->value_type());
- __ move(obj, result);
LIR_Opr tmp1 = gen->new_register(T_ADDRESS);
LIR_Opr tmp2 = gen->new_register(T_ADDRESS);
@@ -164,6 +163,11 @@ LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, L
CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2, decorators);
__ branch(lir_cond_notEqual, slow);
+
+ // No barrier is needed, move obj to result now.
+ __ move(obj, result);
+
+ // Slow-path re-enters here with result set.
__ branch_destination(slow->continuation());
return result;
@@ -199,7 +203,7 @@ void ShenandoahBarrierSetC1::store_at_resolved(LIRAccess& access, LIR_Opr value)
bool precise = is_array || on_anonymous;
LIR_Opr post_addr = precise ? access.resolved_addr() : access.base().opr();
- post_barrier(access, post_addr, value);
+ post_barrier(access, post_addr);
}
}
@@ -314,7 +318,7 @@ bool ShenandoahBarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob)
return true;
}
-void ShenandoahBarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val) {
+void ShenandoahBarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr) {
assert(ShenandoahCardBarrier, "Should have been checked by caller");
DecoratorSet decorators = access.decorators();
@@ -368,3 +372,71 @@ void ShenandoahBarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_O
__ move(dirty, card_addr);
}
}
+
+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
+ if (!access.is_oop()) {
+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
+ }
+
+ LIRGenerator* gen = access.gen();
+
+ LIR_Opr tmp = gen->new_register(T_OBJECT);
+ LIR_Opr addr = access.resolved_addr();
+
+ // Handle the previous value through SATB, as we are about to perform the store.
+ __ load(addr->as_address_ptr(), tmp);
+ if (ShenandoahSATBBarrier) {
+ pre_barrier(gen, access.access_emit_info(), access.decorators(),
+ /* addr_opr (unused) = */ LIR_OprFact::illegalOpr,
+ /* pre_val = */ tmp);
+ }
+
+ // Perform LRB on location to fix it up for this and all following accesses.
+ // This guarantees there are no false negatives due to concurrent evacuation,
+ // and the value loaded later by CAS is sanitized by some LRB, or is null.
+ if (ShenandoahLoadRefBarrier) {
+ load_reference_barrier(gen, /* obj = */ tmp, /* addr = */ addr, access.decorators());
+ }
+
+ LIR_Opr result = BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
+
+ if (ShenandoahCardBarrier) {
+ post_barrier(access, /* addr = */ addr);
+ }
+
+ return result;
+}
+
+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
+ if (!access.is_oop()) {
+ return BarrierSetC1::atomic_xchg_at_resolved(access, value);
+ }
+
+ LIRGenerator* gen = access.gen();
+
+ LIR_Opr tmp = gen->new_register(T_OBJECT);
+ LIR_Opr addr = access.resolved_addr();
+
+ // Handle the previous value through SATB, as we are about to perform the store.
+ __ load(addr->as_address_ptr(), tmp);
+ if (ShenandoahSATBBarrier) {
+ pre_barrier(gen, access.access_emit_info(), access.decorators(),
+ /* addr_opr (unused) = */ LIR_OprFact::illegalOpr,
+ /* pre_val = */ tmp);
+ }
+
+ // Perform LRB on location to fix it up for this and all following accesses.
+ // This is purely opportunistic: we would not have any false negatives here.
+ // This guarantees the value loaded later by XCHG is sanitized by some LRB, or is null.
+ if (ShenandoahLoadRefBarrier) {
+ load_reference_barrier(gen, /* obj = */ tmp, /* addr = */ addr, access.decorators());
+ }
+
+ LIR_Opr result = BarrierSetC1::atomic_xchg_at_resolved(access, value);
+
+ if (ShenandoahCardBarrier) {
+ post_barrier(access, /* addr = */ addr);
+ }
+
+ return result;
+}
diff --git a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp
index 1b4f2c79bd2..413777a61ee 100644
--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp
+++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp
@@ -127,6 +127,7 @@ public:
visitor->do_input(_addr);
visitor->do_temp(_addr);
visitor->do_temp(_result);
+ visitor->do_output(_result);
visitor->do_temp(_tmp1);
visitor->do_temp(_tmp2);
}
@@ -135,61 +136,6 @@ public:
#endif // PRODUCT
};
-class LIR_OpShenandoahCompareAndSwap : public LIR_Op {
- friend class LIR_OpVisitState;
-
-private:
- LIR_Opr _addr;
- LIR_Opr _cmp_value;
- LIR_Opr _new_value;
- LIR_Opr _tmp1;
- LIR_Opr _tmp2;
-
-public:
- LIR_OpShenandoahCompareAndSwap(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value,
- LIR_Opr t1, LIR_Opr t2, LIR_Opr result)
- : LIR_Op(lir_none, result, nullptr) // no info
- , _addr(addr)
- , _cmp_value(cmp_value)
- , _new_value(new_value)
- , _tmp1(t1)
- , _tmp2(t2) { }
-
- LIR_Opr addr() const { return _addr; }
- LIR_Opr cmp_value() const { return _cmp_value; }
- LIR_Opr new_value() const { return _new_value; }
- LIR_Opr tmp1() const { return _tmp1; }
- LIR_Opr tmp2() const { return _tmp2; }
-
- virtual void visit(LIR_OpVisitState* state) {
- if (_info) state->do_info(_info);
- assert(_addr->is_valid(), "used"); state->do_input(_addr);
- state->do_temp(_addr);
- assert(_cmp_value->is_valid(), "used"); state->do_input(_cmp_value);
- state->do_temp(_cmp_value);
- assert(_new_value->is_valid(), "used"); state->do_input(_new_value);
- state->do_temp(_new_value);
- if (_tmp1->is_valid()) state->do_temp(_tmp1);
- if (_tmp2->is_valid()) state->do_temp(_tmp2);
- if (_result->is_valid()) state->do_output(_result);
- }
-
- virtual void emit_code(LIR_Assembler* masm);
-
- virtual void print_instr(outputStream* out) const {
- addr()->print(out); out->print(" ");
- cmp_value()->print(out); out->print(" ");
- new_value()->print(out); out->print(" ");
- tmp1()->print(out); out->print(" ");
- tmp2()->print(out); out->print(" ");
- }
-#ifndef PRODUCT
- virtual const char* name() const {
- return "shenandoah_cas_obj";
- }
-#endif // PRODUCT
-};
-
class ShenandoahBarrierSetC1 : public BarrierSetC1 {
private:
CodeBlob* _pre_barrier_c1_runtime_code_blob;
@@ -244,7 +190,7 @@ protected:
virtual LIR_Opr atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value);
- void post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val);
+ void post_barrier(LIRAccess& access, LIR_Opr addr);
public:
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
index 4fcc90d7bde..66032944251 100644
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
@@ -23,13 +23,10 @@
*
*/
-#include "classfile/javaClasses.hpp"
+#include "classfile/javaClasses.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
-#include "gc/shenandoah/c2/shenandoahSupport.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "gc/shenandoah/shenandoahCardTable.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
@@ -39,8 +36,8 @@
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
-#include "opto/movenode.hpp"
#include "opto/narrowptrnode.hpp"
+#include "opto/output.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
@@ -48,812 +45,483 @@ ShenandoahBarrierSetC2* ShenandoahBarrierSetC2::bsc2() {
return reinterpret_cast<ShenandoahBarrierSetC2*>(BarrierSet::barrier_set()->barrier_set_c2());
}
-ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena)
- : _load_reference_barriers(new (comp_arena) GrowableArray<ShenandoahLoadReferenceBarrierNode*>(comp_arena, 8, 0, nullptr)) {
+ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena) :
+ BarrierSetC2State(comp_arena),
+ _stubs(new (comp_arena) GrowableArray(comp_arena, 8, 0, nullptr)),
+ _trampoline_stubs_count(0),
+ _stubs_start_offset(0),
+ _stubs_current_total_size(0) {
}
-int ShenandoahBarrierSetC2State::load_reference_barriers_count() const {
- return _load_reference_barriers->length();
-}
-
-ShenandoahLoadReferenceBarrierNode* ShenandoahBarrierSetC2State::load_reference_barrier(int idx) const {
- return _load_reference_barriers->at(idx);
-}
-
-void ShenandoahBarrierSetC2State::add_load_reference_barrier(ShenandoahLoadReferenceBarrierNode * n) {
- assert(!_load_reference_barriers->contains(n), "duplicate entry in barrier list");
- _load_reference_barriers->append(n);
-}
-
-void ShenandoahBarrierSetC2State::remove_load_reference_barrier(ShenandoahLoadReferenceBarrierNode * n) {
- if (_load_reference_barriers->contains(n)) {
- _load_reference_barriers->remove(n);
- }
-}
-
-#define __ kit->
-
-bool ShenandoahBarrierSetC2::satb_can_remove_pre_barrier(GraphKit* kit, PhaseGVN* phase, Node* adr,
- BasicType bt, uint adr_idx) const {
- intptr_t offset = 0;
- Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
- AllocateNode* alloc = AllocateNode::Ideal_allocation(base);
-
- if (offset == Type::OffsetBot) {
- return false; // cannot unalias unless there are precise offsets
- }
-
- if (alloc == nullptr) {
- return false; // No allocation found
- }
-
- intptr_t size_in_bytes = type2aelembytes(bt);
-
- Node* mem = __ memory(adr_idx); // start searching here...
-
- for (int cnt = 0; cnt < 50; cnt++) {
-
- if (mem->is_Store()) {
-
- Node* st_adr = mem->in(MemNode::Address);
- intptr_t st_offset = 0;
- Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
-
- if (st_base == nullptr) {
- break; // inscrutable pointer
- }
-
- // Break we have found a store with same base and offset as ours so break
- if (st_base == base && st_offset == offset) {
- break;
- }
-
- if (st_offset != offset && st_offset != Type::OffsetBot) {
- const int MAX_STORE = BytesPerLong;
- if (st_offset >= offset + size_in_bytes ||
- st_offset <= offset - MAX_STORE ||
- st_offset <= offset - mem->as_Store()->memory_size()) {
- // Success: The offsets are provably independent.
- // (You may ask, why not just test st_offset != offset and be done?
- // The answer is that stores of different sizes can co-exist
- // in the same sequence of RawMem effects. We sometimes initialize
- // a whole 'tile' of array elements with a single jint or jlong.)
- mem = mem->in(MemNode::Memory);
- continue; // advance through independent store memory
- }
- }
-
- if (st_base != base
- && MemNode::detect_ptr_independence(base, alloc, st_base,
- AllocateNode::Ideal_allocation(st_base),
- phase)) {
- // Success: The bases are provably independent.
- mem = mem->in(MemNode::Memory);
- continue; // advance through independent store memory
- }
- } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
-
- InitializeNode* st_init = mem->in(0)->as_Initialize();
- AllocateNode* st_alloc = st_init->allocation();
-
- // Make sure that we are looking at the same allocation site.
- // The alloc variable is guaranteed to not be null here from earlier check.
- if (alloc == st_alloc) {
- // Check that the initialization is storing null so that no previous store
- // has been moved up and directly write a reference
- Node* captured_store = st_init->find_captured_store(offset,
- type2aelembytes(T_OBJECT),
- phase);
- if (captured_store == nullptr || captured_store == st_init->zero_memory()) {
- return true;
- }
- }
- }
-
- // Unless there is an explicit 'continue', we must bail out here,
- // because 'mem' is an inscrutable memory state (e.g., a call).
- break;
- }
-
- return false;
-}
-
-#undef __
-#define __ ideal.
-
-void ShenandoahBarrierSetC2::satb_write_barrier_pre(GraphKit* kit,
- bool do_load,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const {
- // Some sanity checks
- // Note: val is unused in this routine.
-
- if (do_load) {
- // We need to generate the load of the previous value
- assert(adr != nullptr, "where are loading from?");
- assert(pre_val == nullptr, "loaded already?");
- assert(val_type != nullptr, "need a type");
-
- if (ReduceInitialCardMarks
- && satb_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) {
- return;
- }
-
- } else {
- // In this case both val_type and alias_idx are unused.
- assert(pre_val != nullptr, "must be loaded already");
- // Nothing to be done if pre_val is null.
- if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
- assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
- }
- assert(bt == T_OBJECT, "or we shouldn't be here");
-
- IdealKit ideal(kit, true);
-
- Node* tls = __ thread(); // ThreadLocalStorage
-
- Node* no_base = __ top();
- Node* zero = __ ConI(0);
- Node* zeroX = __ ConX(0);
-
- float likely = PROB_LIKELY(0.999);
- float unlikely = PROB_UNLIKELY(0.999);
-
- // Offsets into the thread
- const int index_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset());
- const int buffer_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset());
-
- // Now the actual pointers into the thread
- Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
- Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
-
- // Now some of the values
- Node* marking;
- Node* gc_state = __ AddP(no_base, tls, __ ConX(in_bytes(ShenandoahThreadLocalData::gc_state_offset())));
- Node* ld = __ load(__ ctrl(), gc_state, TypeInt::BYTE, T_BYTE, Compile::AliasIdxRaw);
- marking = __ AndI(ld, __ ConI(ShenandoahHeap::MARKING));
- assert(ShenandoahBarrierC2Support::is_gc_state_load(ld), "Should match the shape");
-
- // if (!marking)
- __ if_then(marking, BoolTest::ne, zero, unlikely); {
- BasicType index_bt = TypeX_X->basic_type();
- assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading Shenandoah SATBMarkQueue::_index with wrong size.");
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
-
- if (do_load) {
- // load original value
- // alias_idx correct??
- pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx);
- }
-
- // if (pre_val != nullptr)
- __ if_then(pre_val, BoolTest::ne, kit->null()); {
- Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
- // is the queue for this thread full?
- __ if_then(index, BoolTest::ne, zeroX, likely); {
-
- // decrement the index
- Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
-
- // Now get the buffer location we will log the previous value into and store it
- Node *log_addr = __ AddP(no_base, buffer, next_index);
- __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
- // update the index
- __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
-
- } __ else_(); {
-
- // logging buffer is full, call the runtime
- const TypeFunc *tf = ShenandoahBarrierSetC2::write_barrier_pre_Type();
- __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), "shenandoah_wb_pre", pre_val);
- } __ end_if(); // (!index)
- } __ end_if(); // (pre_val != nullptr)
- } __ end_if(); // (!marking)
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
-
- if (ShenandoahSATBBarrier && adr != nullptr) {
- Node* c = kit->control();
- Node* call = c->in(1)->in(1)->in(1)->in(0);
- assert(is_shenandoah_wb_pre_call(call), "shenandoah_wb_pre call expected");
- call->add_req(adr);
- }
-}
-
-bool ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(Node* call) {
- return call->is_CallLeaf() &&
- call->as_CallLeaf()->entry_point() == CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre);
-}
-
-bool ShenandoahBarrierSetC2::is_shenandoah_clone_call(Node* call) {
- return call->is_CallLeaf() &&
- call->as_CallLeaf()->entry_point() == CAST_FROM_FN_PTR(address, ShenandoahRuntime::clone_barrier);
-}
-
-bool ShenandoahBarrierSetC2::is_shenandoah_lrb_call(Node* call) {
- if (!call->is_CallLeaf()) {
- return false;
- }
-
- address entry_point = call->as_CallLeaf()->entry_point();
- return (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)) ||
- (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)) ||
- (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)) ||
- (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)) ||
- (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)) ||
- (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow));
-}
-
-bool ShenandoahBarrierSetC2::is_shenandoah_marking_if(PhaseValues* phase, Node* n) {
- if (n->Opcode() != Op_If) {
- return false;
- }
-
- Node* bol = n->in(1);
- assert(bol->is_Bool(), "");
- Node* cmpx = bol->in(1);
- if (bol->as_Bool()->_test._test == BoolTest::ne &&
- cmpx->is_Cmp() && cmpx->in(2) == phase->intcon(0) &&
- is_shenandoah_state_load(cmpx->in(1)->in(1)) &&
- cmpx->in(1)->in(2)->is_Con() &&
- cmpx->in(1)->in(2) == phase->intcon(ShenandoahHeap::MARKING)) {
- return true;
- }
-
- return false;
-}
-
-bool ShenandoahBarrierSetC2::is_shenandoah_state_load(Node* n) {
- if (!n->is_Load()) return false;
- const int state_offset = in_bytes(ShenandoahThreadLocalData::gc_state_offset());
- return n->in(2)->is_AddP() && n->in(2)->in(2)->Opcode() == Op_ThreadLocal
- && n->in(2)->in(3)->is_Con()
- && n->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == state_offset;
-}
-
-void ShenandoahBarrierSetC2::shenandoah_write_barrier_pre(GraphKit* kit,
- bool do_load,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const {
- if (ShenandoahSATBBarrier) {
- IdealKit ideal(kit);
- kit->sync_kit(ideal);
-
- satb_write_barrier_pre(kit, do_load, obj, adr, alias_idx, val, val_type, pre_val, bt);
-
- ideal.sync_kit(kit);
- kit->final_sync(ideal);
- }
-}
-
-// Helper that guards and inserts a pre-barrier.
-void ShenandoahBarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
- Node* pre_val, bool need_mem_bar) const {
- // We could be accessing the referent field of a reference object. If so, when Shenandoah
- // is enabled, we need to log the value in the referent field in an SATB buffer.
- // This routine performs some compile time filters and generates suitable
- // runtime filters that guard the pre-barrier code.
- // Also add memory barrier for non volatile load from the referent field
- // to prevent commoning of loads across safepoint.
-
- // Some compile time checks.
-
- // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
- const TypeX* otype = offset->find_intptr_t_type();
- if (otype != nullptr && otype->is_con() &&
- otype->get_con() != java_lang_ref_Reference::referent_offset()) {
- // Constant offset but not the reference_offset so just return
- return;
- }
-
- // We only need to generate the runtime guards for instances.
- const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
- if (btype != nullptr) {
- if (btype->isa_aryptr()) {
- // Array type so nothing to do
- return;
- }
-
- const TypeInstPtr* itype = btype->isa_instptr();
- if (itype != nullptr) {
- // Can the klass of base_oop be statically determined to be
- // _not_ a sub-class of Reference and _not_ Object?
- ciKlass* klass = itype->instance_klass();
- if (klass->is_loaded() &&
- !klass->is_subtype_of(kit->env()->Reference_klass()) &&
- !kit->env()->Object_klass()->is_subtype_of(klass)) {
- return;
- }
- }
- }
-
- // The compile time filters did not reject base_oop/offset so
- // we need to generate the following runtime filters
- //
- // if (offset == java_lang_ref_Reference::_reference_offset) {
- // if (instance_of(base, java.lang.ref.Reference)) {
- // pre_barrier(_, pre_val, ...);
- // }
- // }
-
- float likely = PROB_LIKELY( 0.999);
- float unlikely = PROB_UNLIKELY(0.999);
-
- IdealKit ideal(kit);
-
- Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset());
-
- __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
- // Update graphKit memory and control from IdealKit.
- kit->sync_kit(ideal);
-
- Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass()));
- Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con);
-
- // Update IdealKit memory and control from graphKit.
- __ sync_kit(kit);
-
- Node* one = __ ConI(1);
- // is_instof == 0 if base_oop == nullptr
- __ if_then(is_instof, BoolTest::eq, one, unlikely); {
-
- // Update graphKit from IdeakKit.
- kit->sync_kit(ideal);
-
- // Use the pre-barrier to record the value in the referent field
- satb_write_barrier_pre(kit, false /* do_load */,
- nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */,
- pre_val /* pre_val */,
- T_OBJECT);
- if (need_mem_bar) {
- // Add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change its value.
- kit->insert_mem_bar(Op_MemBarCPUOrder);
- }
- // Update IdealKit from graphKit.
- __ sync_kit(kit);
-
- } __ end_if(); // _ref_type != ref_none
- } __ end_if(); // offset == referent_offset
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
-}
-
-void ShenandoahBarrierSetC2::post_barrier(GraphKit* kit,
- Node* ctl,
- Node* oop_store,
- Node* obj,
- Node* adr,
- uint adr_idx,
- Node* val,
- BasicType bt,
- bool use_precise) const {
- assert(ShenandoahCardBarrier, "Should have been checked by caller");
-
- // No store check needed if we're storing a null.
- if (val != nullptr && val->is_Con()) {
- // must be either an oop or null
- const Type* t = val->bottom_type();
- if (t == TypePtr::NULL_PTR || t == Type::TOP)
- return;
- }
-
- if (ReduceInitialCardMarks && obj == kit->just_allocated_object(kit->control())) {
- // We use card marks to track old to young references in Generational Shenandoah;
- // see flag ShenandoahCardBarrier above.
- // Objects are always allocated in the young generation and initialized
- // before they are promoted. There's always a safepoint (e.g. at final mark)
- // before an object is promoted from young to old. Promotion entails dirtying of
- // the cards backing promoted objects, so they will be guaranteed to be scanned
- // at the next remembered set scan of the old generation.
- // Thus, we can safely skip card-marking of initializing stores on a
- // freshly-allocated object. If any of the assumptions above change in
- // the future, this code will need to be re-examined; see check in
- // ShenandoahCardBarrier::on_slowpath_allocation_exit().
- return;
- }
-
- if (!use_precise) {
- // All card marks for a (non-array) instance are in one place:
- adr = obj;
- }
- // (Else it's an array (or unknown), and we want more precise card marks.)
- assert(adr != nullptr, "");
-
- IdealKit ideal(kit, true);
-
- Node* tls = __ thread(); // ThreadLocalStorage
-
- // Convert the pointer to an int prior to doing math on it
- Node* cast = __ CastPX(__ ctrl(), adr);
-
- Node* curr_ct_holder_offset = __ ConX(in_bytes(ShenandoahThreadLocalData::card_table_offset()));
- Node* curr_ct_holder_addr = __ AddP(__ top(), tls, curr_ct_holder_offset);
- Node* curr_ct_base_addr = __ load( __ ctrl(), curr_ct_holder_addr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
- // Divide by card size
- Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift()) );
-
- // Combine card table base and card offset
- Node* card_adr = __ AddP(__ top(), curr_ct_base_addr, card_offset);
-
- // Get the alias_index for raw card-mark memory
- int adr_type = Compile::AliasIdxRaw;
- Node* zero = __ ConI(0); // Dirty card value
-
- if (UseCondCardMark) {
- // The classic GC reference write barrier is typically implemented
- // as a store into the global card mark table. Unfortunately
- // unconditional stores can result in false sharing and excessive
- // coherence traffic as well as false transactional aborts.
- // UseCondCardMark enables MP "polite" conditional card mark
- // stores. In theory we could relax the load from ctrl() to
- // no_ctrl, but that doesn't buy much latitude.
- Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type);
- __ if_then(card_val, BoolTest::ne, zero);
- }
-
- // Smash zero into card
- __ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered);
-
- if (UseCondCardMark) {
- __ end_if();
- }
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
-}
-
-#undef __
-
-const TypeFunc* ShenandoahBarrierSetC2::_write_barrier_pre_Type = nullptr;
-const TypeFunc* ShenandoahBarrierSetC2::_clone_barrier_Type = nullptr;
-const TypeFunc* ShenandoahBarrierSetC2::_load_reference_barrier_Type = nullptr;
-
-inline const TypeFunc* ShenandoahBarrierSetC2::write_barrier_pre_Type() {
- assert(ShenandoahBarrierSetC2::_write_barrier_pre_Type != nullptr, "should be initialized");
- return ShenandoahBarrierSetC2::_write_barrier_pre_Type;
-}
-
-inline const TypeFunc* ShenandoahBarrierSetC2::clone_barrier_Type() {
- assert(ShenandoahBarrierSetC2::_clone_barrier_Type != nullptr, "should be initialized");
- return ShenandoahBarrierSetC2::_clone_barrier_Type;
-}
-
-const TypeFunc* ShenandoahBarrierSetC2::load_reference_barrier_Type() {
- assert(ShenandoahBarrierSetC2::_load_reference_barrier_Type != nullptr, "should be initialized");
- return ShenandoahBarrierSetC2::_load_reference_barrier_Type;
-}
-
-void ShenandoahBarrierSetC2::init() {
- ShenandoahBarrierSetC2::make_write_barrier_pre_Type();
- ShenandoahBarrierSetC2::make_clone_barrier_Type();
- ShenandoahBarrierSetC2::make_load_reference_barrier_Type();
-}
-
-void ShenandoahBarrierSetC2::make_write_barrier_pre_Type() {
- assert(ShenandoahBarrierSetC2::_write_barrier_pre_Type == nullptr, "should be");
- const Type **fields = TypeTuple::fields(1);
- fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(0);
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
-
- ShenandoahBarrierSetC2::_write_barrier_pre_Type = TypeFunc::make(domain, range);
-}
-
-void ShenandoahBarrierSetC2::make_clone_barrier_Type() {
- assert(ShenandoahBarrierSetC2::_clone_barrier_Type == nullptr, "should be");
- const Type **fields = TypeTuple::fields(1);
- fields[TypeFunc::Parms+0] = TypeOopPtr::NOTNULL; // src oop
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(0);
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
-
- ShenandoahBarrierSetC2::_clone_barrier_Type = TypeFunc::make(domain, range);
-}
-
-void ShenandoahBarrierSetC2::make_load_reference_barrier_Type() {
- assert(ShenandoahBarrierSetC2::_load_reference_barrier_Type == nullptr, "should be");
- const Type **fields = TypeTuple::fields(2);
- fields[TypeFunc::Parms+0] = TypeOopPtr::BOTTOM; // original field value
- fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // original load address
-
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(1);
- fields[TypeFunc::Parms+0] = TypeOopPtr::BOTTOM;
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
-
- ShenandoahBarrierSetC2::_load_reference_barrier_Type = TypeFunc::make(domain, range);
-}
-
-Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
- DecoratorSet decorators = access.decorators();
-
- const TypePtr* adr_type = access.addr().type();
- Node* adr = access.addr().node();
-
- bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
-
+static void set_barrier_data(C2Access& access, bool load, bool store) {
if (!access.is_oop()) {
- return BarrierSetC2::store_at_resolved(access, val);
+ return;
}
- if (no_keepalive) {
- // No keep-alive means no need for the pre-barrier.
- return BarrierSetC2::store_at_resolved(access, val);
+ DecoratorSet decorators = access.decorators();
+ bool tightly_coupled = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0;
+ bool in_heap = (decorators & IN_HEAP) != 0;
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+
+ if (tightly_coupled) {
+ access.set_barrier_data(ShenandoahBitElided);
+ return;
}
- if (access.is_parse_access()) {
- C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
- GraphKit* kit = parse_access.kit();
+ uint8_t barrier_data = 0;
- uint adr_idx = kit->C->get_alias_index(adr_type);
- assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
- shenandoah_write_barrier_pre(kit, true /* do_load */, /*kit->control(),*/ access.base(), adr, adr_idx, val.node(),
- static_cast<const TypeOopPtr*>(val.type()), nullptr /* pre_val */, access.type());
-
- Node* result = BarrierSetC2::store_at_resolved(access, val);
-
- if (ShenandoahCardBarrier) {
- const bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
- const bool is_array = (decorators & IS_ARRAY) != 0;
- const bool use_precise = is_array || anonymous;
- post_barrier(kit, kit->control(), access.raw_access(), access.base(),
- adr, adr_idx, val.node(), access.type(), use_precise);
+ if (load) {
+ if (ShenandoahLoadRefBarrier) {
+ if (on_phantom) {
+ barrier_data |= ShenandoahBitPhantom;
+ } else if (on_weak) {
+ barrier_data |= ShenandoahBitWeak;
+ } else {
+ barrier_data |= ShenandoahBitStrong;
+ }
}
- return result;
- } else {
- assert(access.is_opt_access(), "only for optimization passes");
- assert(((decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0 || !ShenandoahSATBBarrier) && (decorators & C2_ARRAY_COPY) != 0, "unexpected caller of this code");
- return BarrierSetC2::store_at_resolved(access, val);
}
+
+ if (store) {
+ if (ShenandoahSATBBarrier) {
+ barrier_data |= ShenandoahBitKeepAlive;
+ }
+ if (ShenandoahCardBarrier && in_heap) {
+ barrier_data |= ShenandoahBitCardMark;
+ }
+ }
+
+ if (!in_heap) {
+ barrier_data |= ShenandoahBitNative;
+ }
+
+ access.set_barrier_data(barrier_data);
}
Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
- // 1: non-reference load, no additional barrier is needed
+ // 1. Non-reference load, no additional barrier is needed
if (!access.is_oop()) {
return BarrierSetC2::load_at_resolved(access, val_type);
}
- Node* load = BarrierSetC2::load_at_resolved(access, val_type);
+ // 2. Set barrier data for load
+ set_barrier_data(access, /* load = */ true, /* store = */ false);
+
+ // 3. Correction: a weak or phantom reference load that is not marked
+ // AS_NO_KEEPALIVE may resurrect the referent, so it must also keep it alive.
DecoratorSet decorators = access.decorators();
- BasicType type = access.type();
-
- // 2: apply LRB if needed
- if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
- load = new ShenandoahLoadReferenceBarrierNode(nullptr, load, decorators);
- if (access.is_parse_access()) {
- load = static_cast<C2ParseAccess&>(access).kit()->gvn().transform(load);
- } else {
- load = static_cast<C2OptAccess&>(access).gvn().transform(load);
- }
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+ bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
+ bool needs_keepalive = ((on_weak || on_phantom) && !no_keepalive);
+ if (needs_keepalive) {
+ uint8_t barriers = access.barrier_data() | (ShenandoahSATBBarrier ? ShenandoahBitKeepAlive : 0);
+ access.set_barrier_data(barriers);
}
- // 3: apply keep-alive barrier for java.lang.ref.Reference if needed
- if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
- Node* top = Compile::current()->top();
- Node* adr = access.addr().node();
- Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : top;
- Node* obj = access.base();
+ return BarrierSetC2::load_at_resolved(access, val_type);
+}
- bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
- bool on_weak_ref = (decorators & (ON_WEAK_OOP_REF | ON_PHANTOM_OOP_REF)) != 0;
- bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0;
-
- // If we are reading the value of the referent field of a Reference
- // object (either by using Unsafe directly or through reflection)
- // then, if SATB is enabled, we need to record the referent in an
- // SATB log buffer using the pre-barrier mechanism.
- // Also we need to add memory barrier to prevent commoning reads
- // from this field across safepoint since GC can change its value.
- if (!on_weak_ref || (unknown && (offset == top || obj == top)) || !keep_alive) {
- return load;
- }
-
- assert(access.is_parse_access(), "entry not supported at optimization time");
- C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
- GraphKit* kit = parse_access.kit();
- bool mismatched = (decorators & C2_MISMATCHED) != 0;
- bool is_unordered = (decorators & MO_UNORDERED) != 0;
- bool in_native = (decorators & IN_NATIVE) != 0;
- bool need_cpu_mem_bar = !is_unordered || mismatched || in_native;
-
- if (on_weak_ref) {
- // Use the pre-barrier to record the value in the referent field
- satb_write_barrier_pre(kit, false /* do_load */,
- nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */,
- load /* pre_val */, T_OBJECT);
- // Add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change its value.
- kit->insert_mem_bar(Op_MemBarCPUOrder);
- } else if (unknown) {
- // We do not require a mem bar inside pre_barrier if need_mem_bar
- // is set: the barriers would be emitted by us.
- insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar);
- }
+Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
+ // 1. Non-reference store, no additional barrier is needed
+ if (!access.is_oop()) {
+ return BarrierSetC2::store_at_resolved(access, val);
}
- return load;
+ // 2. Set barrier data for store
+ set_barrier_data(access, /* load = */ false, /* store = */ true);
+
+ // 3. Correction: AS_NO_KEEPALIVE stores must not carry the keep-alive bit.
+ DecoratorSet decorators = access.decorators();
+ bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
+ if (no_keepalive) {
+ access.set_barrier_data(access.barrier_data() & ~ShenandoahBitKeepAlive);
+ }
+
+ return BarrierSetC2::store_at_resolved(access, val);
}
Node* ShenandoahBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
Node* new_val, const Type* value_type) const {
- GraphKit* kit = access.kit();
- if (access.is_oop()) {
- shenandoah_write_barrier_pre(kit, false /* do_load */,
- nullptr, nullptr, max_juint, nullptr, nullptr,
- expected_val /* pre_val */, T_OBJECT);
-
- MemNode::MemOrd mo = access.mem_node_mo();
- Node* mem = access.memory();
- Node* adr = access.addr().node();
- const TypePtr* adr_type = access.addr().type();
- Node* load_store = nullptr;
-
-#ifdef _LP64
- if (adr->bottom_type()->is_ptr_to_narrowoop()) {
- Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
- Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
- if (ShenandoahCASBarrier) {
- load_store = kit->gvn().transform(new ShenandoahCompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
- } else {
- load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
- }
- } else
-#endif
- {
- if (ShenandoahCASBarrier) {
- load_store = kit->gvn().transform(new ShenandoahCompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo));
- } else {
- load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo));
- }
- }
-
- access.set_raw_access(load_store);
- pin_atomic_op(access);
-
-#ifdef _LP64
- if (adr->bottom_type()->is_ptr_to_narrowoop()) {
- load_store = kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
- }
-#endif
- load_store = kit->gvn().transform(new ShenandoahLoadReferenceBarrierNode(nullptr, load_store, access.decorators()));
- if (ShenandoahCardBarrier) {
- post_barrier(kit, kit->control(), access.raw_access(), access.base(),
- access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true);
- }
- return load_store;
- }
+ set_barrier_data(access, /* load = */ true, /* store = */ true);
return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}
Node* ShenandoahBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
Node* new_val, const Type* value_type) const {
- GraphKit* kit = access.kit();
- if (access.is_oop()) {
- shenandoah_write_barrier_pre(kit, false /* do_load */,
- nullptr, nullptr, max_juint, nullptr, nullptr,
- expected_val /* pre_val */, T_OBJECT);
- DecoratorSet decorators = access.decorators();
- MemNode::MemOrd mo = access.mem_node_mo();
- Node* mem = access.memory();
- bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
- Node* load_store = nullptr;
- Node* adr = access.addr().node();
-#ifdef _LP64
- if (adr->bottom_type()->is_ptr_to_narrowoop()) {
- Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
- Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
- if (ShenandoahCASBarrier) {
- if (is_weak_cas) {
- load_store = kit->gvn().transform(new ShenandoahWeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
- } else {
- load_store = kit->gvn().transform(new ShenandoahCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
- }
- } else {
- if (is_weak_cas) {
- load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
- } else {
- load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
- }
- }
- } else
-#endif
- {
- if (ShenandoahCASBarrier) {
- if (is_weak_cas) {
- load_store = kit->gvn().transform(new ShenandoahWeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
- } else {
- load_store = kit->gvn().transform(new ShenandoahCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
- }
- } else {
- if (is_weak_cas) {
- load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
- } else {
- load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
- }
- }
- }
- access.set_raw_access(load_store);
- pin_atomic_op(access);
- if (ShenandoahCardBarrier) {
- post_barrier(kit, kit->control(), access.raw_access(), access.base(),
- access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true);
- }
- return load_store;
- }
+ set_barrier_data(access, /* load = */ true, /* store = */ true);
return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}
Node* ShenandoahBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* val, const Type* value_type) const {
- GraphKit* kit = access.kit();
- Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, val, value_type);
- if (access.is_oop()) {
- result = kit->gvn().transform(new ShenandoahLoadReferenceBarrierNode(nullptr, result, access.decorators()));
- shenandoah_write_barrier_pre(kit, false /* do_load */,
- nullptr, nullptr, max_juint, nullptr, nullptr,
- result /* pre_val */, T_OBJECT);
- if (ShenandoahCardBarrier) {
- post_barrier(kit, kit->control(), access.raw_access(), access.base(),
- access.addr().node(), access.alias_idx(), val, T_OBJECT, true);
+ set_barrier_data(access, /* load = */ true, /* store = */ true);
+ return BarrierSetC2::atomic_xchg_at_resolved(access, val, value_type);
+}
+
+bool ShenandoahBarrierSetC2::is_Load(int opcode) {
+ switch (opcode) {
+ case Op_LoadN:
+ case Op_LoadP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ShenandoahBarrierSetC2::is_Store(int opcode) {
+ switch (opcode) {
+ case Op_StoreN:
+ case Op_StoreP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ShenandoahBarrierSetC2::is_LoadStore(int opcode) {
+ switch (opcode) {
+ case Op_CompareAndExchangeN:
+ case Op_CompareAndExchangeP:
+ case Op_WeakCompareAndSwapN:
+ case Op_WeakCompareAndSwapP:
+ case Op_CompareAndSwapN:
+ case Op_CompareAndSwapP:
+ case Op_GetAndSetP:
+ case Op_GetAndSetN:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ShenandoahBarrierSetC2::can_remove_load_barrier(Node* root) {
+ // Check if all outs feed into nodes that do not expose the oops to the rest
+ // of the runtime system. In this case, we can elide the LRB barrier. We bail
+ // out with false at the first sight of trouble.
+
+ ResourceMark rm;
+ VectorSet visited;
+ Node_List worklist;
+ worklist.push(root);
+
+ while (worklist.size() > 0) {
+ Node* n = worklist.pop();
+ if (visited.test_set(n->_idx)) {
+ continue;
+ }
+
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* out = n->fast_out(i);
+ switch (out->Opcode()) {
+ case Op_Phi:
+ case Op_EncodeP:
+ case Op_DecodeN:
+ case Op_CastPP:
+ case Op_CheckCastPP:
+ case Op_AddP: {
+ // Transitive node, check if any other outs are doing anything troublesome.
+ worklist.push(out);
+ break;
+ }
+
+ case Op_LoadRange: {
+ // Array length is the same in all copies.
+ break;
+ }
+
+ case Op_LoadKlass: {
+ // Klass is the same in all copies.
+ // We would have liked to assert -UCOH, but there are legitimate klass
+ // loads from native Klass* instances, which are also safe under +UCOH.
+ break;
+ }
+
+ case Op_LoadNKlass: {
+ // Similar to above, but LoadNKlass is only safe without +UCOH.
+ // With +UCOH, it loads from mark word, which clashes with forwarding pointers.
+ if (!UseCompactObjectHeaders) {
+ break;
+ }
+ return false;
+ }
+
+ case Op_CmpN: {
+ if (out->in(1) == n &&
+ out->in(2)->Opcode() == Op_ConN &&
+ out->in(2)->get_narrowcon() == 0) {
+ // Null check, no oop is exposed.
+ break;
+ }
+ if (out->in(2) == n &&
+ out->in(1)->Opcode() == Op_ConN &&
+ out->in(1)->get_narrowcon() == 0) {
+ // Null check, no oop is exposed.
+ break;
+ }
+ return false;
+ }
+
+ case Op_CmpP: {
+ if (out->in(1) == n &&
+ out->in(2)->Opcode() == Op_ConP &&
+ out->in(2)->get_ptr() == 0) {
+ // Null check, no oop is exposed.
+ break;
+ }
+ if (out->in(2) == n &&
+ out->in(1)->Opcode() == Op_ConP &&
+ out->in(1)->get_ptr() == 0) {
+ // Null check, no oop is exposed.
+ break;
+ }
+ return false;
+ }
+
+ case Op_CallStaticJava: {
+ if (out->as_CallStaticJava()->is_uncommon_trap()) {
+ // Local feeds into uncommon trap. Deopt machinery handles barriers itself.
+ break;
+ }
+ return false;
+ }
+
+ default: {
+ // Paranoidly distrust any other nodes.
+ return false;
+ }
+ }
}
}
- return result;
+
+ // Nothing troublesome found.
+ return true;
}
+uint8_t ShenandoahBarrierSetC2::refine_load(Node* n, uint8_t bd) {
+ assert(ShenandoahElideIdealBarriers, "Checked by caller");
+ assert(bd != 0, "Checked by caller");
-bool ShenandoahBarrierSetC2::is_gc_pre_barrier_node(Node* node) const {
- return is_shenandoah_wb_pre_call(node);
-}
-
-bool ShenandoahBarrierSetC2::is_gc_barrier_node(Node* node) const {
- return (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) ||
- is_shenandoah_lrb_call(node) ||
- is_shenandoah_wb_pre_call(node) ||
- is_shenandoah_clone_call(node);
-}
-
-Node* ShenandoahBarrierSetC2::step_over_gc_barrier(Node* c) const {
- if (c == nullptr) {
- return c;
+ // Do not touch weak loads at all: they are responsible for shielding from
+ // Reference.referent resurrection.
+ if ((bd & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0) {
+ return bd;
}
- if (c->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
- return c->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
+
+ if (((bd & ShenandoahBitStrong) != 0) && can_remove_load_barrier(n)) {
+ bd &= ~ShenandoahBitStrong;
}
- return c;
+
+ return bd;
}
-bool ShenandoahBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
- return !ShenandoahBarrierC2Support::expand(C, igvn);
+uint8_t ShenandoahBarrierSetC2::refine_store(Node* n, uint8_t bd) {
+ assert(ShenandoahElideIdealBarriers, "Checked by caller");
+ assert(bd != 0, "Checked by caller");
+ assert(n->is_Mem() || n->is_LoadStore(), "Sanity");
+
+ const Node* newval = n->in(MemNode::ValueIn);
+ assert(newval != nullptr, "Should be present");
+
+ // Type system tells us something about nullity?
+ const Type* newval_bottom = newval->bottom_type();
+ assert(newval_bottom->isa_oopptr() || newval_bottom->isa_narrowoop() ||
+ newval_bottom == TypePtr::NULL_PTR, "Should be an oop store");
+ const TypePtr* newval_type = newval_bottom->make_ptr();
+ assert(newval_type != nullptr, "Should have been filtered before");
+ TypePtr::PTR newval_type_ptr = newval_type->ptr();
+ if (newval_type_ptr == TypePtr::Null) {
+ bd &= ~ShenandoahBitNotNull;
+ // Card table barrier is not needed if we store null.
+ bd &= ~ShenandoahBitCardMark;
+ } else if (newval_type_ptr == TypePtr::NotNull) {
+ // Definitely not null.
+ bd |= ShenandoahBitNotNull;
+ }
+
+ return bd;
}
-bool ShenandoahBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const {
- if (mode == LoopOptsShenandoahExpand) {
- assert(UseShenandoahGC, "only for shenandoah");
- ShenandoahBarrierC2Support::pin_and_expand(phase);
- return true;
+void ShenandoahBarrierSetC2::final_refinement(Compile* compile) const {
+ ResourceMark rm;
+ Unique_Node_List wq;
+
+ RootNode* root = compile->root();
+ wq.push(root);
+
+ // Also seed the outs to capture nodes that are not reachable from in()-s, e.g. endless loops.
+ for (DUIterator_Fast imax, i = root->fast_outs(imax); i < imax; i++) {
+ Node* m = root->fast_out(i);
+ wq.push(m);
}
- return false;
+
+ for (uint next = 0; next < wq.size(); next++) {
+ Node* n = wq.at(next);
+
+ assert(!n->is_Mach(), "No Mach nodes here yet");
+
+ int opc = n->Opcode();
+ bool is_load = is_Load(opc);
+ bool is_store = is_Store(opc);
+ bool is_load_store = is_LoadStore(opc);
+
+ uint8_t orig_bd = 0;
+ if (is_load_store) {
+ orig_bd = n->as_LoadStore()->barrier_data();
+ } else if (is_load || is_store) {
+ orig_bd = n->as_Mem()->barrier_data();
+ }
+
+ uint8_t bd = orig_bd;
+ if (ShenandoahElideIdealBarriers && bd != 0) {
+ // Note: we cannot apply load optimizations to LoadStores,
+ // because their load barriers are needed for fixups.
+ if (is_load) {
+ bd = refine_load(n, bd);
+ }
+ if (is_store || is_load_store) {
+ bd = refine_store(n, bd);
+ }
+ }
+
+ // If there are no real barrier flags on the node, strip away additional fluff.
+ // Matcher does not care about this, and we would like to avoid invoking "barrier_data() != 0"
+ // rules when the only flags are the irrelevant fluff.
+ if ((bd != 0) && (bd & ShenandoahBitsReal) == 0) {
+ bd = 0;
+ }
+
+ if (bd != orig_bd) {
+ if (is_load_store) {
+ n->as_LoadStore()->set_barrier_data(bd);
+ } else {
+ n->as_Mem()->set_barrier_data(bd);
+ }
+ }
+
+ for (uint j = 0; j < n->req(); j++) {
+ Node* in = n->in(j);
+ if (in != nullptr) {
+ wq.push(in);
+ }
+ }
+ }
+}
+
+// Support for macro expanded GC barriers
+void ShenandoahBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
+ if (node->is_LoadStore()) {
+ LoadStoreNode* loadstore = node->as_LoadStore();
+ loadstore->set_barrier_data(0);
+ } else if (node->is_Mem()) {
+ MemNode* mem = node->as_Mem();
+ mem->set_barrier_data(0);
+ }
+}
+
+void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
+ eliminate_gc_barrier_data(node);
+}
+
+void ShenandoahBarrierSetC2::elide_dominated_barrier(MachNode* node, MachNode* dominator) const {
+ uint8_t orig_bd = node->barrier_data();
+ if (orig_bd == 0) {
+ // Nothing to do.
+ return;
+ }
+
+ uint8_t bd = orig_bd;
+ int node_opcode = node->ideal_Opcode();
+
+ if (dominator == nullptr) {
+ // Must be allocation node.
+ if (is_Load(node_opcode) || is_LoadStore(node_opcode)) {
+ // Loads from recent allocations do not need LRBs.
+ bd &= ~ShenandoahBitStrong;
+ }
+ if (is_Store(node_opcode) || is_LoadStore(node_opcode)) {
+ // Stores to recent allocations do not need KA or CM.
+ bd &= ~ShenandoahBitKeepAlive;
+ bd &= ~ShenandoahBitCardMark;
+ }
+ } else {
+ // LoadStores do not get these optimizations, since their LRBs
+ // are required for fixups.
+ if (is_Load(node_opcode) || is_Store(node_opcode)) {
+ int dom_opcode = dominator->ideal_Opcode();
+ uint8_t dom_bd = dominator->barrier_data();
+
+ if (is_Load(dom_opcode) || is_LoadStore(dom_opcode)) {
+ // If dominating load is set up to perform LRB fixups, no further LRB is needed.
+ if ((dom_bd & ShenandoahBitStrong) != 0) {
+ bd &= ~ShenandoahBitStrong;
+ }
+ }
+ if (is_Store(dom_opcode)) {
+ // Dominating store has stored the good ref, no LRB is needed.
+ bd &= ~ShenandoahBitStrong;
+ }
+ }
+ }
+
+ if (orig_bd != bd) {
+ // We are already in final output.
+ // Strip the extra barrier data if no real bits are left.
+ if ((bd & ShenandoahBitsReal) != 0) {
+ node->set_barrier_data(bd);
+ } else {
+ node->set_barrier_data(0);
+ }
+ }
+}
+
+void ShenandoahBarrierSetC2::analyze_dominating_barriers() const {
+ if (!ShenandoahElideMachBarriers) {
+ return;
+ }
+
+ ResourceMark rm;
+ Node_List accesses, dominators;
+
+ PhaseCFG* const cfg = Compile::current()->cfg();
+ for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
+ const Block* const block = cfg->get_block(i);
+ for (uint j = 0; j < block->number_of_nodes(); ++j) {
+ Node* const node = block->get_node(j);
+
+ // Everything that happens in allocations does not need barriers.
+ // Record them for dominance analysis.
+ if (node->is_Phi() && is_allocation(node)) {
+ dominators.push(node);
+ continue;
+ }
+
+ if (!node->is_Mach()) {
+ continue;
+ }
+
+ MachNode* const mach = node->as_Mach();
+ int opcode = mach->ideal_Opcode();
+ if (is_Load(opcode) || is_Store(opcode) || is_LoadStore(opcode)) {
+ if ((mach->barrier_data() & ShenandoahBitsReal) != 0) {
+ accesses.push(mach);
+ dominators.push(mach);
+ }
+ }
+ }
+ }
+
+ elide_dominated_barriers(accesses, dominators);
+}
+
+uint ShenandoahBarrierSetC2::estimated_barrier_size(const Node* node) const {
+ // Barrier impact on fast-path is driven by GC state checks emitted very late.
+ // These checks are tight load-test-branch sequences, with no impact on C2 graph
+ // size. Limiting unrolling in presence of GC barriers might turn some loops
+ // tighter than with default unrolling, which may benefit performance due to denser
+ // code. Testing shows it is still counter-productive.
+ // Therefore, we report zero barrier size to let C2 do its normal thing.
+ return 0;
}
bool ShenandoahBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const {
@@ -870,186 +538,122 @@ bool ShenandoahBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_couple
return true;
}
-bool ShenandoahBarrierSetC2::clone_needs_barrier(Node* src, PhaseGVN& gvn) {
- const TypeOopPtr* src_type = gvn.type(src)->is_oopptr();
+bool ShenandoahBarrierSetC2::clone_needs_barrier(const TypeOopPtr* src_type, bool& is_oop_array) {
+ if (!ShenandoahCloneBarrier) {
+ return false;
+ }
+
if (src_type->isa_instptr() != nullptr) {
+ // Instance: need barrier only if there is a possibility of having an oop anywhere in it.
ciInstanceKlass* ik = src_type->is_instptr()->instance_klass();
- if ((src_type->klass_is_exact() || !ik->has_subklass()) && !ik->has_injected_fields()) {
- if (ik->has_object_fields()) {
- return true;
- } else {
- if (!src_type->klass_is_exact()) {
- Compile::current()->dependencies()->assert_leaf_type(ik);
- }
+ if ((src_type->klass_is_exact() || !ik->has_subklass()) &&
+ !ik->has_injected_fields() && !ik->has_object_fields()) {
+ if (!src_type->klass_is_exact()) {
+ // Class is *currently* the leaf in the hierarchy.
+ // Record the dependency so that we deopt if this does not hold in future.
+ Compile::current()->dependencies()->assert_leaf_type(ik);
}
- } else {
- return true;
- }
- } else if (src_type->isa_aryptr()) {
+ return false;
+ }
+ } else if (src_type->isa_aryptr() != nullptr) {
+ // Array: need barrier only if array is oop-bearing.
BasicType src_elem = src_type->isa_aryptr()->elem()->array_element_basic_type();
if (is_reference_type(src_elem, true)) {
- return true;
+ is_oop_array = true;
+ } else {
+ return false;
}
- } else {
- return true;
}
- return false;
+
+ // Assume the worst.
+ return true;
+}
+
+void ShenandoahBarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
+ const TypeOopPtr* src_type = kit->gvn().type(src_base)->is_oopptr();
+
+ bool is_oop_array = false;
+ if (!clone_needs_barrier(src_type, is_oop_array)) {
+ // No barrier is needed? Just do what common BarrierSetC2 wants with it.
+ BarrierSetC2::clone(kit, src_base, dst_base, size, is_array);
+ return;
+ }
+
+ if (ShenandoahCloneRuntime || !is_array || !is_oop_array) {
+ // Looks like an instance? Prepare the instance clone. This would either
+ // be exploded into individual accesses or be left as runtime call.
+ // Common BarrierSetC2 prepares everything for both cases.
+ BarrierSetC2::clone(kit, src_base, dst_base, size, is_array);
+ return;
+ }
+
+ // We are cloning the oop array. Prepare to call the normal arraycopy stub
+ // after the expansion. Normal stub takes the number of actual type-sized
+ // elements to copy after the base, compute the count here.
+ Node* offset = kit->MakeConX(arrayOopDesc::base_offset_in_bytes(UseCompressedOops ? T_NARROWOOP : T_OBJECT));
+ size = kit->gvn().transform(new SubXNode(size, offset));
+ size = kit->gvn().transform(new URShiftXNode(size, kit->intcon(LogBytesPerHeapOop)));
+ ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, size, true, false);
+ ac->set_clone_array();
+ Node* n = kit->gvn().transform(ac);
+ if (n == ac) {
+ ac->set_adr_type(TypeRawPtr::BOTTOM);
+ kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), TypeRawPtr::BOTTOM);
+ } else {
+ kit->set_all_memory(n);
+ }
}
void ShenandoahBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
- Node* ctrl = ac->in(TypeFunc::Control);
- Node* mem = ac->in(TypeFunc::Memory);
- Node* src_base = ac->in(ArrayCopyNode::Src);
- Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
- Node* dest_base = ac->in(ArrayCopyNode::Dest);
- Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
- Node* length = ac->in(ArrayCopyNode::Length);
+ Node* const ctrl = ac->in(TypeFunc::Control);
+ Node* const mem = ac->in(TypeFunc::Memory);
+ Node* const src = ac->in(ArrayCopyNode::Src);
+ Node* const src_offset = ac->in(ArrayCopyNode::SrcPos);
+ Node* const dest = ac->in(ArrayCopyNode::Dest);
+ Node* const dest_offset = ac->in(ArrayCopyNode::DestPos);
+ Node* length = ac->in(ArrayCopyNode::Length);
- Node* src = phase->basic_plus_adr(src_base, src_offset);
- Node* dest = phase->basic_plus_adr(dest_base, dest_offset);
+ const TypeOopPtr* src_type = phase->igvn().type(src)->is_oopptr();
- if (ShenandoahCloneBarrier && clone_needs_barrier(src, phase->igvn())) {
- // Check if heap is has forwarded objects. If it does, we need to call into the special
- // routine that would fix up source references before we can continue.
-
- enum { _heap_stable = 1, _heap_unstable, PATH_LIMIT };
- Node* region = new RegionNode(PATH_LIMIT);
- Node* mem_phi = new PhiNode(region, Type::MEMORY, TypeRawPtr::BOTTOM);
-
- Node* thread = phase->transform_later(new ThreadLocalNode());
- Node* offset = phase->igvn().MakeConX(in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
- Node* gc_state_addr = phase->transform_later(AddPNode::make_off_heap(thread, offset));
-
- uint gc_state_idx = Compile::AliasIdxRaw;
- const TypePtr* gc_state_adr_type = nullptr; // debug-mode-only argument
- DEBUG_ONLY(gc_state_adr_type = phase->C->get_adr_type(gc_state_idx));
-
- Node* gc_state = phase->transform_later(new LoadBNode(ctrl, mem, gc_state_addr, gc_state_adr_type, TypeInt::BYTE, MemNode::unordered));
- Node* stable_and = phase->transform_later(new AndINode(gc_state, phase->igvn().intcon(ShenandoahHeap::HAS_FORWARDED)));
- Node* stable_cmp = phase->transform_later(new CmpINode(stable_and, phase->igvn().zerocon(T_INT)));
- Node* stable_test = phase->transform_later(new BoolNode(stable_cmp, BoolTest::ne));
-
- IfNode* stable_iff = phase->transform_later(new IfNode(ctrl, stable_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN))->as_If();
- Node* stable_ctrl = phase->transform_later(new IfFalseNode(stable_iff));
- Node* unstable_ctrl = phase->transform_later(new IfTrueNode(stable_iff));
-
- // Heap is stable, no need to do anything additional
- region->init_req(_heap_stable, stable_ctrl);
- mem_phi->init_req(_heap_stable, mem);
-
- // Heap is unstable, call into clone barrier stub
- Node* call = phase->make_leaf_call(unstable_ctrl, mem,
- ShenandoahBarrierSetC2::clone_barrier_Type(),
- CAST_FROM_FN_PTR(address, ShenandoahRuntime::clone_barrier),
- "shenandoah_clone",
- TypeRawPtr::BOTTOM,
- src_base);
- call = phase->transform_later(call);
-
- ctrl = phase->transform_later(new ProjNode(call, TypeFunc::Control));
- mem = phase->transform_later(new ProjNode(call, TypeFunc::Memory));
- region->init_req(_heap_unstable, ctrl);
- mem_phi->init_req(_heap_unstable, mem);
-
- // Wire up the actual arraycopy stub now
- ctrl = phase->transform_later(region);
- mem = phase->transform_later(mem_phi);
-
- const char* name = "arraycopy";
- call = phase->make_leaf_call(ctrl, mem,
- OptoRuntime::fast_arraycopy_Type(),
- phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, name, true),
- name, TypeRawPtr::BOTTOM,
- src, dest, length
- LP64_ONLY(COMMA phase->top()));
- call = phase->transform_later(call);
-
- // Hook up the whole thing into the graph
- phase->igvn().replace_node(ac, call);
- } else {
+ bool is_oop_array = false;
+ if (!clone_needs_barrier(src_type, is_oop_array)) {
+ // No barrier is needed? Expand to normal HeapWord-sized arraycopy.
BarrierSetC2::clone_at_expansion(phase, ac);
- }
-}
-
-
-// Support for macro expanded GC barriers
-void ShenandoahBarrierSetC2::register_potential_barrier_node(Node* node) const {
- if (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
- state()->add_load_reference_barrier((ShenandoahLoadReferenceBarrierNode*) node);
- }
-}
-
-void ShenandoahBarrierSetC2::unregister_potential_barrier_node(Node* node) const {
- if (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
- state()->remove_load_reference_barrier((ShenandoahLoadReferenceBarrierNode*) node);
- }
-}
-
-void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
- if (is_shenandoah_wb_pre_call(node)) {
- shenandoah_eliminate_wb_pre(node, &macro->igvn());
- }
- if (ShenandoahCardBarrier && node->Opcode() == Op_CastP2X) {
- Node* shift = node->unique_out();
- Node* addp = shift->unique_out();
- for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
- Node* mem = addp->last_out(j);
- if (UseCondCardMark && mem->is_Load()) {
- assert(mem->Opcode() == Op_LoadB, "unexpected code shape");
- // The load is checking if the card has been written so
- // replace it with zero to fold the test.
- macro->replace_node(mem, macro->intcon(0));
- continue;
- }
- assert(mem->is_Store(), "store required");
- macro->replace_node(mem, mem->in(MemNode::Memory));
- }
- }
-}
-
-void ShenandoahBarrierSetC2::shenandoah_eliminate_wb_pre(Node* call, PhaseIterGVN* igvn) const {
- assert(UseShenandoahGC && is_shenandoah_wb_pre_call(call), "");
- Node* c = call->as_Call()->proj_out(TypeFunc::Control);
- c = c->unique_ctrl_out();
- assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?");
- c = c->unique_ctrl_out();
- assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?");
- Node* iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0);
- assert(iff->is_If(), "expect test");
- if (!is_shenandoah_marking_if(igvn, iff)) {
- c = c->unique_ctrl_out();
- assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?");
- iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0);
- assert(is_shenandoah_marking_if(igvn, iff), "expect marking test");
- }
- Node* cmpx = iff->in(1)->in(1);
- igvn->replace_node(cmpx, igvn->makecon(TypeInt::CC_EQ));
- igvn->rehash_node_delayed(call);
- call->del_req(call->req()-1);
-}
-
-void ShenandoahBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {
- if (node->Opcode() == Op_AddP && ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(node)) {
- igvn->add_users_to_worklist(node);
- }
-}
-
-void ShenandoahBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {
- for (uint i = 0; i < useful.size(); i++) {
- Node* n = useful.at(i);
- if (n->Opcode() == Op_AddP && ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(n)) {
- for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
- C->record_for_igvn(n->fast_out(i));
- }
- }
+ return;
}
- for (int i = state()->load_reference_barriers_count() - 1; i >= 0; i--) {
- ShenandoahLoadReferenceBarrierNode* n = state()->load_reference_barrier(i);
- if (!useful.member(n)) {
- state()->remove_load_reference_barrier(n);
- }
+ if (ShenandoahCloneRuntime || !ac->is_clone_array() || !is_oop_array) {
+ // Still looks like an instance? Likely a large instance or reflective
+ // clone with unknown length. Go to runtime and handle it there.
+ clone_in_runtime(phase, ac, ShenandoahRuntime::clone_addr(), "ShenandoahRuntime::clone");
+ return;
}
+
+ // We are cloning the oop array. Call into normal oop array copy stubs.
+ // Those stubs would call BarrierSetAssembler to handle GC barriers.
+
+ // This is the full clone, so offsets should equal each other and be at array base.
+ assert(src_offset == dest_offset, "should be equal");
+ const jlong offset = src_offset->get_long();
+ const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();
+ BasicType bt = ary_ptr->elem()->array_element_basic_type();
+ assert(offset == arrayOopDesc::base_offset_in_bytes(bt), "should match");
+
+ const char* copyfunc_name = "arraycopy";
+ const address copyfunc_addr = phase->basictype2arraycopy(T_OBJECT, nullptr, nullptr, true, copyfunc_name, true);
+
+ Node* const call = phase->make_leaf_call(ctrl, mem,
+ OptoRuntime::fast_arraycopy_Type(),
+ copyfunc_addr, copyfunc_name,
+ TypeRawPtr::BOTTOM,
+ phase->basic_plus_adr(src, src_offset),
+ phase->basic_plus_adr(dest, dest_offset),
+ length,
+ phase->top()
+ );
+ phase->transform_later(call);
+
+ phase->igvn().replace_node(ac, call);
}
void* ShenandoahBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
@@ -1060,291 +664,370 @@ ShenandoahBarrierSetC2State* ShenandoahBarrierSetC2::state() const {
return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
}
-// If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be
-// expanded later, then now is the time to do so.
-bool ShenandoahBarrierSetC2::expand_macro_nodes(PhaseMacroExpand* macro) const { return false; }
+void ShenandoahBarrierSetC2::print_barrier_data(outputStream* os, uint8_t data) {
+ os->print(" Node barriers: ");
+ if ((data & ShenandoahBitStrong) != 0) {
+ data &= ~ShenandoahBitStrong;
+ os->print("strong ");
+ }
+
+ if ((data & ShenandoahBitWeak) != 0) {
+ data &= ~ShenandoahBitWeak;
+ os->print("weak ");
+ }
+
+ if ((data & ShenandoahBitPhantom) != 0) {
+ data &= ~ShenandoahBitPhantom;
+ os->print("phantom ");
+ }
+
+ if ((data & ShenandoahBitKeepAlive) != 0) {
+ data &= ~ShenandoahBitKeepAlive;
+ os->print("keepalive ");
+ }
+
+ if ((data & ShenandoahBitCardMark) != 0) {
+ data &= ~ShenandoahBitCardMark;
+ os->print("cardmark ");
+ }
+
+ if ((data & ShenandoahBitNative) != 0) {
+ data &= ~ShenandoahBitNative;
+ os->print("native ");
+ }
+
+ if ((data & ShenandoahBitNotNull) != 0) {
+ data &= ~ShenandoahBitNotNull;
+ os->print("not-null ");
+ }
+
+ if ((data & ShenandoahBitElided) != 0) {
+ data &= ~ShenandoahBitElided;
+ os->print("elided ");
+ }
+
+ os->cr();
+
+ if (data > 0) {
+ fatal("Unknown bit!");
+ }
+
+ os->print_cr(" GC configuration: %sLRB %sSATB %sCAS %sClone %sCard",
+ (ShenandoahLoadRefBarrier ? "+" : "-"),
+ (ShenandoahSATBBarrier ? "+" : "-"),
+ (ShenandoahCASBarrier ? "+" : "-"),
+ (ShenandoahCloneBarrier ? "+" : "-"),
+ (ShenandoahCardBarrier ? "+" : "-")
+ );
+}
+
#ifdef ASSERT
+void ShenandoahBarrierSetC2::verify_gc_barrier_assert(bool cond, const char* msg, uint8_t bd, Node* n) {
+ if (!cond) {
+ stringStream ss;
+ ss.print_cr("%s", msg);
+ ss.print_cr("-----------------");
+ print_barrier_data(&ss, bd);
+ ss.print_cr("-----------------");
+ n->dump_bfs(1, nullptr, "", &ss);
+ report_vm_error(__FILE__, __LINE__, ss.as_string());
+ }
+}
+
void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
- if (ShenandoahVerifyOptoBarriers && phase == BarrierSetC2::BeforeMacroExpand) {
- ShenandoahBarrierC2Support::verify(Compile::current()->root());
- } else if (phase == BarrierSetC2::BeforeCodeGen) {
- // Verify Shenandoah pre-barriers
- const int gc_state_offset = in_bytes(ShenandoahThreadLocalData::gc_state_offset());
+ if (!ShenandoahVerifyOptoBarriers) {
+ return;
+ }
- Unique_Node_List visited;
- Node_List worklist;
- // We're going to walk control flow backwards starting from the Root
- worklist.push(compile->root());
- while (worklist.size() > 0) {
- Node *x = worklist.pop();
- if (x == nullptr || x == compile->top()) {
- continue;
+ // Verify depending on the barriers actually enabled, allowing verification in passive mode.
+ // Normally, we have _some_ bits set on all accesses. Optimizations may drop some bits,
+ // but only the last optimization step eliminates all remaining metadata flags. Only then
+ // the access data can be completely blank.
+ bool final_phase = (phase == BeforeCodeGen);
+ bool expect_load_barriers = !final_phase && ShenandoahLoadRefBarrier;
+ bool expect_store_barriers = !final_phase && (ShenandoahSATBBarrier || ShenandoahCardBarrier);
+ bool expect_load_store_barriers = expect_load_barriers || expect_store_barriers;
+ bool expect_some_real = final_phase;
+
+ Unique_Node_List wq;
+
+ RootNode* root = compile->root();
+ wq.push(root);
+
+ // Also seed the outs to capture nodes that are not reachable from in()-s, e.g. endless loops.
+ for (DUIterator_Fast imax, i = root->fast_outs(imax); i < imax; i++) {
+ Node* m = root->fast_out(i);
+ wq.push(m);
+ }
+
+ for (uint next = 0; next < wq.size(); next++) {
+ Node *n = wq.at(next);
+ assert(!n->is_Mach(), "No Mach nodes here yet");
+
+ int opc = n->Opcode();
+
+ uint8_t bd = 0;
+ const TypePtr* adr_type = nullptr;
+ if (is_Load(opc)) {
+ bd = n->as_Load()->barrier_data();
+ adr_type = n->as_Load()->adr_type();
+ } else if (is_Store(opc)) {
+ bd = n->as_Store()->barrier_data();
+ adr_type = n->as_Store()->adr_type();
+ } else if (is_LoadStore(opc)) {
+ bd = n->as_LoadStore()->barrier_data();
+ adr_type = n->as_LoadStore()->adr_type();
+ } else if (n->is_Mem()) {
+ bd = MemNode::barrier_data(n);
+ verify_gc_barrier_assert(bd == 0, "Other mem nodes should have no barrier data", bd, n);
+ }
+
+ bool is_weak = (bd & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0;
+ bool is_native = (bd & ShenandoahBitNative) != 0;
+
+ bool is_referent = adr_type != nullptr &&
+ adr_type->isa_instptr() &&
+ adr_type->is_instptr()->instance_klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
+ adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset();
+
+ bool is_oop_addr = (adr_type != nullptr) && (adr_type->isa_oopptr() || adr_type->isa_narrowoop());
+ bool is_raw_addr = (adr_type != nullptr) && (adr_type->isa_rawptr() || adr_type->isa_klassptr());
+
+ verify_gc_barrier_assert(!expect_some_real || (bd == 0) || (bd & ShenandoahBitsReal) != 0, "Without real barriers, metadata should be stripped at this point", bd, n);
+
+ if (is_oop_addr) {
+ if (is_Load(opc)) {
+ verify_gc_barrier_assert(!expect_load_barriers || (bd != 0), "Oop load should have barrier data", bd, n);
+ verify_gc_barrier_assert(!is_weak || is_referent, "Weak load only for Reference.referent", bd, n);
+ } else if (is_Store(opc)) {
+ // Reference.referent stores can be without barriers.
+ verify_gc_barrier_assert(!expect_store_barriers || is_referent || (bd != 0), "Oop store should have barrier data", bd, n);
+ } else if (is_LoadStore(opc)) {
+ verify_gc_barrier_assert(!expect_load_store_barriers || (bd != 0), "Oop load-store should have barrier data", bd, n);
}
-
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
-
- if (x->is_Region()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
+ } else if (is_raw_addr) {
+ if (is_native) {
+ if (is_Load(opc)) {
+ verify_gc_barrier_assert(!expect_load_barriers || (bd != 0), "Native oop load should have barrier data", bd, n);
+ }
+ if (is_Store(opc)) {
+ verify_gc_barrier_assert(!expect_store_barriers || (bd != 0), "Native oop store should have barrier data", bd, n);
+ }
+ if (is_LoadStore(opc)) {
+ verify_gc_barrier_assert(!expect_load_store_barriers || (bd != 0), "Native oop load-store should have barrier data", bd, n);
}
} else {
- worklist.push(x->in(0));
- // We are looking for the pattern:
- // /->ThreadLocal
- // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
- // \->ConI(0)
- // We want to verify that the If and the LoadB have the same control
- // See GraphKit::g1_write_barrier_pre()
- if (x->is_If()) {
- IfNode *iff = x->as_If();
- if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
- CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
- if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
- && cmp->in(1)->is_Load()) {
- LoadNode *load = cmp->in(1)->as_Load();
- if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
- && load->in(2)->in(3)->is_Con()
- && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == gc_state_offset) {
+ // Some Load/Stores are used for T_ADDRESS and/or raw stores, which are supposed not to have barriers.
+ // Some other Load/Stores are emitted for real oops, but on raw addresses via Unsafe.
+ // The distinction on this level is lost, so we cannot really verify this.
+ }
+ } else {
+ if (is_Load(opc) || is_Store(opc) || is_LoadStore(opc)) {
+ verify_gc_barrier_assert(false, "Unclassified access type", bd, n);
+ }
+ }
- Node *if_ctrl = iff->in(0);
- Node *load_ctrl = load->in(0);
-
- if (if_ctrl != load_ctrl) {
- // Skip possible CProj->NeverBranch in infinite loops
- if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
- && if_ctrl->in(0)->is_NeverBranch()) {
- if_ctrl = if_ctrl->in(0)->in(0);
- }
- }
- assert(load_ctrl != nullptr && if_ctrl == load_ctrl, "controls must match");
- }
- }
- }
- }
+ for (uint j = 0; j < n->req(); j++) {
+ Node* in = n->in(j);
+ if (in != nullptr) {
+ wq.push(in);
}
}
}
}
#endif
-Node* ShenandoahBarrierSetC2::ideal_node(PhaseGVN* phase, Node* n, bool can_reshape) const {
- if (is_shenandoah_wb_pre_call(n)) {
- uint cnt = ShenandoahBarrierSetC2::write_barrier_pre_Type()->domain()->cnt();
- if (n->req() > cnt) {
- Node* addp = n->in(cnt);
- if (has_only_shenandoah_wb_pre_uses(addp)) {
- n->del_req(cnt);
- if (can_reshape) {
- phase->is_IterGVN()->_worklist.push(addp);
- }
- return n;
+// Returns the Shenandoah C2 barrier set state attached to the current compilation.
+static ShenandoahBarrierSetC2State* barrier_set_state() {
+  // reinterpret_cast needs an explicit target type: the declared return type of this helper.
+  return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
+}
+
+// Reports the estimated size of the barrier stubs; this implementation always returns 0.
+// Debug builds additionally check that no stub has been registered yet when this is called.
+int ShenandoahBarrierSetC2::estimate_stub_size() const {
+  // Element type matches what ShenandoahBarrierStubC2::register_stub() appends.
+  GrowableArray<ShenandoahBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
+  assert(stubs->is_empty(), "Lifecycle: no stubs were yet created");
+  return 0;
+}
+
+// Emits the code of every registered barrier stub into the given code buffer.
+// The offset at which the stubs start is recorded in the barrier set state first.
+// If the instruction section cannot be expanded, "CodeCache is full" is recorded as a
+// compilation failure and the function returns without emitting the remaining stubs.
+void ShenandoahBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
+  MacroAssembler masm(&cb);
+
+  PhaseOutput* const output = Compile::current()->output();
+  assert(masm.offset() <= output->buffer_sizing_data()->_code,
+         "Stubs are assumed to be emitted directly after code and code_size is a hard limit on where it can start");
+  barrier_set_state()->set_stubs_start_offset(masm.offset());
+
+  // Stub generation counts all stubs as skipped for the sake of inlining policy.
+  // This is critical for performance, check it.
+#ifdef ASSERT
+  int offset_before = masm.offset();
+  int skipped_before = cb.total_skipped_instructions_size();
+#endif
+
+  // Element type matches what ShenandoahBarrierStubC2::register_stub() appends.
+  GrowableArray<ShenandoahBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
+  for (int i = 0; i < stubs->length(); i++) {
+    // Make sure there is enough space in the code buffer
+    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
+      ciEnv::current()->record_failure("CodeCache is full");
+      return;
+    }
+    stubs->at(i)->emit_code(masm);
+  }
+
+#ifdef ASSERT
+  // Everything emitted by the loop above must have been accounted as skipped instructions.
+  int offset_after = masm.offset();
+  int skipped_after = cb.total_skipped_instructions_size();
+  assert(offset_after - offset_before == skipped_after - skipped_before,
+         "All stubs are counted as skipped. masm: %d - %d = %d, cb: %d - %d = %d",
+         offset_after, offset_before, offset_after - offset_before,
+         skipped_after, skipped_before, skipped_after - skipped_before);
+#endif
+
+  masm.flush();
+}
+
+// Appends the stub to the stub list held by the current compilation's barrier set state.
+// Nothing is recorded while the output phase reports in_scratch_emit_size()
+// (presumably a scratch pass that only measures code size - TODO confirm).
+void ShenandoahBarrierStubC2::register_stub(ShenandoahBarrierStubC2* stub) {
+ if (!Compile::current()->output()->in_scratch_emit_size()) {
+ barrier_set_state()->stubs()->append(stub);
+ }
+}
+
+// Allocates a new stub in the current compilation's arena, hands it to register_stub(),
+// and returns it. All arguments are forwarded unchanged to the constructor.
+ShenandoahBarrierStubC2* ShenandoahBarrierStubC2::create(const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow, bool do_load) {
+ auto* stub = new (Compile::current()->comp_arena()) ShenandoahBarrierStubC2(node, obj, addr, tmp1, tmp2, narrow, do_load);
+ register_stub(stub);
+ return stub;
+}
+
+// Emits the post-barrier for an oop load. When needs_slow_barrier(node) holds, a stub is
+// created (do_load = false) and a gc-state mask is passed to its enter_if_gc_state();
+// otherwise nothing is emitted.
+void ShenandoahBarrierStubC2::load_post(MacroAssembler* masm, const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow) {
+ // Load post-barrier:
+ // a. Satisfies the need for LRB for normal loads
+ // b. Passes a weak load through LRB-weak
+ // c. Keep-alives a weak load
+ if (needs_slow_barrier(node)) {
+ ShenandoahBarrierStubC2* const stub = create(node, obj, addr, tmp1, tmp2, narrow, /* do_load = */ false);
+ // Build the gc-state mask: one ShenandoahHeap flag per barrier kind this node needs.
+ char check = 0;
+ check |= needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
+ check |= needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
+ check |= needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
+ stub->enter_if_gc_state(*masm, check, tmp1);
+ }
+}
+
+// Emits the pre-barrier for an oop store. When needs_slow_barrier(node) holds, a stub is
+// created (do_load = true) and ShenandoahHeap::MARKING is passed to its enter_if_gc_state();
+// otherwise nothing is emitted.
+void ShenandoahBarrierStubC2::store_pre(MacroAssembler* masm, const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow) {
+ // Store pre-barrier: SATB, keep-alive the current memory value.
+ if (needs_slow_barrier(node)) {
+ assert(!needs_load_ref_barrier(node), "Should not be required for stores");
+ ShenandoahBarrierStubC2* const stub = create(node, obj, addr, tmp1, tmp2, narrow, /* do_load = */ true);
+ stub->enter_if_gc_state(*masm, ShenandoahHeap::MARKING, tmp1);
+ }
+}
+
+// Emits the pre-barrier for an atomic load-store on an oop. When needs_slow_barrier(node)
+// holds, a stub is created (do_load = true) and a gc-state mask is passed to its
+// enter_if_gc_state(); otherwise nothing is emitted.
+void ShenandoahBarrierStubC2::load_store_pre(MacroAssembler* masm, const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow) {
+ // Load/Store pre-barrier:
+ // a. Avoids false positives from CAS encountering to-space memory values.
+ // b. Satisfies the need for LRB for the CAE result.
+ // c. Records old value for the sake of SATB.
+ //
+ // (a) and (b) are covered because load barrier does memory location fixup.
+ // (c) is covered by KA on the current memory value.
+ if (needs_slow_barrier(node)) {
+ ShenandoahBarrierStubC2* const stub = create(node, obj, addr, tmp1, tmp2, narrow, /* do_load = */ true);
+ // Build the gc-state mask: one ShenandoahHeap flag per barrier kind this node needs.
+ char check = 0;
+ check |= needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
+ check |= needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
+ assert(!needs_load_ref_barrier_weak(node), "Not supported for Load/Stores");
+ stub->enter_if_gc_state(*masm, check, tmp1);
+ }
+}
+
+// Emits the post-barrier for an oop store: calls cardtable() for the address when
+// needs_card_barrier(node) holds, and emits nothing otherwise.
+void ShenandoahBarrierStubC2::store_post(MacroAssembler* masm, const MachNode* node, Address addr, Register tmp1, Register tmp2) {
+ if (needs_card_barrier(node)) {
+ cardtable(*masm, addr, tmp1, tmp2);
+ }
+}
+
+// Emits the post-barrier for an atomic load-store; identical to store_post().
+void ShenandoahBarrierStubC2::load_store_post(MacroAssembler* masm, const MachNode* node, Address addr, Register tmp1, Register tmp2) {
+ store_post(masm, node, addr, tmp1, tmp2);
+}
+
+// Returns true when the register (converted to its OptoReg name) is a member of preserve_set().
+bool ShenandoahBarrierStubC2::is_live_register(Register reg) {
+ return preserve_set().member(OptoReg::as_OptoReg(reg->as_VMReg()));
+}
+
+// Selects a general-purpose register to use as a temporary.
+// Candidates are the registers numbered [0, available_gp_registers()), excluding _obj, the
+// base and index of _addr, skip_reg1, skip_reg2, and anything is_special_register() rejects.
+// The first candidate that is not live is preferred; if every candidate is live, the first
+// live candidate is returned instead. On return, selected_live is true exactly when the
+// live fallback was taken. Asserts that some register was found.
+Register ShenandoahBarrierStubC2::select_temp_register(bool& selected_live, Register skip_reg1, Register skip_reg2) {
+ Register tmp = noreg;
+ Register fallback_live = noreg;
+
+ // Try to select non-live first:
+ for (int i = 0; i < available_gp_registers(); i++) {
+ Register r = as_Register(i);
+ if (r != _obj && r != _addr.base() && r != _addr.index() &&
+ r != skip_reg1 && r != skip_reg2 && !is_special_register(r)) {
+ if (!is_live_register(r)) {
+ tmp = r;
+ break;
+ } else if (fallback_live == noreg) {
+ fallback_live = r;
}
}
}
- if (n->Opcode() == Op_CmpP) {
- Node* in1 = n->in(1);
- Node* in2 = n->in(2);
- // If one input is null, then step over the strong LRB barriers on the other input
- if (in1->bottom_type() == TypePtr::NULL_PTR &&
- !((in2->Opcode() == Op_ShenandoahLoadReferenceBarrier) &&
- !ShenandoahBarrierSet::is_strong_access(((ShenandoahLoadReferenceBarrierNode*)in2)->decorators()))) {
- in2 = step_over_gc_barrier(in2);
- }
- if (in2->bottom_type() == TypePtr::NULL_PTR &&
- !((in1->Opcode() == Op_ShenandoahLoadReferenceBarrier) &&
- !ShenandoahBarrierSet::is_strong_access(((ShenandoahLoadReferenceBarrierNode*)in1)->decorators()))) {
- in1 = step_over_gc_barrier(in1);
- }
-
- if (in1 != n->in(1)) {
- n->set_req_X(1, in1, phase);
- assert(in2 == n->in(2), "only one change");
- return n;
- }
- if (in2 != n->in(2)) {
- n->set_req_X(2, in2, phase);
- return n;
- }
- } else if (can_reshape &&
- n->Opcode() == Op_If &&
- ShenandoahBarrierC2Support::is_heap_stable_test(n) &&
- n->in(0) != nullptr &&
- n->outcnt() == 2) {
- Node* dom = n->in(0);
- Node* prev_dom = n;
- int op = n->Opcode();
- int dist = 16;
- // Search up the dominator tree for another heap stable test
- while (dom->Opcode() != op || // Not same opcode?
- !ShenandoahBarrierC2Support::is_heap_stable_test(dom) || // Not same input 1?
- prev_dom->in(0) != dom) { // One path of test does not dominate?
- if (dist < 0) return nullptr;
-
- dist--;
- prev_dom = dom;
- dom = IfNode::up_one_dom(dom);
- if (!dom) return nullptr;
- }
-
- // Check that we did not follow a loop back to ourselves
- if (n == dom) {
- return nullptr;
- }
-
- return n->as_If()->dominated_by(prev_dom, phase->is_IterGVN(), false);
+ // If we could not find a non-live register, select the live fallback:
+ if (tmp == noreg) {
+ tmp = fallback_live;
+ selected_live = true;
+ } else {
+ selected_live = false;
}
+ assert(tmp != noreg, "successfully selected");
+ assert_different_registers(tmp, skip_reg1);
+ assert_different_registers(tmp, skip_reg2);
+ assert_different_registers(tmp, _obj);
+ assert_different_registers(tmp, _addr.base());
+ assert_different_registers(tmp, _addr.index());
+ return tmp;
+}
+
+// Returns the address of the ShenandoahRuntime write_barrier_pre entry point, choosing the
+// write_barrier_pre_narrow variant when _narrow is set.
+address ShenandoahBarrierStubC2::keepalive_runtime_entry_addr() {
+ if (_narrow) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre_narrow);
+ } else {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre);
+ }
+}
+
+// Returns the address of the ShenandoahRuntime load-reference-barrier entry point that
+// matches this stub's node. The strength is taken from the node's barrier data, testing the
+// Strong, Weak and Phantom bits in that order; the *_narrow_narrow variants are used when
+// _narrow is set. Reaching the end means none of the three bits was set, which is treated
+// as a fatal error via ShouldNotReachHere().
+address ShenandoahBarrierStubC2::lrb_runtime_entry_addr() {
+ bool is_strong = (_node->barrier_data() & ShenandoahBitStrong) != 0;
+ bool is_weak = (_node->barrier_data() & ShenandoahBitWeak) != 0;
+ bool is_phantom = (_node->barrier_data() & ShenandoahBitPhantom) != 0;
+
+ if (_narrow) {
+ if (is_strong) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow_narrow);
+ } else if (is_weak) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow_narrow);
+ } else if (is_phantom) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow_narrow);
+ }
+ } else {
+ if (is_strong) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
+ } else if (is_weak) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
+ } else if (is_phantom) {
+ return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
+ }
+ }
+
+ ShouldNotReachHere();
return nullptr;
}
-bool ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(Node* n) {
- for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
- Node* u = n->fast_out(i);
- if (!is_shenandoah_wb_pre_call(u)) {
- return false;
- }
- }
- return n->outcnt() > 0;
+// Returns whether liveness data is needed for the given node, which is exactly when
+// ShenandoahBarrierStubC2::needs_slow_barrier() holds for it.
+bool ShenandoahBarrierSetC2State::needs_liveness_data(const MachNode* mach) const {
+ // Nodes that require slow-path stubs need liveness data.
+ return ShenandoahBarrierStubC2::needs_slow_barrier(mach);
}
-bool ShenandoahBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode, Unique_Node_List& dead_nodes) const {
- switch (opcode) {
- case Op_CallLeaf:
- case Op_CallLeafNoFP: {
- assert (n->is_Call(), "");
- CallNode *call = n->as_Call();
- if (ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(call)) {
- uint cnt = ShenandoahBarrierSetC2::write_barrier_pre_Type()->domain()->cnt();
- if (call->req() > cnt) {
- assert(call->req() == cnt + 1, "only one extra input");
- Node *addp = call->in(cnt);
- assert(!ShenandoahBarrierSetC2::has_only_shenandoah_wb_pre_uses(addp), "useless address computation?");
- call->del_req(cnt);
- }
- }
- return false;
- }
- case Op_ShenandoahCompareAndSwapP:
- case Op_ShenandoahCompareAndSwapN:
- case Op_ShenandoahWeakCompareAndSwapN:
- case Op_ShenandoahWeakCompareAndSwapP:
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN:
- return true;
- case Op_ShenandoahLoadReferenceBarrier:
- assert(false, "should have been expanded already");
- return true;
- default:
- return false;
- }
-}
-
-bool ShenandoahBarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const {
- switch (opcode) {
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN:
- conn_graph->add_objload_to_connection_graph(n, delayed_worklist);
- // fallthrough
- case Op_ShenandoahWeakCompareAndSwapP:
- case Op_ShenandoahWeakCompareAndSwapN:
- case Op_ShenandoahCompareAndSwapP:
- case Op_ShenandoahCompareAndSwapN:
- conn_graph->add_to_congraph_unsafe_access(n, opcode, delayed_worklist);
- return true;
- case Op_StoreP: {
- Node* adr = n->in(MemNode::Address);
- const Type* adr_type = gvn->type(adr);
- // Pointer stores in Shenandoah barriers looks like unsafe access.
- // Ignore such stores to be able scalar replace non-escaping
- // allocations.
- if (adr_type->isa_rawptr() && adr->is_AddP()) {
- Node* base = conn_graph->get_addp_base(adr);
- if (base->Opcode() == Op_LoadP &&
- base->in(MemNode::Address)->is_AddP()) {
- adr = base->in(MemNode::Address);
- Node* tls = conn_graph->get_addp_base(adr);
- if (tls->Opcode() == Op_ThreadLocal) {
- int offs = (int) gvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
- const int buf_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset());
- if (offs == buf_offset) {
- return true; // Pre barrier previous oop value store.
- }
- }
- }
- }
- return false;
- }
- case Op_ShenandoahLoadReferenceBarrier:
- conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(ShenandoahLoadReferenceBarrierNode::ValueIn), delayed_worklist);
- return true;
- default:
- // Nothing
- break;
- }
- return false;
-}
-
-bool ShenandoahBarrierSetC2::escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const {
- switch (opcode) {
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN: {
- Node *adr = n->in(MemNode::Address);
- conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, adr, nullptr);
- // fallthrough
- }
- case Op_ShenandoahCompareAndSwapP:
- case Op_ShenandoahCompareAndSwapN:
- case Op_ShenandoahWeakCompareAndSwapP:
- case Op_ShenandoahWeakCompareAndSwapN:
- return conn_graph->add_final_edges_unsafe_access(n, opcode);
- case Op_ShenandoahLoadReferenceBarrier:
- conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(ShenandoahLoadReferenceBarrierNode::ValueIn), nullptr);
- return true;
- default:
- // Nothing
- break;
- }
- return false;
-}
-
-bool ShenandoahBarrierSetC2::escape_has_out_with_unsafe_object(Node* n) const {
- return n->has_out_with(Op_ShenandoahCompareAndExchangeP) || n->has_out_with(Op_ShenandoahCompareAndExchangeN) ||
- n->has_out_with(Op_ShenandoahCompareAndSwapP, Op_ShenandoahCompareAndSwapN, Op_ShenandoahWeakCompareAndSwapP, Op_ShenandoahWeakCompareAndSwapN);
-
-}
-
-bool ShenandoahBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const {
- switch (opcode) {
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN:
- case Op_ShenandoahWeakCompareAndSwapP:
- case Op_ShenandoahWeakCompareAndSwapN:
- case Op_ShenandoahCompareAndSwapP:
- case Op_ShenandoahCompareAndSwapN: { // Convert trinary to binary-tree
- Node* newval = n->in(MemNode::ValueIn);
- Node* oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
- Node* pair = new BinaryNode(oldval, newval);
- n->set_req(MemNode::ValueIn,pair);
- n->del_req(LoadStoreConditionalNode::ExpectedIn);
- return true;
- }
- default:
- break;
- }
- return false;
-}
-
-bool ShenandoahBarrierSetC2::matcher_is_store_load_barrier(Node* x, uint xop) const {
- return xop == Op_ShenandoahCompareAndExchangeP ||
- xop == Op_ShenandoahCompareAndExchangeN ||
- xop == Op_ShenandoahWeakCompareAndSwapP ||
- xop == Op_ShenandoahWeakCompareAndSwapN ||
- xop == Op_ShenandoahCompareAndSwapN ||
- xop == Op_ShenandoahCompareAndSwapP;
+// Always returns true.
+bool ShenandoahBarrierSetC2State::needs_livein_data() const {
+ return true;
}
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp
index c77a9da63fc..d18ebe26853 100644
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp
@@ -26,68 +26,79 @@
#define SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP
#include "gc/shared/c2/barrierSetC2.hpp"
-#include "gc/shenandoah/c2/shenandoahSupport.hpp"
-#include "utilities/growableArray.hpp"
-class ShenandoahBarrierSetC2State : public ArenaObj {
-private:
- GrowableArray* _load_reference_barriers;
+// Flag bits of the 8-bit barrier data that nodes expose through barrier_data().
+// Each constant occupies a distinct bit, so the flags can be combined with bitwise OR.
+static const uint8_t ShenandoahBitStrong = 1 << 0; // Barrier: LRB, strong
+static const uint8_t ShenandoahBitWeak = 1 << 1; // Barrier: LRB, weak
+static const uint8_t ShenandoahBitPhantom = 1 << 2; // Barrier: LRB, phantom
+static const uint8_t ShenandoahBitKeepAlive = 1 << 3; // Barrier: KeepAlive (SATB for stores, KA for loads)
+static const uint8_t ShenandoahBitCardMark = 1 << 4; // Barrier: CM
+static const uint8_t ShenandoahBitNotNull = 1 << 5; // Metadata: src/dst is definitely not null
+static const uint8_t ShenandoahBitNative = 1 << 6; // Metadata: access is in native, not in heap
+static const uint8_t ShenandoahBitElided = 1 << 7; // Metadata: some part of the barrier is elided
+
+// Barrier data that implies real barriers, not additional metadata.
+static const uint8_t ShenandoahBitsReal = ShenandoahBitStrong | ShenandoahBitWeak | ShenandoahBitPhantom |
+ ShenandoahBitKeepAlive |
+ ShenandoahBitCardMark;
+
+class MachNode;
+class ShenandoahBarrierStubC2;
+
+// Per-compilation Shenandoah C2 state: the list of barrier stubs plus stub bookkeeping
+// (trampoline stub count, start offset of the stubs, running total of stub sizes).
+class ShenandoahBarrierSetC2State : public BarrierSetC2State {
+  // Stubs appended by ShenandoahBarrierStubC2::register_stub().
+  GrowableArray<ShenandoahBarrierStubC2*>* _stubs;
+  int _trampoline_stubs_count;
+  int _stubs_start_offset;
+  int _stubs_current_total_size;
public:
- ShenandoahBarrierSetC2State(Arena* comp_arena);
+  explicit ShenandoahBarrierSetC2State(Arena* comp_arena);
- int load_reference_barriers_count() const;
- ShenandoahLoadReferenceBarrierNode* load_reference_barrier(int idx) const;
- void add_load_reference_barrier(ShenandoahLoadReferenceBarrierNode* n);
- void remove_load_reference_barrier(ShenandoahLoadReferenceBarrierNode * n);
+  bool needs_liveness_data(const MachNode* mach) const override;
+  bool needs_livein_data() const override;
+
+  // Returns the stub list itself (not a copy), so callers can append to it.
+  GrowableArray<ShenandoahBarrierStubC2*>* stubs() {
+    return _stubs;
+  }
+
+  // Increments the trampoline stub counter; asserts that it does not overflow.
+  void inc_trampoline_stubs_count() {
+    assert(_trampoline_stubs_count != INT_MAX, "Overflow");
+    ++_trampoline_stubs_count;
+  }
+
+  int trampoline_stubs_count() {
+    return _trampoline_stubs_count;
+  }
+
+  void set_stubs_start_offset(int offset) {
+    _stubs_start_offset = offset;
+  }
+
+  int stubs_start_offset() {
+    return _stubs_start_offset;
+  }
+
+  // Adds size to the running total and returns the updated total.
+  int inc_stubs_current_total_size(int size) {
+    _stubs_current_total_size += size;
+    return _stubs_current_total_size;
+  }
+
+  int stubs_current_total_size() {
+    return _stubs_current_total_size;
+  }
};
class ShenandoahBarrierSetC2 : public BarrierSetC2 {
-private:
- void shenandoah_eliminate_wb_pre(Node* call, PhaseIterGVN* igvn) const;
- bool satb_can_remove_pre_barrier(GraphKit* kit, PhaseGVN* phase, Node* adr,
- BasicType bt, uint adr_idx) const;
- void satb_write_barrier_pre(GraphKit* kit, bool do_load,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const;
+ static bool clone_needs_barrier(const TypeOopPtr* src_type, bool& is_oop_array);
- void shenandoah_write_barrier_pre(GraphKit* kit,
- bool do_load,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const;
+ static bool can_remove_load_barrier(Node* node);
- void post_barrier(GraphKit* kit,
- Node* ctl,
- Node* store,
- Node* obj,
- Node* adr,
- uint adr_idx,
- Node* val,
- BasicType bt,
- bool use_precise) const;
+ static uint8_t refine_load(Node* node, uint8_t bd);
+ static uint8_t refine_store(Node* node, uint8_t bd);
- void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
- Node* pre_val, bool need_mem_bar) const;
-
- static bool clone_needs_barrier(Node* src, PhaseGVN& gvn);
-
- static const TypeFunc* _write_barrier_pre_Type;
- static const TypeFunc* _clone_barrier_Type;
- static const TypeFunc* _load_reference_barrier_Type;
- static void make_write_barrier_pre_Type();
- static void make_clone_barrier_Type();
- static void make_load_reference_barrier_Type();
+ static bool is_Load(int opcode);
+ static bool is_Store(int opcode);
+ static bool is_LoadStore(int opcode);
protected:
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
@@ -101,64 +112,128 @@ protected:
public:
static ShenandoahBarrierSetC2* bsc2();
- static bool is_shenandoah_wb_pre_call(Node* call);
- static bool is_shenandoah_clone_call(Node* call);
- static bool is_shenandoah_lrb_call(Node* call);
- static bool is_shenandoah_marking_if(PhaseValues* phase, Node* n);
- static bool is_shenandoah_state_load(Node* n);
- static bool has_only_shenandoah_wb_pre_uses(Node* n);
-
ShenandoahBarrierSetC2State* state() const;
- static const TypeFunc* write_barrier_pre_Type();
- static const TypeFunc* clone_barrier_Type();
- static const TypeFunc* load_reference_barrier_Type();
- static void init();
-
- virtual bool has_load_barrier_nodes() const { return true; }
-
// This is the entry-point for the backend to perform accesses through the Access API.
+ virtual void clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const;
virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
// These are general helper methods used by C2
- virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const;
-
- // Support for GC barriers emitted during parsing
- virtual bool is_gc_pre_barrier_node(Node* node) const;
- virtual bool is_gc_barrier_node(Node* node) const;
- virtual Node* step_over_gc_barrier(Node* c) const;
- virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
- virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const;
- virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return mode == LoopOptsShenandoahExpand; }
- virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return mode == LoopOptsShenandoahExpand; }
+ virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone,
+ bool is_clone_instance, ArrayCopyPhase phase) const;
// Support for macro expanded GC barriers
- virtual void register_potential_barrier_node(Node* node) const;
- virtual void unregister_potential_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
- virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
- virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;
+ virtual void eliminate_gc_barrier_data(Node* node) const;
// Allow barrier sets to have shared state that is preserved across a compilation unit.
// This could for example comprise macro nodes to be expanded during macro expansion.
virtual void* create_barrier_state(Arena* comp_arena) const;
- // If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be
- // expanded later, then now is the time to do so.
- virtual bool expand_macro_nodes(PhaseMacroExpand* macro) const;
#ifdef ASSERT
virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
+ static void verify_gc_barrier_assert(bool cond, const char* msg, uint8_t bd, Node* n);
#endif
- virtual Node* ideal_node(PhaseGVN* phase, Node* n, bool can_reshape) const;
- virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode, Unique_Node_List& dead_nodes) const;
+ virtual int estimate_stub_size() const;
+ virtual void emit_stubs(CodeBuffer& cb) const;
+ virtual void late_barrier_analysis() const {
+ compute_liveness_at_stubs();
+ analyze_dominating_barriers();
+ }
- virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const;
- virtual bool escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const;
- virtual bool escape_has_out_with_unsafe_object(Node* n) const;
+ virtual void elide_dominated_barrier(MachNode* mach, MachNode* dominator) const;
+ virtual void analyze_dominating_barriers() const;
+ virtual void final_refinement(Compile* C) const;
- virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const;
- virtual bool matcher_is_store_load_barrier(Node* x, uint xop) const;
+ virtual uint estimated_barrier_size(const Node* node) const;
+
+ static void print_barrier_data(outputStream* os, uint8_t data);
};
+// Barrier stub tied to a single C2 MachNode. The public static entry points emit the
+// barriers for loads, stores and load-stores; emit_code() is what
+// ShenandoahBarrierSetC2::emit_stubs() calls for every registered stub.
+class ShenandoahBarrierStubC2 : public BarrierStubC2 {
+ Register _obj;
+ Address const _addr;
+ Register const _tmp1;
+ Register const _tmp2;
+ // Passed as true by store_pre()/load_store_pre() and as false by load_post().
+ const bool _do_load;
+ // Selects the narrow runtime entry points; asserted to be set only for heap accesses.
+ const bool _narrow;
+ // Barrier requirements, computed once from the node's barrier data in the constructor.
+ const bool _needs_load_ref_barrier;
+ const bool _needs_load_ref_weak_barrier;
+ const bool _needs_keep_alive_barrier;
+ // Value-initialized (false) by the constructor.
+ bool _needs_far_jump;
+
+ static void register_stub(ShenandoahBarrierStubC2* stub);
+
+ int available_gp_registers();
+ bool is_live_register(Register reg);
+ bool is_special_register(Register reg);
+ Register select_temp_register(bool& selected_live, Register skip_reg1 = noreg, Register skip_reg2 = noreg);
+
+ void maybe_far_jump_if_zero(MacroAssembler& masm, Register reg);
+
+ void enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp);
+
+ void keepalive(MacroAssembler& masm, Label* L_done);
+ void lrb(MacroAssembler& masm);
+
+ static void cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2);
+
+ address keepalive_runtime_entry_addr();
+ address lrb_runtime_entry_addr();
+
+ static ShenandoahBarrierStubC2* create(const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow, bool do_load);
+ void post_init();
+
+ // Captures the operands and caches the node's barrier requirements, then calls post_init().
+ // Debug builds check that the registers do not alias: either both temps are supplied and
+ // differ from each other, _obj and the address registers, or both temps are noreg.
+ ShenandoahBarrierStubC2(const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow, bool do_load) :
+ BarrierStubC2(node),
+ _obj(obj),
+ _addr(addr),
+ _tmp1(tmp1),
+ _tmp2(tmp2),
+ _do_load(do_load),
+ _narrow(narrow),
+ _needs_load_ref_barrier(needs_load_ref_barrier(node)),
+ _needs_load_ref_weak_barrier(needs_load_ref_barrier_weak(node)),
+ _needs_keep_alive_barrier(needs_keep_alive_barrier(node)),
+ _needs_far_jump() {
+ assert(!_narrow || is_heap_access(node), "Only heap accesses can be narrow");
+ if (_tmp1 != noreg && _tmp2 != noreg) {
+ assert_different_registers(_tmp1, _tmp2, _obj, _addr.base(), _addr.index());
+ } else {
+ assert(_tmp1 == _tmp2, "should both be noreg");
+ assert_different_registers(_obj, _addr.base(), _addr.index());
+ }
+ post_init();
+ }
+
+ // The predicates below each test bits of the node's barrier data.
+ // Heap access: the Native bit is clear.
+ static bool is_heap_access(const MachNode* node) {
+ return (node->barrier_data() & ShenandoahBitNative) == 0;
+ }
+ // Any of the Strong, Weak or Phantom bits is set.
+ static bool needs_load_ref_barrier(const MachNode* node) {
+ return (node->barrier_data() & (ShenandoahBitStrong | ShenandoahBitWeak | ShenandoahBitPhantom)) != 0;
+ }
+ // The Weak or Phantom bit is set.
+ static bool needs_load_ref_barrier_weak(const MachNode* node) {
+ return (node->barrier_data() & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0;
+ }
+ static bool needs_keep_alive_barrier(const MachNode* node) {
+ return (node->barrier_data() & ShenandoahBitKeepAlive) != 0;
+ }
+ static bool needs_card_barrier(const MachNode* node) {
+ return (node->barrier_data() & ShenandoahBitCardMark) != 0;
+ }
+
+public:
+ // True when the node needs a load-reference barrier or a keep-alive barrier; the card
+ // barrier alone does not count.
+ static bool needs_slow_barrier(const MachNode* node) {
+ return needs_load_ref_barrier(node) || needs_keep_alive_barrier(node);
+ }
+
+ static void load_post(MacroAssembler* masm, const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow);
+ static void store_pre(MacroAssembler* masm, const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow);
+ static void store_post(MacroAssembler* masm, const MachNode* node, Address addr, Register tmp1, Register tmp2);
+ static void load_store_pre(MacroAssembler* masm, const MachNode* node, Register obj, Address addr, Register tmp1, Register tmp2, bool narrow);
+ static void load_store_post(MacroAssembler* masm, const MachNode* node, Address addr, Register tmp1, Register tmp2);
+
+ void emit_code(MacroAssembler& masm);
+};
#endif // SHARE_GC_SHENANDOAH_C2_SHENANDOAHBARRIERSETC2_HPP
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
deleted file mode 100644
index 86de9eef459..00000000000
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
+++ /dev/null
@@ -1,2533 +0,0 @@
-/*
- * Copyright (c) 2015, 2026, Red Hat, Inc. All rights reserved.
- * Copyright (C) 2022, Tencent. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-
-#include "classfile/javaClasses.hpp"
-#include "code/aotCodeCache.hpp"
-#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
-#include "gc/shenandoah/c2/shenandoahSupport.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-#include "gc/shenandoah/shenandoahForwarding.hpp"
-#include "gc/shenandoah/shenandoahHeap.hpp"
-#include "gc/shenandoah/shenandoahHeapRegion.hpp"
-#include "gc/shenandoah/shenandoahRuntime.hpp"
-#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
-#include "opto/arraycopynode.hpp"
-#include "opto/block.hpp"
-#include "opto/callnode.hpp"
-#include "opto/castnode.hpp"
-#include "opto/movenode.hpp"
-#include "opto/phaseX.hpp"
-#include "opto/rootnode.hpp"
-#include "opto/runtime.hpp"
-#include "opto/subnode.hpp"
-
-bool ShenandoahBarrierC2Support::expand(Compile* C, PhaseIterGVN& igvn) {
- ShenandoahBarrierSetC2State* state = ShenandoahBarrierSetC2::bsc2()->state();
- if (state->load_reference_barriers_count() > 0) {
- assert(C->post_loop_opts_phase(), "no loop opts allowed");
- C->reset_post_loop_opts_phase(); // ... but we know what we are doing
- C->clear_major_progress();
- PhaseIdealLoop::optimize(igvn, LoopOptsShenandoahExpand);
- if (C->failing()) return false;
- C->process_for_post_loop_opts_igvn(igvn);
- if (C->failing()) return false;
-
- C->set_post_loop_opts_phase(); // now for real!
- }
- return true;
-}
-
-bool ShenandoahBarrierC2Support::is_gc_state_test(Node* iff, int mask) {
- if (!UseShenandoahGC) {
- return false;
- }
- assert(iff->is_If(), "bad input");
- if (iff->Opcode() != Op_If) {
- return false;
- }
- Node* bol = iff->in(1);
- if (!bol->is_Bool() || bol->as_Bool()->_test._test != BoolTest::ne) {
- return false;
- }
- Node* cmp = bol->in(1);
- if (cmp->Opcode() != Op_CmpI) {
- return false;
- }
- Node* in1 = cmp->in(1);
- Node* in2 = cmp->in(2);
- if (in2->find_int_con(-1) != 0) {
- return false;
- }
- if (in1->Opcode() != Op_AndI) {
- return false;
- }
- in2 = in1->in(2);
- if (in2->find_int_con(-1) != mask) {
- return false;
- }
- in1 = in1->in(1);
-
- return is_gc_state_load(in1);
-}
-
-bool ShenandoahBarrierC2Support::is_heap_stable_test(Node* iff) {
- return is_gc_state_test(iff, ShenandoahHeap::HAS_FORWARDED);
-}
-
-bool ShenandoahBarrierC2Support::is_gc_state_load(Node *n) {
- if (!UseShenandoahGC) {
- return false;
- }
- if (n->Opcode() != Op_LoadB && n->Opcode() != Op_LoadUB) {
- return false;
- }
- Node* addp = n->in(MemNode::Address);
- if (!addp->is_AddP()) {
- return false;
- }
- Node* base = addp->in(AddPNode::Address);
- Node* off = addp->in(AddPNode::Offset);
- if (base->Opcode() != Op_ThreadLocal) {
- return false;
- }
- if (off->find_intptr_t_con(-1) != in_bytes(ShenandoahThreadLocalData::gc_state_offset())) {
- return false;
- }
- return true;
-}
-
-bool ShenandoahBarrierC2Support::has_safepoint_between(Node* start, Node* stop, PhaseIdealLoop *phase) {
- assert(phase->is_dominator(stop, start), "bad inputs");
- ResourceMark rm;
- Unique_Node_List wq;
- wq.push(start);
- for (uint next = 0; next < wq.size(); next++) {
- Node *m = wq.at(next);
- if (m == stop) {
- continue;
- }
- if (m->is_SafePoint() && !m->is_CallLeaf()) {
- return true;
- }
- if (m->is_Region()) {
- for (uint i = 1; i < m->req(); i++) {
- wq.push(m->in(i));
- }
- } else {
- wq.push(m->in(0));
- }
- }
- return false;
-}
-
-#ifdef ASSERT
-bool ShenandoahBarrierC2Support::verify_helper(Node* in, Node_Stack& phis, VectorSet& visited, verify_type t, bool trace, Unique_Node_List& barriers_used) {
- assert(phis.size() == 0, "");
-
- while (true) {
- if (in->bottom_type() == TypePtr::NULL_PTR) {
- if (trace) {tty->print_cr("null");}
- } else if (!in->bottom_type()->make_ptr()->make_oopptr()) {
- if (trace) {tty->print_cr("Non oop");}
- } else {
- if (in->is_ConstraintCast()) {
- in = in->in(1);
- continue;
- } else if (in->is_AddP()) {
- assert(!in->in(AddPNode::Address)->is_top(), "no raw memory access");
- in = in->in(AddPNode::Address);
- continue;
- } else if (in->is_Con()) {
- if (trace) {
- tty->print("Found constant");
- in->dump();
- }
- } else if (in->Opcode() == Op_Parm) {
- if (trace) {
- tty->print("Found argument");
- }
- } else if (in->Opcode() == Op_CreateEx) {
- if (trace) {
- tty->print("Found create-exception");
- }
- } else if (in->Opcode() == Op_LoadP && in->adr_type() == TypeRawPtr::BOTTOM) {
- if (trace) {
- tty->print("Found raw LoadP (OSR argument?)");
- }
- } else if (in->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
- if (t == ShenandoahOopStore) {
- return false;
- }
- barriers_used.push(in);
- if (trace) {tty->print("Found barrier"); in->dump();}
- } else if (in->is_Proj() && in->in(0)->is_Allocate()) {
- if (trace) {
- tty->print("Found alloc");
- in->in(0)->dump();
- }
- } else if (in->is_Proj() && (in->in(0)->Opcode() == Op_CallStaticJava || in->in(0)->Opcode() == Op_CallDynamicJava)) {
- if (trace) {
- tty->print("Found Java call");
- }
- } else if (in->is_Phi()) {
- if (!visited.test_set(in->_idx)) {
- if (trace) {tty->print("Pushed phi:"); in->dump();}
- phis.push(in, 2);
- in = in->in(1);
- continue;
- }
- if (trace) {tty->print("Already seen phi:"); in->dump();}
- } else if (in->Opcode() == Op_CMoveP || in->Opcode() == Op_CMoveN) {
- if (!visited.test_set(in->_idx)) {
- if (trace) {tty->print("Pushed cmovep:"); in->dump();}
- phis.push(in, CMoveNode::IfTrue);
- in = in->in(CMoveNode::IfFalse);
- continue;
- }
- if (trace) {tty->print("Already seen cmovep:"); in->dump();}
- } else if (in->Opcode() == Op_EncodeP || in->Opcode() == Op_DecodeN) {
- in = in->in(1);
- continue;
- } else {
- return false;
- }
- }
- bool cont = false;
- while (phis.is_nonempty()) {
- uint idx = phis.index();
- Node* phi = phis.node();
- if (idx >= phi->req()) {
- if (trace) {tty->print("Popped phi:"); phi->dump();}
- phis.pop();
- continue;
- }
- if (trace) {tty->print("Next entry(%d) for phi:", idx); phi->dump();}
- in = phi->in(idx);
- phis.set_index(idx+1);
- cont = true;
- break;
- }
- if (!cont) {
- break;
- }
- }
- return true;
-}
-
-void ShenandoahBarrierC2Support::report_verify_failure(const char* msg, Node* n1, Node* n2) {
- if (n1 != nullptr) {
- n1->dump(+10);
- }
- if (n2 != nullptr) {
- n2->dump(+10);
- }
- fatal("%s", msg);
-}
-
-void ShenandoahBarrierC2Support::verify(RootNode* root) {
- ResourceMark rm;
- Unique_Node_List wq;
- GrowableArray barriers;
- Unique_Node_List barriers_used;
- Node_Stack phis(0);
- VectorSet visited;
- const bool trace = false;
- const bool verify_no_useless_barrier = false;
-
- wq.push(root);
- for (uint next = 0; next < wq.size(); next++) {
- Node *n = wq.at(next);
- if (n->is_Load()) {
- const bool trace = false;
- if (trace) {tty->print("Verifying"); n->dump();}
- if (n->Opcode() == Op_LoadRange || n->Opcode() == Op_LoadKlass || n->Opcode() == Op_LoadNKlass) {
- if (trace) {tty->print_cr("Load range/klass");}
- } else {
- const TypePtr* adr_type = n->as_Load()->adr_type();
-
- if (adr_type->isa_oopptr() && adr_type->is_oopptr()->offset() == oopDesc::mark_offset_in_bytes()) {
- if (trace) {tty->print_cr("Mark load");}
- } else if (adr_type->isa_instptr() &&
- adr_type->is_instptr()->instance_klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
- adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset()) {
- if (trace) {tty->print_cr("Reference.get()");}
- } else if (!verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahLoad, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: Load should have barriers", n);
- }
- }
- } else if (n->is_Store()) {
- const bool trace = false;
-
- if (trace) {tty->print("Verifying"); n->dump();}
- if (n->in(MemNode::ValueIn)->bottom_type()->make_oopptr()) {
- Node* adr = n->in(MemNode::Address);
- bool verify = true;
-
- if (adr->is_AddP() && adr->in(AddPNode::Base)->is_top()) {
- adr = adr->in(AddPNode::Address);
- if (adr->is_AddP()) {
- assert(adr->in(AddPNode::Base)->is_top(), "");
- adr = adr->in(AddPNode::Address);
- if (adr->Opcode() == Op_LoadP &&
- adr->in(MemNode::Address)->in(AddPNode::Base)->is_top() &&
- adr->in(MemNode::Address)->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
- adr->in(MemNode::Address)->in(AddPNode::Offset)->find_intptr_t_con(-1) == in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())) {
- if (trace) {tty->print_cr("SATB prebarrier");}
- verify = false;
- }
- }
- }
-
- if (verify && !verify_helper(n->in(MemNode::ValueIn), phis, visited, ShenandoahValue, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: Store should have barriers", n);
- }
- }
- if (!verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahStore, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: Store (address) should have barriers", n);
- }
- } else if (n->Opcode() == Op_CmpP) {
- const bool trace = false;
-
- Node* in1 = n->in(1);
- Node* in2 = n->in(2);
- if (in1->bottom_type()->isa_oopptr()) {
- if (trace) {tty->print("Verifying"); n->dump();}
-
- bool mark_inputs = false;
- if (in1->bottom_type() == TypePtr::NULL_PTR || in2->bottom_type() == TypePtr::NULL_PTR ||
- (in1->is_Con() || in2->is_Con())) {
- if (trace) {tty->print_cr("Comparison against a constant");}
- mark_inputs = true;
- } else if ((in1->is_CheckCastPP() && in1->in(1)->is_Proj() && in1->in(1)->in(0)->is_Allocate()) ||
- (in2->is_CheckCastPP() && in2->in(1)->is_Proj() && in2->in(1)->in(0)->is_Allocate())) {
- if (trace) {tty->print_cr("Comparison with newly alloc'ed object");}
- mark_inputs = true;
- } else {
- assert(in2->bottom_type()->isa_oopptr(), "");
-
- if (!verify_helper(in1, phis, visited, ShenandoahStore, trace, barriers_used) ||
- !verify_helper(in2, phis, visited, ShenandoahStore, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: Cmp should have barriers", n);
- }
- }
- if (verify_no_useless_barrier &&
- mark_inputs &&
- (!verify_helper(in1, phis, visited, ShenandoahValue, trace, barriers_used) ||
- !verify_helper(in2, phis, visited, ShenandoahValue, trace, barriers_used))) {
- phis.clear();
- visited.reset();
- }
- }
- } else if (n->is_LoadStore()) {
- if (n->in(MemNode::ValueIn)->bottom_type()->make_ptr() &&
- !verify_helper(n->in(MemNode::ValueIn), phis, visited, ShenandoahValue, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: LoadStore (value) should have barriers", n);
- }
-
- if (n->in(MemNode::Address)->bottom_type()->make_oopptr() && !verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahStore, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: LoadStore (address) should have barriers", n);
- }
- } else if (n->Opcode() == Op_CallLeafNoFP || n->Opcode() == Op_CallLeaf) {
- CallNode* call = n->as_Call();
-
- static struct {
- const char* name;
- struct {
- int pos;
- verify_type t;
- } args[6];
- } calls[] = {
- "array_partition_stub",
- { { TypeFunc::Parms, ShenandoahStore }, { TypeFunc::Parms+4, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone }, { -1, ShenandoahNone }, { -1, ShenandoahNone } },
- "arraysort_stub",
- { { TypeFunc::Parms, ShenandoahStore }, { -1, ShenandoahNone }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "aescrypt_encryptBlock",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "aescrypt_decryptBlock",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "multiplyToLen",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad }, { TypeFunc::Parms+4, ShenandoahStore },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "squareToLen",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad }, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "montgomery_multiply",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad },
- { TypeFunc::Parms+6, ShenandoahStore }, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "montgomery_square",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+5, ShenandoahStore },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "mulAdd",
- { { TypeFunc::Parms, ShenandoahStore }, { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "vectorizedMismatch",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "updateBytesCRC32",
- { { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "updateBytesAdler32",
- { { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "updateBytesCRC32C",
- { { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahLoad}, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "counterMode_AESCrypt",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },
- { TypeFunc::Parms+3, ShenandoahStore }, { TypeFunc::Parms+5, ShenandoahStore }, { TypeFunc::Parms+6, ShenandoahStore } },
- "cipherBlockChaining_encryptAESCrypt",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },
- { TypeFunc::Parms+3, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "cipherBlockChaining_decryptAESCrypt",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },
- { TypeFunc::Parms+3, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "shenandoah_clone",
- { { TypeFunc::Parms, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone},
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "ghash_processBlocks",
- { { TypeFunc::Parms, ShenandoahStore }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "sha1_implCompress",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "sha256_implCompress",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "sha512_implCompress",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "sha1_implCompressMB",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "sha256_implCompressMB",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "sha512_implCompressMB",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "encodeBlock",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "decodeBlock",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahStore }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "intpoly_montgomeryMult_P256",
- { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahStore },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- "intpoly_assign",
- { { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, { -1, ShenandoahNone },
- { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
- };
-
- if (call->is_call_to_arraycopystub()) {
- Node* dest = nullptr;
- const TypeTuple* args = n->as_Call()->_tf->domain();
- for (uint i = TypeFunc::Parms, j = 0; i < args->cnt(); i++) {
- if (args->field_at(i)->isa_ptr()) {
- j++;
- if (j == 2) {
- dest = n->in(i);
- break;
- }
- }
- }
- if (!verify_helper(n->in(TypeFunc::Parms), phis, visited, ShenandoahLoad, trace, barriers_used) ||
- !verify_helper(dest, phis, visited, ShenandoahStore, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: ArrayCopy should have barriers", n);
- }
- } else if (strlen(call->_name) > 5 &&
- !strcmp(call->_name + strlen(call->_name) - 5, "_fill")) {
- if (!verify_helper(n->in(TypeFunc::Parms), phis, visited, ShenandoahStore, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: _fill should have barriers", n);
- }
- } else if (!strcmp(call->_name, "shenandoah_wb_pre")) {
- // skip
- } else {
- const int calls_len = sizeof(calls) / sizeof(calls[0]);
- int i = 0;
- for (; i < calls_len; i++) {
- if (!strcmp(calls[i].name, call->_name)) {
- break;
- }
- }
- if (i != calls_len) {
- const uint args_len = sizeof(calls[0].args) / sizeof(calls[0].args[0]);
- for (uint j = 0; j < args_len; j++) {
- int pos = calls[i].args[j].pos;
- if (pos == -1) {
- break;
- }
- if (!verify_helper(call->in(pos), phis, visited, calls[i].args[j].t, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: intrinsic calls should have barriers", n);
- }
- }
- for (uint j = TypeFunc::Parms; j < call->req(); j++) {
- if (call->in(j)->bottom_type()->make_ptr() &&
- call->in(j)->bottom_type()->make_ptr()->isa_oopptr()) {
- uint k = 0;
- for (; k < args_len && calls[i].args[k].pos != (int)j; k++);
- if (k == args_len) {
- fatal("arg %d for call %s not covered", j, call->_name);
- }
- }
- }
- } else {
- for (uint j = TypeFunc::Parms; j < call->req(); j++) {
- if (call->in(j)->bottom_type()->make_ptr() &&
- call->in(j)->bottom_type()->make_ptr()->isa_oopptr()) {
- fatal("%s not covered", call->_name);
- }
- }
- }
- }
- } else if (n->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
- // skip
- } else if (n->is_AddP()
- || n->is_Phi()
- || n->is_ConstraintCast()
- || n->Opcode() == Op_Return
- || n->Opcode() == Op_CMoveP
- || n->Opcode() == Op_CMoveN
- || n->Opcode() == Op_Rethrow
- || n->is_MemBar()
- || n->Opcode() == Op_Conv2B
- || n->Opcode() == Op_SafePoint
- || n->is_CallJava()
- || n->Opcode() == Op_Unlock
- || n->Opcode() == Op_EncodeP
- || n->Opcode() == Op_DecodeN) {
- // nothing to do
- } else {
- static struct {
- int opcode;
- struct {
- int pos;
- verify_type t;
- } inputs[2];
- } others[] = {
- Op_FastLock,
- { { 1, ShenandoahLoad }, { -1, ShenandoahNone} },
- Op_Lock,
- { { TypeFunc::Parms, ShenandoahLoad }, { -1, ShenandoahNone} },
- Op_ArrayCopy,
- { { ArrayCopyNode::Src, ShenandoahLoad }, { ArrayCopyNode::Dest, ShenandoahStore } },
- Op_StrCompressedCopy,
- { { 2, ShenandoahLoad }, { 3, ShenandoahStore } },
- Op_StrInflatedCopy,
- { { 2, ShenandoahLoad }, { 3, ShenandoahStore } },
- Op_AryEq,
- { { 2, ShenandoahLoad }, { 3, ShenandoahLoad } },
- Op_StrIndexOf,
- { { 2, ShenandoahLoad }, { 4, ShenandoahLoad } },
- Op_StrComp,
- { { 2, ShenandoahLoad }, { 4, ShenandoahLoad } },
- Op_StrEquals,
- { { 2, ShenandoahLoad }, { 3, ShenandoahLoad } },
- Op_VectorizedHashCode,
- { { 2, ShenandoahLoad }, { -1, ShenandoahNone } },
- Op_EncodeISOArray,
- { { 2, ShenandoahLoad }, { 3, ShenandoahStore } },
- Op_CountPositives,
- { { 2, ShenandoahLoad }, { -1, ShenandoahNone} },
- Op_CastP2X,
- { { 1, ShenandoahLoad }, { -1, ShenandoahNone} },
- Op_StrIndexOfChar,
- { { 2, ShenandoahLoad }, { -1, ShenandoahNone } },
- };
-
- const int others_len = sizeof(others) / sizeof(others[0]);
- int i = 0;
- for (; i < others_len; i++) {
- if (others[i].opcode == n->Opcode()) {
- break;
- }
- }
- uint stop = n->is_Call() ? n->as_Call()->tf()->domain()->cnt() : n->req();
- if (i != others_len) {
- const uint inputs_len = sizeof(others[0].inputs) / sizeof(others[0].inputs[0]);
- for (uint j = 0; j < inputs_len; j++) {
- int pos = others[i].inputs[j].pos;
- if (pos == -1) {
- break;
- }
- if (!verify_helper(n->in(pos), phis, visited, others[i].inputs[j].t, trace, barriers_used)) {
- report_verify_failure("Shenandoah verification: intrinsic calls should have barriers", n);
- }
- }
- for (uint j = 1; j < stop; j++) {
- if (n->in(j) != nullptr && n->in(j)->bottom_type()->make_ptr() &&
- n->in(j)->bottom_type()->make_ptr()->make_oopptr()) {
- uint k = 0;
- for (; k < inputs_len && others[i].inputs[k].pos != (int)j; k++);
- if (k == inputs_len) {
- fatal("arg %d for node %s not covered", j, n->Name());
- }
- }
- }
- } else {
- for (uint j = 1; j < stop; j++) {
- if (n->in(j) != nullptr && n->in(j)->bottom_type()->make_ptr() &&
- n->in(j)->bottom_type()->make_ptr()->make_oopptr()) {
- fatal("%s not covered", n->Name());
- }
- }
- }
- }
-
- if (n->is_SafePoint()) {
- SafePointNode* sfpt = n->as_SafePoint();
- if (verify_no_useless_barrier && sfpt->jvms() != nullptr) {
- for (uint i = sfpt->jvms()->scloff(); i < sfpt->jvms()->endoff(); i++) {
- if (!verify_helper(sfpt->in(i), phis, visited, ShenandoahLoad, trace, barriers_used)) {
- phis.clear();
- visited.reset();
- }
- }
- }
- }
- }
-
- if (verify_no_useless_barrier) {
- for (int i = 0; i < barriers.length(); i++) {
- Node* n = barriers.at(i);
- if (!barriers_used.member(n)) {
- tty->print("XXX useless barrier"); n->dump(-2);
- ShouldNotReachHere();
- }
- }
- }
-}
-#endif
-
-bool ShenandoahBarrierC2Support::is_anti_dependent_load_at_control(PhaseIdealLoop* phase, Node* maybe_load, Node* store,
- Node* control) {
- return maybe_load->is_Load() && phase->C->can_alias(store->adr_type(), phase->C->get_alias_index(maybe_load->adr_type())) &&
- phase->ctrl_or_self(maybe_load) == control;
-}
-
-void ShenandoahBarrierC2Support::maybe_push_anti_dependent_loads(PhaseIdealLoop* phase, Node* maybe_store, Node* control, Unique_Node_List &wq) {
- if (!maybe_store->is_Store() && !maybe_store->is_LoadStore()) {
- return;
- }
- Node* mem = maybe_store->in(MemNode::Memory);
- for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
- Node* u = mem->fast_out(i);
- if (is_anti_dependent_load_at_control(phase, u, maybe_store, control)) {
- wq.push(u);
- }
- }
-}
-
-void ShenandoahBarrierC2Support::push_data_inputs_at_control(PhaseIdealLoop* phase, Node* n, Node* ctrl, Unique_Node_List &wq) {
- for (uint i = 0; i < n->req(); i++) {
- Node* in = n->in(i);
- if (in != nullptr && phase->has_ctrl(in) && phase->get_ctrl(in) == ctrl) {
- wq.push(in);
- }
- }
-}
-
-bool ShenandoahBarrierC2Support::is_dominator_same_ctrl(Node* c, Node* d, Node* n, PhaseIdealLoop* phase) {
- // That both nodes have the same control is not sufficient to prove
- // domination, verify that there's no path from d to n
- ResourceMark rm;
- Unique_Node_List wq;
- wq.push(d);
- for (uint next = 0; next < wq.size(); next++) {
- Node *m = wq.at(next);
- if (m == n) {
- return false;
- }
- if (m->is_Phi() && m->in(0)->is_Loop()) {
- assert(phase->ctrl_or_self(m->in(LoopNode::EntryControl)) != c, "following loop entry should lead to new control");
- } else {
- // Take anti-dependencies into account
- maybe_push_anti_dependent_loads(phase, m, c, wq);
- push_data_inputs_at_control(phase, m, c, wq);
- }
- }
- return true;
-}
-
-bool ShenandoahBarrierC2Support::is_dominator(Node* d_c, Node* n_c, Node* d, Node* n, PhaseIdealLoop* phase) {
- if (d_c != n_c) {
- return phase->is_dominator(d_c, n_c);
- }
- return is_dominator_same_ctrl(d_c, d, n, phase);
-}
-
-Node* next_mem(Node* mem, int alias) {
- Node* res = nullptr;
- if (mem->is_Proj()) {
- res = mem->in(0);
- } else if (mem->is_SafePoint() || mem->is_MemBar()) {
- res = mem->in(TypeFunc::Memory);
- } else if (mem->is_Phi()) {
- res = mem->in(1);
- } else if (mem->is_MergeMem()) {
- res = mem->as_MergeMem()->memory_at(alias);
- } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
- assert(alias == Compile::AliasIdxRaw, "following raw memory can't lead to a barrier");
- res = mem->in(MemNode::Memory);
- } else {
-#ifdef ASSERT
- mem->dump();
-#endif
- ShouldNotReachHere();
- }
- return res;
-}
-
-Node* ShenandoahBarrierC2Support::no_branches(Node* c, Node* dom, bool allow_one_proj, PhaseIdealLoop* phase) {
- Node* iffproj = nullptr;
- while (c != dom) {
- Node* next = phase->idom(c);
- assert(next->unique_ctrl_out_or_null() == c || c->is_Proj() || c->is_Region(), "multiple control flow out but no proj or region?");
- if (c->is_Region()) {
- ResourceMark rm;
- Unique_Node_List wq;
- wq.push(c);
- for (uint i = 0; i < wq.size(); i++) {
- Node *n = wq.at(i);
- if (n == next) {
- continue;
- }
- if (n->is_Region()) {
- for (uint j = 1; j < n->req(); j++) {
- wq.push(n->in(j));
- }
- } else {
- wq.push(n->in(0));
- }
- }
- for (uint i = 0; i < wq.size(); i++) {
- Node *n = wq.at(i);
- assert(n->is_CFG(), "");
- if (n->is_Multi()) {
- for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
- Node* u = n->fast_out(j);
- if (u->is_CFG()) {
- if (!wq.member(u) && !u->as_Proj()->is_uncommon_trap_proj()) {
- return NodeSentinel;
- }
- }
- }
- }
- }
- } else if (c->is_Proj()) {
- if (c->is_IfProj()) {
- if (c->as_Proj()->is_uncommon_trap_if_pattern() != nullptr) {
- // continue;
- } else {
- if (!allow_one_proj) {
- return NodeSentinel;
- }
- if (iffproj == nullptr) {
- iffproj = c;
- } else {
- return NodeSentinel;
- }
- }
- } else if (c->Opcode() == Op_JumpProj) {
- return NodeSentinel; // unsupported
- } else if (c->Opcode() == Op_CatchProj) {
- return NodeSentinel; // unsupported
- } else if (c->Opcode() == Op_CProj && next->is_NeverBranch()) {
- return NodeSentinel; // unsupported
- } else {
- assert(next->unique_ctrl_out() == c, "unsupported branch pattern");
- }
- }
- c = next;
- }
- return iffproj;
-}
-
-Node* ShenandoahBarrierC2Support::dom_mem(Node* mem, Node* ctrl, int alias, Node*& mem_ctrl, PhaseIdealLoop* phase) {
- ResourceMark rm;
- VectorSet wq;
- wq.set(mem->_idx);
- mem_ctrl = phase->ctrl_or_self(mem);
- while (!phase->is_dominator(mem_ctrl, ctrl) || mem_ctrl == ctrl) {
- mem = next_mem(mem, alias);
- if (wq.test_set(mem->_idx)) {
- return nullptr;
- }
- mem_ctrl = phase->ctrl_or_self(mem);
- }
- if (mem->is_MergeMem()) {
- mem = mem->as_MergeMem()->memory_at(alias);
- mem_ctrl = phase->ctrl_or_self(mem);
- }
- return mem;
-}
-
-Node* ShenandoahBarrierC2Support::find_bottom_mem(Node* ctrl, PhaseIdealLoop* phase) {
- Node* mem = nullptr;
- Node* c = ctrl;
- do {
- if (c->is_Region()) {
- for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax && mem == nullptr; i++) {
- Node* u = c->fast_out(i);
- if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
- if (u->adr_type() == TypePtr::BOTTOM) {
- mem = u;
- }
- }
- }
- } else {
- if (c->is_Call() && c->as_Call()->adr_type() != nullptr) {
- CallProjections projs;
- c->as_Call()->extract_projections(&projs, true, false);
- if (projs.fallthrough_memproj != nullptr) {
- if (projs.fallthrough_memproj->adr_type() == TypePtr::BOTTOM) {
- if (projs.catchall_memproj == nullptr) {
- mem = projs.fallthrough_memproj;
- } else {
- if (phase->is_dominator(projs.fallthrough_catchproj, ctrl)) {
- mem = projs.fallthrough_memproj;
- } else {
- assert(phase->is_dominator(projs.catchall_catchproj, ctrl), "one proj must dominate barrier");
- mem = projs.catchall_memproj;
- }
- }
- }
- } else {
- Node* proj = c->as_Call()->proj_out(TypeFunc::Memory);
- if (proj != nullptr &&
- proj->adr_type() == TypePtr::BOTTOM) {
- mem = proj;
- }
- }
- } else {
- for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
- Node* u = c->fast_out(i);
- if (u->is_Proj() &&
- u->bottom_type() == Type::MEMORY &&
- u->adr_type() == TypePtr::BOTTOM) {
- assert(c->is_SafePoint() || c->is_MemBar() || c->is_Start(), "");
- assert(mem == nullptr, "only one proj");
- mem = u;
- }
- }
- assert(!c->is_Call() || c->as_Call()->adr_type() != nullptr || mem == nullptr, "no mem projection expected");
- }
- }
- c = phase->idom(c);
- } while (mem == nullptr);
- return mem;
-}
-
-void ShenandoahBarrierC2Support::follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses, PhaseIdealLoop* phase) {
- for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
- Node* u = n->fast_out(i);
- if (!u->is_CFG() && phase->get_ctrl(u) == ctrl && (!u->is_Phi() || !u->in(0)->is_Loop() || u->in(LoopNode::LoopBackControl) != n)) {
- uses.push(u);
- }
- }
-}
-
-static void hide_strip_mined_loop(OuterStripMinedLoopNode* outer, CountedLoopNode* inner, PhaseIdealLoop* phase) {
- OuterStripMinedLoopEndNode* le = inner->outer_loop_end();
- Node* new_outer = new LoopNode(outer->in(LoopNode::EntryControl), outer->in(LoopNode::LoopBackControl));
- phase->register_control(new_outer, phase->get_loop(outer), outer->in(LoopNode::EntryControl));
- Node* new_le = new IfNode(le->in(0), le->in(1), le->_prob, le->_fcnt);
- phase->register_control(new_le, phase->get_loop(le), le->in(0));
- phase->replace_node_and_forward_ctrl(outer, new_outer);
- phase->replace_node_and_forward_ctrl(le, new_le);
- inner->clear_strip_mined();
-}
-
-void ShenandoahBarrierC2Support::test_gc_state(Node*& ctrl, Node* raw_mem, Node*& test_fail_ctrl,
- PhaseIdealLoop* phase, int flags) {
- PhaseIterGVN& igvn = phase->igvn();
- Node* old_ctrl = ctrl;
-
- Node* thread = new ThreadLocalNode();
- Node* gc_state_offset = igvn.MakeConX(in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
- Node* gc_state_addr = AddPNode::make_off_heap(thread, gc_state_offset);
- Node* gc_state = new LoadBNode(old_ctrl, raw_mem, gc_state_addr,
- DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(nullptr),
- TypeInt::BYTE, MemNode::unordered);
- Node* gc_state_and = new AndINode(gc_state, igvn.intcon(flags));
- Node* gc_state_cmp = new CmpINode(gc_state_and, igvn.zerocon(T_INT));
- Node* gc_state_bool = new BoolNode(gc_state_cmp, BoolTest::ne);
-
- IfNode* gc_state_iff = new IfNode(old_ctrl, gc_state_bool, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
- ctrl = new IfTrueNode(gc_state_iff);
- test_fail_ctrl = new IfFalseNode(gc_state_iff);
-
- IdealLoopTree* loop = phase->get_loop(old_ctrl);
- phase->register_control(gc_state_iff, loop, old_ctrl);
- phase->register_control(ctrl, loop, gc_state_iff);
- phase->register_control(test_fail_ctrl, loop, gc_state_iff);
-
- phase->register_new_node(thread, old_ctrl);
- phase->register_new_node(gc_state_addr, old_ctrl);
- phase->register_new_node(gc_state, old_ctrl);
- phase->register_new_node(gc_state_and, old_ctrl);
- phase->register_new_node(gc_state_cmp, old_ctrl);
- phase->register_new_node(gc_state_bool, old_ctrl);
-
- phase->set_root_as_ctrl(gc_state_offset);
-
- assert(is_gc_state_test(gc_state_iff, flags), "Should match the shape");
-}
-
-void ShenandoahBarrierC2Support::test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase) {
- Node* old_ctrl = ctrl;
- PhaseIterGVN& igvn = phase->igvn();
-
- const Type* val_t = igvn.type(val);
- if (val_t->meet(TypePtr::NULL_PTR) == val_t) {
- Node* null_cmp = new CmpPNode(val, igvn.zerocon(T_OBJECT));
- Node* null_test = new BoolNode(null_cmp, BoolTest::ne);
-
- IfNode* null_iff = new IfNode(old_ctrl, null_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
- ctrl = new IfTrueNode(null_iff);
- null_ctrl = new IfFalseNode(null_iff);
-
- IdealLoopTree* loop = phase->get_loop(old_ctrl);
- phase->register_control(null_iff, loop, old_ctrl);
- phase->register_control(ctrl, loop, null_iff);
- phase->register_control(null_ctrl, loop, null_iff);
-
- phase->register_new_node(null_cmp, old_ctrl);
- phase->register_new_node(null_test, old_ctrl);
- }
-}
-
-void ShenandoahBarrierC2Support::test_in_cset(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase) {
- Node* old_ctrl = ctrl;
- PhaseIterGVN& igvn = phase->igvn();
-
- Node* raw_val = new CastP2XNode(old_ctrl, val);
- Node* region_size_shift = nullptr;
- if (AOTCodeCache::is_on_for_dump()) {
- Node* aot_addr = igvn.makecon(TypeRawPtr::make(AOTRuntimeConstants::grain_shift_address()));
- region_size_shift = new LoadINode(old_ctrl, raw_mem, aot_addr,
- DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(nullptr),
- TypeInt::INT, MemNode::unordered);
- phase->register_new_node(region_size_shift, old_ctrl);
- } else {
- region_size_shift = igvn.intcon(ShenandoahHeapRegion::region_size_bytes_shift_jint());
- }
- Node* cset_idx = new URShiftXNode(raw_val, region_size_shift);
-
- // Figure out the target cset address with raw pointer math.
- // This avoids matching AddP+LoadB that would emit inefficient code.
- // See JDK-8245465.
- Node* cset_addr_ptr = nullptr;
- if (AOTCodeCache::is_on_for_dump()) {
- Node* aot_addr = igvn.makecon(TypeRawPtr::make(AOTRuntimeConstants::cset_base_address()));
- cset_addr_ptr = new LoadPNode(old_ctrl, raw_mem, aot_addr,
- DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(nullptr),
- TypeRawPtr::NOTNULL, MemNode::unordered);
- phase->register_new_node(cset_addr_ptr, old_ctrl);
- } else {
- cset_addr_ptr = igvn.makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr()));
- }
- Node* cset_addr = new CastP2XNode(old_ctrl, cset_addr_ptr);
- Node* cset_load_addr = new AddXNode(cset_addr, cset_idx);
- Node* cset_load_ptr = new CastX2PNode(cset_load_addr);
-
- Node* cset_load = new LoadBNode(old_ctrl, raw_mem, cset_load_ptr,
- DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(nullptr),
- TypeInt::BYTE, MemNode::unordered);
- Node* cset_cmp = new CmpINode(cset_load, igvn.zerocon(T_INT));
- Node* cset_bool = new BoolNode(cset_cmp, BoolTest::ne);
-
- IfNode* cset_iff = new IfNode(old_ctrl, cset_bool, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
- ctrl = new IfTrueNode(cset_iff);
- not_cset_ctrl = new IfFalseNode(cset_iff);
-
- IdealLoopTree *loop = phase->get_loop(old_ctrl);
- phase->register_control(cset_iff, loop, old_ctrl);
- phase->register_control(ctrl, loop, cset_iff);
- phase->register_control(not_cset_ctrl, loop, cset_iff);
-
- phase->set_root_as_ctrl(cset_addr_ptr);
-
- phase->register_new_node(raw_val, old_ctrl);
- phase->register_new_node(cset_idx, old_ctrl);
- phase->register_new_node(cset_addr, old_ctrl);
- phase->register_new_node(cset_load_addr, old_ctrl);
- phase->register_new_node(cset_load_ptr, old_ctrl);
- phase->register_new_node(cset_load, old_ctrl);
- phase->register_new_node(cset_cmp, old_ctrl);
- phase->register_new_node(cset_bool, old_ctrl);
-}
-
-void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr,
- DecoratorSet decorators, PhaseIdealLoop* phase) {
- IdealLoopTree*loop = phase->get_loop(ctrl);
- const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr();
-
- address calladdr = nullptr;
- const char* name = nullptr;
- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
- bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
- bool is_narrow = UseCompressedOops && !is_native;
- if (is_strong) {
- if (is_narrow) {
- calladdr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
- name = "load_reference_barrier_strong_narrow";
- } else {
- calladdr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
- name = "load_reference_barrier_strong";
- }
- } else if (is_weak) {
- if (is_narrow) {
- calladdr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
- name = "load_reference_barrier_weak_narrow";
- } else {
- calladdr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
- name = "load_reference_barrier_weak";
- }
- } else {
- assert(is_phantom, "only remaining strength");
- if (is_narrow) {
- calladdr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow);
- name = "load_reference_barrier_phantom_narrow";
- } else {
- calladdr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
- name = "load_reference_barrier_phantom";
- }
- }
- Node* call = new CallLeafNode(ShenandoahBarrierSetC2::load_reference_barrier_Type(), calladdr, name, TypeRawPtr::BOTTOM);
-
- call->init_req(TypeFunc::Control, ctrl);
- call->init_req(TypeFunc::I_O, phase->C->top());
- call->init_req(TypeFunc::Memory, phase->C->top());
- call->init_req(TypeFunc::FramePtr, phase->C->top());
- call->init_req(TypeFunc::ReturnAdr, phase->C->top());
- call->init_req(TypeFunc::Parms, val);
- call->init_req(TypeFunc::Parms+1, load_addr);
- phase->register_control(call, loop, ctrl);
- ctrl = new ProjNode(call, TypeFunc::Control);
- phase->register_control(ctrl, loop, call);
- val = new ProjNode(call, TypeFunc::Parms);
- phase->register_new_node(val, call);
- val = new CheckCastPPNode(ctrl, val, obj_type);
- phase->register_new_node(val, ctrl);
-}
-
-void ShenandoahBarrierC2Support::collect_nodes_above_barrier(Unique_Node_List &nodes_above_barrier, PhaseIdealLoop* phase, Node* ctrl, Node* init_raw_mem) {
- nodes_above_barrier.clear();
- if (phase->has_ctrl(init_raw_mem) && phase->get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) {
- nodes_above_barrier.push(init_raw_mem);
- }
- for (uint next = 0; next < nodes_above_barrier.size(); next++) {
- Node* n = nodes_above_barrier.at(next);
- // Take anti-dependencies into account
- maybe_push_anti_dependent_loads(phase, n, ctrl, nodes_above_barrier);
- push_data_inputs_at_control(phase, n, ctrl, nodes_above_barrier);
- }
-}
-
-void ShenandoahBarrierC2Support::fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& nodes_above_barrier, uint last, PhaseIdealLoop* phase) {
- Node* ctrl = phase->get_ctrl(barrier);
- Node* init_raw_mem = fixer.find_mem(ctrl, barrier);
-
- // Update the control of all nodes that should be after the
- // barrier control flow
- uses.clear();
- // Every node that is control dependent on the barrier's input
- // control will be after the expanded barrier. The raw memory (if
- // its memory is control dependent on the barrier's input control)
- // must stay above the barrier.
- collect_nodes_above_barrier(nodes_above_barrier, phase, ctrl, init_raw_mem);
- for (DUIterator_Fast imax, i = ctrl->fast_outs(imax); i < imax; i++) {
- Node* u = ctrl->fast_out(i);
- if (u->_idx < last &&
- u != barrier &&
- !u->depends_only_on_test() && // preserve dependency on test
- !nodes_above_barrier.member(u) &&
- (u->in(0) != ctrl || (!u->is_Region() && !u->is_Phi())) &&
- (ctrl->Opcode() != Op_CatchProj || u->Opcode() != Op_CreateEx)) {
- Node* old_c = phase->ctrl_or_self(u);
- if (old_c != ctrl ||
- is_dominator_same_ctrl(old_c, barrier, u, phase) ||
- ShenandoahBarrierSetC2::is_shenandoah_state_load(u)) {
- phase->igvn().rehash_node_delayed(u);
- int nb = u->replace_edge(ctrl, region, &phase->igvn());
- if (u->is_CFG()) {
- if (phase->idom(u) == ctrl) {
- phase->set_idom(u, region, phase->dom_depth(region));
- }
- } else if (phase->get_ctrl(u) == ctrl) {
- assert(u != init_raw_mem, "should leave input raw mem above the barrier");
- uses.push(u);
- }
- assert(nb == 1, "more than 1 ctrl input?");
- --i, imax -= nb;
- }
- }
- }
-}
-
-static Node* create_phis_on_call_return(Node* ctrl, Node* c, Node* n, Node* n_clone, const CallProjections& projs, PhaseIdealLoop* phase) {
- Node* region = nullptr;
- while (c != ctrl) {
- if (c->is_Region()) {
- region = c;
- }
- c = phase->idom(c);
- }
- assert(region != nullptr, "");
- Node* phi = new PhiNode(region, n->bottom_type());
- for (uint j = 1; j < region->req(); j++) {
- Node* in = region->in(j);
- if (phase->is_dominator(projs.fallthrough_catchproj, in)) {
- phi->init_req(j, n);
- } else if (phase->is_dominator(projs.catchall_catchproj, in)) {
- phi->init_req(j, n_clone);
- } else {
- phi->init_req(j, create_phis_on_call_return(ctrl, in, n, n_clone, projs, phase));
- }
- }
- phase->register_new_node(phi, region);
- return phi;
-}
-
-void ShenandoahBarrierC2Support::pin_and_expand(PhaseIdealLoop* phase) {
- ShenandoahBarrierSetC2State* state = ShenandoahBarrierSetC2::bsc2()->state();
-
- Unique_Node_List uses;
- Node_Stack stack(0);
- Node_List clones;
- for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
- ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
-
- Node* ctrl = phase->get_ctrl(lrb);
- Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
-
- CallStaticJavaNode* unc = nullptr;
- Node* unc_ctrl = nullptr;
- Node* uncasted_val = val;
-
- for (DUIterator_Fast imax, i = lrb->fast_outs(imax); i < imax; i++) {
- Node* u = lrb->fast_out(i);
- if (u->Opcode() == Op_CastPP &&
- u->in(0) != nullptr &&
- phase->is_dominator(u->in(0), ctrl)) {
- const Type* u_t = phase->igvn().type(u);
-
- if (u_t->meet(TypePtr::NULL_PTR) != u_t &&
- u->in(0)->Opcode() == Op_IfTrue &&
- u->in(0)->as_Proj()->is_uncommon_trap_if_pattern() &&
- u->in(0)->in(0)->is_If() &&
- u->in(0)->in(0)->in(1)->Opcode() == Op_Bool &&
- u->in(0)->in(0)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
- u->in(0)->in(0)->in(1)->in(1)->Opcode() == Op_CmpP &&
- u->in(0)->in(0)->in(1)->in(1)->in(1) == val &&
- u->in(0)->in(0)->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) {
- IdealLoopTree* loop = phase->get_loop(ctrl);
- IdealLoopTree* unc_loop = phase->get_loop(u->in(0));
-
- if (!unc_loop->is_member(loop)) {
- continue;
- }
-
- Node* branch = no_branches(ctrl, u->in(0), false, phase);
- assert(branch == nullptr || branch == NodeSentinel, "was not looking for a branch");
- if (branch == NodeSentinel) {
- continue;
- }
-
- Node* iff = u->in(0)->in(0);
- Node* bol = iff->in(1)->clone();
- Node* cmp = bol->in(1)->clone();
- cmp->set_req(1, lrb);
- bol->set_req(1, cmp);
- phase->igvn().replace_input_of(iff, 1, bol);
- phase->set_ctrl(lrb, iff->in(0));
- phase->register_new_node(cmp, iff->in(0));
- phase->register_new_node(bol, iff->in(0));
- break;
- }
- }
- }
- // Load barrier on the control output of a call
- if ((ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) || ctrl->is_CallJava()) {
- CallJavaNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_CallJava() : ctrl->as_CallJava();
- if (call->entry_point() == OptoRuntime::rethrow_stub()) {
- // The rethrow call may have too many projections to be
- // properly handled here. Given there's no reason for a
- // barrier to depend on the call, move it above the call
- stack.push(lrb, 0);
- do {
- Node* n = stack.node();
- uint idx = stack.index();
- if (idx < n->req()) {
- Node* in = n->in(idx);
- stack.set_index(idx+1);
- if (in != nullptr) {
- if (phase->has_ctrl(in)) {
- if (phase->is_dominator(call, phase->get_ctrl(in))) {
-#ifdef ASSERT
- for (uint i = 0; i < stack.size(); i++) {
- assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
- }
-#endif
- stack.push(in, 0);
- }
- } else {
- assert(phase->is_dominator(in, call->in(0)), "no dependency on the call");
- }
- }
- } else {
- phase->set_ctrl(n, call->in(0));
- stack.pop();
- }
- } while(stack.size() > 0);
- continue;
- }
- CallProjections projs;
- call->extract_projections(&projs, false, false);
-
- // If this is a runtime call, it doesn't have an exception handling path
- if (projs.fallthrough_catchproj == nullptr) {
- assert(call->method() == nullptr, "should be runtime call");
- assert(projs.catchall_catchproj == nullptr, "runtime call should not have catch all projection");
- continue;
- }
-
- // Otherwise, clone the barrier so there's one for the fallthrough and one for the exception handling path
-#ifdef ASSERT
- VectorSet cloned;
-#endif
- Node* lrb_clone = lrb->clone();
- phase->register_new_node(lrb_clone, projs.catchall_catchproj);
- phase->set_ctrl(lrb, projs.fallthrough_catchproj);
-
- stack.push(lrb, 0);
- clones.push(lrb_clone);
-
- do {
- assert(stack.size() == clones.size(), "");
- Node* n = stack.node();
-#ifdef ASSERT
- if (n->is_Load()) {
- Node* mem = n->in(MemNode::Memory);
- for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
- Node* u = mem->fast_out(j);
- assert(!u->is_Store() || !u->is_LoadStore() || phase->get_ctrl(u) != ctrl, "anti dependent store?");
- }
- }
-#endif
- uint idx = stack.index();
- Node* n_clone = clones.at(clones.size()-1);
- if (idx < n->outcnt()) {
- Node* u = n->raw_out(idx);
- Node* c = phase->ctrl_or_self(u);
- if (phase->is_dominator(call, c) && phase->is_dominator(c, projs.fallthrough_proj)) {
- stack.set_index(idx+1);
- assert(!u->is_CFG(), "");
- stack.push(u, 0);
- assert(!cloned.test_set(u->_idx), "only one clone");
- Node* u_clone = u->clone();
- int nb = u_clone->replace_edge(n, n_clone, &phase->igvn());
- assert(nb > 0, "should have replaced some uses");
- phase->register_new_node(u_clone, projs.catchall_catchproj);
- clones.push(u_clone);
- phase->set_ctrl(u, projs.fallthrough_catchproj);
- } else {
- bool replaced = false;
- if (u->is_Phi()) {
- for (uint k = 1; k < u->req(); k++) {
- if (u->in(k) == n) {
- if (phase->is_dominator(projs.catchall_catchproj, u->in(0)->in(k))) {
- phase->igvn().replace_input_of(u, k, n_clone);
- replaced = true;
- } else if (!phase->is_dominator(projs.fallthrough_catchproj, u->in(0)->in(k))) {
- phase->igvn().replace_input_of(u, k, create_phis_on_call_return(ctrl, u->in(0)->in(k), n, n_clone, projs, phase));
- replaced = true;
- }
- }
- }
- } else {
- if (phase->is_dominator(projs.catchall_catchproj, c)) {
- phase->igvn().rehash_node_delayed(u);
- int nb = u->replace_edge(n, n_clone, &phase->igvn());
- assert(nb > 0, "should have replaced some uses");
- replaced = true;
- } else if (!phase->is_dominator(projs.fallthrough_catchproj, c)) {
- if (u->is_If()) {
- // Can't break If/Bool/Cmp chain
- assert(n->is_Bool(), "unexpected If shape");
- assert(stack.node_at(stack.size()-2)->is_Cmp(), "unexpected If shape");
- assert(n_clone->is_Bool(), "unexpected clone");
- assert(clones.at(clones.size()-2)->is_Cmp(), "unexpected clone");
- Node* bol_clone = n->clone();
- Node* cmp_clone = stack.node_at(stack.size()-2)->clone();
- bol_clone->set_req(1, cmp_clone);
-
- Node* nn = stack.node_at(stack.size()-3);
- Node* nn_clone = clones.at(clones.size()-3);
- assert(nn->Opcode() == nn_clone->Opcode(), "mismatch");
-
- int nb = cmp_clone->replace_edge(nn, create_phis_on_call_return(ctrl, c, nn, nn_clone, projs, phase),
- &phase->igvn());
- assert(nb > 0, "should have replaced some uses");
-
- phase->register_new_node(bol_clone, u->in(0));
- phase->register_new_node(cmp_clone, u->in(0));
-
- phase->igvn().replace_input_of(u, 1, bol_clone);
-
- } else {
- phase->igvn().rehash_node_delayed(u);
- int nb = u->replace_edge(n, create_phis_on_call_return(ctrl, c, n, n_clone, projs, phase), &phase->igvn());
- assert(nb > 0, "should have replaced some uses");
- }
- replaced = true;
- }
- }
- if (!replaced) {
- stack.set_index(idx+1);
- }
- }
- } else {
- stack.pop();
- clones.pop();
- }
- } while (stack.size() > 0);
- assert(stack.size() == 0 && clones.size() == 0, "");
- }
- }
-
- for (int i = 0; i < state->load_reference_barriers_count(); i++) {
- ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
- Node* ctrl = phase->get_ctrl(lrb);
- IdealLoopTree* loop = phase->get_loop(ctrl);
- Node* head = loop->head();
- if (head->is_OuterStripMinedLoop()) {
- // Expanding a barrier here will break loop strip mining
- // verification. Transform the loop so the loop nest doesn't
- // appear as strip mined.
- OuterStripMinedLoopNode* outer = head->as_OuterStripMinedLoop();
- hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
- }
- if (head->is_BaseCountedLoop() && ctrl->is_IfProj() && ctrl->in(0)->is_BaseCountedLoopEnd() &&
- head->as_BaseCountedLoop()->loopexit() == ctrl->in(0)) {
- Node* entry = head->in(LoopNode::EntryControl);
- Node* backedge = head->in(LoopNode::LoopBackControl);
- Node* new_head = new LoopNode(entry, backedge);
- phase->register_control(new_head, phase->get_loop(entry), entry);
- phase->replace_node_and_forward_ctrl(head, new_head);
- }
- }
-
- // Expand load-reference-barriers
- MemoryGraphFixer fixer(Compile::AliasIdxRaw, true, phase);
- Unique_Node_List nodes_above_barriers;
- for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
- ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
- uint last = phase->C->unique();
- Node* ctrl = phase->get_ctrl(lrb);
- Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
-
- Node* orig_ctrl = ctrl;
-
- Node* raw_mem = fixer.find_mem(ctrl, lrb);
- Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, nullptr);
-
- IdealLoopTree *loop = phase->get_loop(ctrl);
-
- Node* heap_stable_ctrl = nullptr;
- Node* null_ctrl = nullptr;
-
- assert(val->bottom_type()->make_oopptr(), "need oop");
- assert(val->bottom_type()->make_oopptr()->const_oop() == nullptr, "expect non-constant");
-
- enum { _heap_stable = 1, _evac_path, _not_cset, PATH_LIMIT };
- Node* region = new RegionNode(PATH_LIMIT);
- Node* val_phi = new PhiNode(region, val->bottom_type()->is_oopptr());
-
- // Stable path.
- int flags = ShenandoahHeap::HAS_FORWARDED;
- if (!ShenandoahBarrierSet::is_strong_access(lrb->decorators())) {
- flags |= ShenandoahHeap::WEAK_ROOTS;
- }
- test_gc_state(ctrl, raw_mem, heap_stable_ctrl, phase, flags);
- IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
-
- // Heap stable case
- region->init_req(_heap_stable, heap_stable_ctrl);
- val_phi->init_req(_heap_stable, val);
-
- // Test for in-cset, unless it's a native-LRB. Native LRBs need to return null
- // even for non-cset objects to prevent resurrection of such objects.
- // Wires !in_cset(obj) to slot 2 of region and phis
- Node* not_cset_ctrl = nullptr;
- if (ShenandoahBarrierSet::is_strong_access(lrb->decorators())) {
- test_in_cset(ctrl, not_cset_ctrl, val, raw_mem, phase);
- }
- if (not_cset_ctrl != nullptr) {
- region->init_req(_not_cset, not_cset_ctrl);
- val_phi->init_req(_not_cset, val);
- } else {
- region->del_req(_not_cset);
- val_phi->del_req(_not_cset);
- }
-
- // Resolve object when orig-value is in cset.
- // Make the unconditional resolve for fwdptr.
-
- // Call lrb-stub and wire up that path in slots 4
- Node* result_mem = nullptr;
-
- Node* addr;
- {
- VectorSet visited;
- addr = get_load_addr(phase, visited, lrb);
- }
- if (addr->Opcode() == Op_AddP) {
- Node* orig_base = addr->in(AddPNode::Base);
- Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::DependencyType::NonFloatingNarrowing);
- phase->register_new_node(base, ctrl);
- if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
- // Field access
- addr = addr->clone();
- addr->set_req(AddPNode::Base, base);
- addr->set_req(AddPNode::Address, base);
- phase->register_new_node(addr, ctrl);
- } else {
- Node* addr2 = addr->in(AddPNode::Address);
- if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
- addr2->in(AddPNode::Base) == orig_base) {
- addr2 = addr2->clone();
- addr2->set_req(AddPNode::Base, base);
- addr2->set_req(AddPNode::Address, base);
- phase->register_new_node(addr2, ctrl);
- addr = addr->clone();
- addr->set_req(AddPNode::Base, base);
- addr->set_req(AddPNode::Address, addr2);
- phase->register_new_node(addr, ctrl);
- }
- }
- }
- call_lrb_stub(ctrl, val, addr, lrb->decorators(), phase);
- region->init_req(_evac_path, ctrl);
- val_phi->init_req(_evac_path, val);
-
- phase->register_control(region, loop, heap_stable_iff);
- Node* out_val = val_phi;
- phase->register_new_node(val_phi, region);
-
- fix_ctrl(lrb, region, fixer, uses, nodes_above_barriers, last, phase);
-
- ctrl = orig_ctrl;
-
- phase->igvn().replace_node(lrb, out_val);
-
- follow_barrier_uses(out_val, ctrl, uses, phase);
-
- for(uint next = 0; next < uses.size(); next++ ) {
- Node *n = uses.at(next);
- assert(phase->get_ctrl(n) == ctrl, "bad control");
- assert(n != raw_mem, "should leave input raw mem above the barrier");
- phase->set_ctrl(n, region);
- follow_barrier_uses(n, ctrl, uses, phase);
- }
- fixer.record_new_ctrl(ctrl, region, raw_mem, raw_mem_for_ctrl);
- }
- // Done expanding load-reference-barriers.
- assert(ShenandoahBarrierSetC2::bsc2()->state()->load_reference_barriers_count() == 0, "all load reference barrier nodes should have been replaced");
-}
-
-Node* ShenandoahBarrierC2Support::get_load_addr(PhaseIdealLoop* phase, VectorSet& visited, Node* in) {
- if (visited.test_set(in->_idx)) {
- return nullptr;
- }
- switch (in->Opcode()) {
- case Op_Proj:
- return get_load_addr(phase, visited, in->in(0));
- case Op_CastPP:
- case Op_CheckCastPP:
- case Op_DecodeN:
- case Op_EncodeP:
- return get_load_addr(phase, visited, in->in(1));
- case Op_LoadN:
- case Op_LoadP:
- return in->in(MemNode::Address);
- case Op_CompareAndExchangeN:
- case Op_CompareAndExchangeP:
- case Op_GetAndSetN:
- case Op_GetAndSetP:
- case Op_ShenandoahCompareAndExchangeP:
- case Op_ShenandoahCompareAndExchangeN:
- // Those instructions would just have stored a different
- // value into the field. No use to attempt to fix it at this point.
- return phase->igvn().zerocon(T_OBJECT);
- case Op_CMoveP:
- case Op_CMoveN: {
- Node* t = get_load_addr(phase, visited, in->in(CMoveNode::IfTrue));
- Node* f = get_load_addr(phase, visited, in->in(CMoveNode::IfFalse));
- // Handle unambiguous cases: single address reported on both branches.
- if (t != nullptr && f == nullptr) return t;
- if (t == nullptr && f != nullptr) return f;
- if (t != nullptr && t == f) return t;
- // Ambiguity.
- return phase->igvn().zerocon(T_OBJECT);
- }
- case Op_Phi: {
- Node* addr = nullptr;
- for (uint i = 1; i < in->req(); i++) {
- Node* addr1 = get_load_addr(phase, visited, in->in(i));
- if (addr == nullptr) {
- addr = addr1;
- }
- if (addr != addr1) {
- return phase->igvn().zerocon(T_OBJECT);
- }
- }
- return addr;
- }
- case Op_ShenandoahLoadReferenceBarrier:
- return get_load_addr(phase, visited, in->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
- case Op_CallDynamicJava:
- case Op_CallLeaf:
- case Op_CallStaticJava:
- case Op_ConN:
- case Op_ConP:
- case Op_Parm:
- case Op_CreateEx:
- return phase->igvn().zerocon(T_OBJECT);
- default:
-#ifdef ASSERT
- fatal("Unknown node in get_load_addr: %s", NodeClassNames[in->Opcode()]);
-#endif
- return phase->igvn().zerocon(T_OBJECT);
- }
-
-}
-
-#ifdef ASSERT
-static bool has_never_branch(Node* root) {
- for (uint i = 1; i < root->req(); i++) {
- Node* in = root->in(i);
- if (in != nullptr && in->Opcode() == Op_Halt && in->in(0)->is_Proj() && in->in(0)->in(0)->is_NeverBranch()) {
- return true;
- }
- }
- return false;
-}
-#endif
-
-void MemoryGraphFixer::collect_memory_nodes() {
- Node_Stack stack(0);
- VectorSet visited;
- Node_List regions;
-
- // Walk the raw memory graph and create a mapping from CFG node to
- // memory node. Exclude phis for now.
- stack.push(_phase->C->root(), 1);
- do {
- Node* n = stack.node();
- int opc = n->Opcode();
- uint i = stack.index();
- if (i < n->req()) {
- Node* mem = nullptr;
- if (opc == Op_Root) {
- Node* in = n->in(i);
- int in_opc = in->Opcode();
- if (in_opc == Op_Return || in_opc == Op_Rethrow) {
- mem = in->in(TypeFunc::Memory);
- } else if (in_opc == Op_Halt) {
- if (in->in(0)->is_Region()) {
- Node* r = in->in(0);
- for (uint j = 1; j < r->req(); j++) {
- assert(!r->in(j)->is_NeverBranch(), "");
- }
- } else {
- Node* proj = in->in(0);
- assert(proj->is_Proj(), "");
- Node* in = proj->in(0);
- assert(in->is_CallStaticJava() || in->is_NeverBranch() || in->Opcode() == Op_Catch || proj->is_IfProj(), "");
- if (in->is_CallStaticJava()) {
- mem = in->in(TypeFunc::Memory);
- } else if (in->Opcode() == Op_Catch) {
- Node* call = in->in(0)->in(0);
- assert(call->is_Call(), "");
- mem = call->in(TypeFunc::Memory);
- } else if (in->is_NeverBranch()) {
- mem = collect_memory_for_infinite_loop(in);
- }
- }
- } else {
-#ifdef ASSERT
- n->dump();
- in->dump();
-#endif
- ShouldNotReachHere();
- }
- } else {
- assert(n->is_Phi() && n->bottom_type() == Type::MEMORY, "");
- assert(n->adr_type() == TypePtr::BOTTOM || _phase->C->get_alias_index(n->adr_type()) == _alias, "");
- mem = n->in(i);
- }
- i++;
- stack.set_index(i);
- if (mem == nullptr) {
- continue;
- }
- for (;;) {
- if (visited.test_set(mem->_idx) || mem->is_Start()) {
- break;
- }
- if (mem->is_Phi()) {
- stack.push(mem, 2);
- mem = mem->in(1);
- } else if (mem->is_Proj()) {
- stack.push(mem, mem->req());
- mem = mem->in(0);
- } else if (mem->is_SafePoint() || mem->is_MemBar()) {
- mem = mem->in(TypeFunc::Memory);
- } else if (mem->is_MergeMem()) {
- MergeMemNode* mm = mem->as_MergeMem();
- mem = mm->memory_at(_alias);
- } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
- assert(_alias == Compile::AliasIdxRaw, "");
- stack.push(mem, mem->req());
- mem = mem->in(MemNode::Memory);
- } else {
-#ifdef ASSERT
- mem->dump();
-#endif
- ShouldNotReachHere();
- }
- }
- } else {
- if (n->is_Phi()) {
- // Nothing
- } else if (!n->is_Root()) {
- Node* c = get_ctrl(n);
- _memory_nodes.map(c->_idx, n);
- }
- stack.pop();
- }
- } while(stack.is_nonempty());
-
- // Iterate over CFG nodes in rpo and propagate memory state to
- // compute memory state at regions, creating new phis if needed.
- Node_List rpo_list;
- visited.clear();
- _phase->rpo(_phase->C->root(), stack, visited, rpo_list);
- Node* root = rpo_list.pop();
- assert(root == _phase->C->root(), "");
-
- const bool trace = false;
-#ifdef ASSERT
- if (trace) {
- for (int i = rpo_list.size() - 1; i >= 0; i--) {
- Node* c = rpo_list.at(i);
- if (_memory_nodes[c->_idx] != nullptr) {
- tty->print("X %d", c->_idx); _memory_nodes[c->_idx]->dump();
- }
- }
- }
-#endif
- uint last = _phase->C->unique();
-
-#ifdef ASSERT
- uint16_t max_depth = 0;
- for (LoopTreeIterator iter(_phase->ltree_root()); !iter.done(); iter.next()) {
- IdealLoopTree* lpt = iter.current();
- max_depth = MAX2(max_depth, lpt->_nest);
- }
-#endif
-
- bool progress = true;
- int iteration = 0;
- Node_List dead_phis;
- while (progress) {
- progress = false;
- iteration++;
- assert(iteration <= 2+max_depth || _phase->C->has_irreducible_loop() || has_never_branch(_phase->C->root()), "");
- if (trace) { tty->print_cr("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); }
-
- for (int i = rpo_list.size() - 1; i >= 0; i--) {
- Node* c = rpo_list.at(i);
-
- Node* prev_mem = _memory_nodes[c->_idx];
- if (c->is_Region() && (_include_lsm || !c->is_OuterStripMinedLoop())) {
- Node* prev_region = regions[c->_idx];
- Node* unique = nullptr;
- for (uint j = 1; j < c->req() && unique != NodeSentinel; j++) {
- Node* m = _memory_nodes[c->in(j)->_idx];
- assert(m != nullptr || (c->is_Loop() && j == LoopNode::LoopBackControl && iteration == 1) || _phase->C->has_irreducible_loop() || has_never_branch(_phase->C->root()), "expect memory state");
- if (m != nullptr) {
- if (m == prev_region && ((c->is_Loop() && j == LoopNode::LoopBackControl) || (prev_region->is_Phi() && prev_region->in(0) == c))) {
- assert((c->is_Loop() && j == LoopNode::LoopBackControl) || _phase->C->has_irreducible_loop() || has_never_branch(_phase->C->root()), "");
- // continue
- } else if (unique == nullptr) {
- unique = m;
- } else if (m == unique) {
- // continue
- } else {
- unique = NodeSentinel;
- }
- }
- }
- assert(unique != nullptr, "empty phi???");
- if (unique != NodeSentinel) {
- if (prev_region != nullptr && prev_region->is_Phi() && prev_region->in(0) == c) {
- dead_phis.push(prev_region);
- }
- regions.map(c->_idx, unique);
- } else {
- Node* phi = nullptr;
- if (prev_region != nullptr && prev_region->is_Phi() && prev_region->in(0) == c && prev_region->_idx >= last) {
- phi = prev_region;
- for (uint k = 1; k < c->req(); k++) {
- Node* m = _memory_nodes[c->in(k)->_idx];
- assert(m != nullptr, "expect memory state");
- phi->set_req(k, m);
- }
- } else {
- for (DUIterator_Fast jmax, j = c->fast_outs(jmax); j < jmax && phi == nullptr; j++) {
- Node* u = c->fast_out(j);
- if (u->is_Phi() && u->bottom_type() == Type::MEMORY &&
- (u->adr_type() == TypePtr::BOTTOM || _phase->C->get_alias_index(u->adr_type()) == _alias)) {
- phi = u;
- for (uint k = 1; k < c->req() && phi != nullptr; k++) {
- Node* m = _memory_nodes[c->in(k)->_idx];
- assert(m != nullptr, "expect memory state");
- if (u->in(k) != m) {
- phi = NodeSentinel;
- }
- }
- }
- }
- if (phi == NodeSentinel) {
- phi = new PhiNode(c, Type::MEMORY, _phase->C->get_adr_type(_alias));
- for (uint k = 1; k < c->req(); k++) {
- Node* m = _memory_nodes[c->in(k)->_idx];
- assert(m != nullptr, "expect memory state");
- phi->init_req(k, m);
- }
- }
- }
- if (phi != nullptr) {
- regions.map(c->_idx, phi);
- } else {
- assert(c->unique_ctrl_out()->Opcode() == Op_Halt, "expected memory state");
- }
- }
- Node* current_region = regions[c->_idx];
- if (current_region != prev_region) {
- progress = true;
- if (prev_region == prev_mem) {
- _memory_nodes.map(c->_idx, current_region);
- }
- }
- } else if (prev_mem == nullptr || prev_mem->is_Phi() || ctrl_or_self(prev_mem) != c) {
- Node* m = _memory_nodes[_phase->idom(c)->_idx];
- assert(m != nullptr || c->Opcode() == Op_Halt, "expect memory state");
- if (m != prev_mem) {
- _memory_nodes.map(c->_idx, m);
- progress = true;
- }
- }
-#ifdef ASSERT
- if (trace) { tty->print("X %d", c->_idx); _memory_nodes[c->_idx]->dump(); }
-#endif
- }
- }
-
- // Replace existing phi with computed memory state for that region
- // if different (could be a new phi or a dominating memory node if
- // that phi was found to be useless).
- while (dead_phis.size() > 0) {
- Node* n = dead_phis.pop();
- n->replace_by(_phase->C->top());
- n->destruct(&_phase->igvn());
- }
- for (int i = rpo_list.size() - 1; i >= 0; i--) {
- Node* c = rpo_list.at(i);
- if (c->is_Region() && (_include_lsm || !c->is_OuterStripMinedLoop())) {
- Node* n = regions[c->_idx];
- assert(n != nullptr || c->unique_ctrl_out()->Opcode() == Op_Halt, "expected memory state");
- if (n != nullptr && n->is_Phi() && n->_idx >= last && n->in(0) == c) {
- _phase->register_new_node(n, c);
- }
- }
- }
- for (int i = rpo_list.size() - 1; i >= 0; i--) {
- Node* c = rpo_list.at(i);
- if (c->is_Region() && (_include_lsm || !c->is_OuterStripMinedLoop())) {
- Node* n = regions[c->_idx];
- assert(n != nullptr || c->unique_ctrl_out()->Opcode() == Op_Halt, "expected memory state");
- for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
- Node* u = c->fast_out(i);
- if (u->is_Phi() && u->bottom_type() == Type::MEMORY &&
- u != n) {
- assert(c->unique_ctrl_out()->Opcode() != Op_Halt, "expected memory state");
- if (u->adr_type() == TypePtr::BOTTOM) {
- fix_memory_uses(u, n, n, c);
- } else if (_phase->C->get_alias_index(u->adr_type()) == _alias) {
- _phase->igvn().replace_node(u, n);
- --i; --imax;
- }
- }
- }
- }
- }
-}
-
-Node* MemoryGraphFixer::collect_memory_for_infinite_loop(const Node* in) {
- Node* mem = nullptr;
- Node* head = in->in(0);
- assert(head->is_Region(), "unexpected infinite loop graph shape");
-
- Node* phi_mem = nullptr;
- for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
- Node* u = head->fast_out(j);
- if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
- if (_phase->C->get_alias_index(u->adr_type()) == _alias) {
- assert(phi_mem == nullptr || phi_mem->adr_type() == TypePtr::BOTTOM, "");
- phi_mem = u;
- } else if (u->adr_type() == TypePtr::BOTTOM) {
- assert(phi_mem == nullptr || _phase->C->get_alias_index(phi_mem->adr_type()) == _alias, "");
- if (phi_mem == nullptr) {
- phi_mem = u;
- }
- }
- }
- }
- if (phi_mem == nullptr) {
- ResourceMark rm;
- Node_Stack stack(0);
- stack.push(head, 1);
- do {
- Node* n = stack.node();
- uint i = stack.index();
- if (i >= n->req()) {
- stack.pop();
- } else {
- stack.set_index(i + 1);
- Node* c = n->in(i);
- assert(c != head, "should have found a safepoint on the way");
- if (stack.size() != 1 || _phase->is_dominator(head, c)) {
- for (;;) {
- if (c->is_Region()) {
- stack.push(c, 1);
- break;
- } else if (c->is_SafePoint() && !c->is_CallLeaf()) {
- Node* m = c->in(TypeFunc::Memory);
- if (m->is_MergeMem()) {
- m = m->as_MergeMem()->memory_at(_alias);
- }
- assert(mem == nullptr || mem == m, "several memory states");
- mem = m;
- break;
- } else {
- assert(c != c->in(0), "");
- c = c->in(0);
- }
- }
- }
- }
- } while (stack.size() > 0);
- assert(mem != nullptr, "should have found safepoint");
- } else {
- mem = phi_mem;
- }
- return mem;
-}
-
-Node* MemoryGraphFixer::get_ctrl(Node* n) const {
- Node* c = _phase->get_ctrl(n);
- if (n->is_Proj() && n->in(0) != nullptr && n->in(0)->is_Call()) {
- assert(c == n->in(0), "");
- CallNode* call = c->as_Call();
- CallProjections projs;
- call->extract_projections(&projs, true, false);
- if (projs.catchall_memproj != nullptr) {
- if (projs.fallthrough_memproj == n) {
- c = projs.fallthrough_catchproj;
- } else {
- assert(projs.catchall_memproj == n, "");
- c = projs.catchall_catchproj;
- }
- }
- }
- return c;
-}
-
-Node* MemoryGraphFixer::ctrl_or_self(Node* n) const {
- if (_phase->has_ctrl(n))
- return get_ctrl(n);
- else {
- assert (n->is_CFG(), "must be a CFG node");
- return n;
- }
-}
-
-bool MemoryGraphFixer::mem_is_valid(Node* m, Node* c) const {
- return m != nullptr && get_ctrl(m) == c;
-}
-
-Node* MemoryGraphFixer::find_mem(Node* ctrl, Node* n) const {
- assert(n == nullptr || _phase->ctrl_or_self(n) == ctrl, "");
- assert(!ctrl->is_Call() || ctrl == n, "projection expected");
-#ifdef ASSERT
- if ((ctrl->is_Proj() && ctrl->in(0)->is_Call()) ||
- (ctrl->is_Catch() && ctrl->in(0)->in(0)->is_Call())) {
- CallNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_Call() : ctrl->in(0)->in(0)->as_Call();
- int mems = 0;
- for (DUIterator_Fast imax, i = call->fast_outs(imax); i < imax; i++) {
- Node* u = call->fast_out(i);
- if (u->bottom_type() == Type::MEMORY) {
- mems++;
- }
- }
- assert(mems <= 1, "No node right after call if multiple mem projections");
- }
-#endif
- Node* mem = _memory_nodes[ctrl->_idx];
- Node* c = ctrl;
- while (!mem_is_valid(mem, c) &&
- (!c->is_CatchProj() || mem == nullptr || c->in(0)->in(0)->in(0) != get_ctrl(mem))) {
- c = _phase->idom(c);
- mem = _memory_nodes[c->_idx];
- }
- if (n != nullptr && mem_is_valid(mem, c)) {
- while (!ShenandoahBarrierC2Support::is_dominator_same_ctrl(c, mem, n, _phase) && _phase->ctrl_or_self(mem) == ctrl) {
- mem = next_mem(mem, _alias);
- }
- if (mem->is_MergeMem()) {
- mem = mem->as_MergeMem()->memory_at(_alias);
- }
- if (!mem_is_valid(mem, c)) {
- do {
- c = _phase->idom(c);
- mem = _memory_nodes[c->_idx];
- } while (!mem_is_valid(mem, c) &&
- (!c->is_CatchProj() || mem == nullptr || c->in(0)->in(0)->in(0) != get_ctrl(mem)));
- }
- }
- assert(mem->bottom_type() == Type::MEMORY, "");
- return mem;
-}
-
-bool MemoryGraphFixer::has_mem_phi(Node* region) const {
- for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
- Node* use = region->fast_out(i);
- if (use->is_Phi() && use->bottom_type() == Type::MEMORY &&
- (_phase->C->get_alias_index(use->adr_type()) == _alias)) {
- return true;
- }
- }
- return false;
-}
-
-void MemoryGraphFixer::fix_mem(Node* ctrl, Node* new_ctrl, Node* mem, Node* mem_for_ctrl, Node* new_mem, Unique_Node_List& uses) {
- assert(_phase->ctrl_or_self(new_mem) == new_ctrl, "");
- const bool trace = false;
- DEBUG_ONLY(if (trace) { tty->print("ZZZ control is"); ctrl->dump(); });
- DEBUG_ONLY(if (trace) { tty->print("ZZZ mem is"); mem->dump(); });
- GrowableArray phis;
- if (mem_for_ctrl != mem) {
- Node* old = mem_for_ctrl;
- Node* prev = nullptr;
- while (old != mem) {
- prev = old;
- if (old->is_Store() || old->is_ClearArray() || old->is_LoadStore()) {
- assert(_alias == Compile::AliasIdxRaw, "");
- old = old->in(MemNode::Memory);
- } else if (old->Opcode() == Op_SCMemProj) {
- assert(_alias == Compile::AliasIdxRaw, "");
- old = old->in(0);
- } else {
- ShouldNotReachHere();
- }
- }
- assert(prev != nullptr, "");
- if (new_ctrl != ctrl) {
- _memory_nodes.map(ctrl->_idx, mem);
- _memory_nodes.map(new_ctrl->_idx, mem_for_ctrl);
- }
- uint input = (uint)MemNode::Memory;
- _phase->igvn().replace_input_of(prev, input, new_mem);
- } else {
- uses.clear();
- _memory_nodes.map(new_ctrl->_idx, new_mem);
- uses.push(new_ctrl);
- for(uint next = 0; next < uses.size(); next++ ) {
- Node *n = uses.at(next);
- assert(n->is_CFG(), "");
- DEBUG_ONLY(if (trace) { tty->print("ZZZ ctrl"); n->dump(); });
- for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
- Node* u = n->fast_out(i);
- if (!u->is_Root() && u->is_CFG() && u != n) {
- Node* m = _memory_nodes[u->_idx];
- if (u->is_Region() && (!u->is_OuterStripMinedLoop() || _include_lsm) &&
- !has_mem_phi(u) &&
- u->unique_ctrl_out()->Opcode() != Op_Halt) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ region"); u->dump(); });
- DEBUG_ONLY(if (trace && m != nullptr) { tty->print("ZZZ mem"); m->dump(); });
-
- if (!mem_is_valid(m, u) || !m->is_Phi()) {
- bool push = true;
- bool create_phi = true;
- if (_phase->is_dominator(new_ctrl, u)) {
- create_phi = false;
- }
- if (create_phi) {
- Node* phi = new PhiNode(u, Type::MEMORY, _phase->C->get_adr_type(_alias));
- _phase->register_new_node(phi, u);
- phis.push(phi);
- DEBUG_ONLY(if (trace) { tty->print("ZZZ new phi"); phi->dump(); });
- if (!mem_is_valid(m, u)) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting mem"); phi->dump(); });
- _memory_nodes.map(u->_idx, phi);
- } else {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ NOT setting mem"); m->dump(); });
- for (;;) {
- assert(m->is_Mem() || m->is_LoadStore() || m->is_Proj(), "");
- Node* next = nullptr;
- if (m->is_Proj()) {
- next = m->in(0);
- } else {
- assert(m->is_Mem() || m->is_LoadStore(), "");
- assert(_alias == Compile::AliasIdxRaw, "");
- next = m->in(MemNode::Memory);
- }
- if (_phase->get_ctrl(next) != u) {
- break;
- }
- if (next->is_MergeMem()) {
- assert(_phase->get_ctrl(next->as_MergeMem()->memory_at(_alias)) != u, "");
- break;
- }
- if (next->is_Phi()) {
- assert(next->adr_type() == TypePtr::BOTTOM && next->in(0) == u, "");
- break;
- }
- m = next;
- }
-
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting to phi"); m->dump(); });
- assert(m->is_Mem() || m->is_LoadStore(), "");
- uint input = (uint)MemNode::Memory;
- _phase->igvn().replace_input_of(m, input, phi);
- push = false;
- }
- } else {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ skipping region"); u->dump(); });
- }
- if (push) {
- uses.push(u);
- }
- }
- } else if (!mem_is_valid(m, u) &&
- !(u->Opcode() == Op_CProj && u->in(0)->is_NeverBranch() && u->as_Proj()->_con == 1)) {
- uses.push(u);
- }
- }
- }
- }
- for (int i = 0; i < phis.length(); i++) {
- Node* n = phis.at(i);
- Node* r = n->in(0);
- DEBUG_ONLY(if (trace) { tty->print("ZZZ fixing new phi"); n->dump(); });
- for (uint j = 1; j < n->req(); j++) {
- Node* m = find_mem(r->in(j), nullptr);
- _phase->igvn().replace_input_of(n, j, m);
- DEBUG_ONLY(if (trace) { tty->print("ZZZ fixing new phi: %d", j); m->dump(); });
- }
- }
- }
- uint last = _phase->C->unique();
- MergeMemNode* mm = nullptr;
- int alias = _alias;
- DEBUG_ONLY(if (trace) { tty->print("ZZZ raw mem is"); mem->dump(); });
- // Process loads first to not miss an anti-dependency: if the memory
- // edge of a store is updated before a load is processed then an
- // anti-dependency may be missed.
- for (DUIterator i = mem->outs(); mem->has_out(i); i++) {
- Node* u = mem->out(i);
- if (u->_idx < last && u->is_Load() && _phase->C->get_alias_index(u->adr_type()) == alias) {
- Node* m = find_mem(_phase->get_ctrl(u), u);
- if (m != mem) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
- _phase->igvn().replace_input_of(u, MemNode::Memory, m);
- --i;
- }
- }
- }
- for (DUIterator i = mem->outs(); mem->has_out(i); i++) {
- Node* u = mem->out(i);
- if (u->_idx < last) {
- if (u->is_Mem()) {
- if (_phase->C->get_alias_index(u->adr_type()) == alias) {
- Node* m = find_mem(_phase->get_ctrl(u), u);
- if (m != mem) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
- _phase->igvn().replace_input_of(u, MemNode::Memory, m);
- --i;
- }
- }
- } else if (u->is_MergeMem()) {
- MergeMemNode* u_mm = u->as_MergeMem();
- if (u_mm->memory_at(alias) == mem) {
- MergeMemNode* newmm = nullptr;
- for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) {
- Node* uu = u->fast_out(j);
- assert(!uu->is_MergeMem(), "chain of MergeMems?");
- if (uu->is_Phi()) {
- assert(uu->adr_type() == TypePtr::BOTTOM, "");
- Node* region = uu->in(0);
- int nb = 0;
- for (uint k = 1; k < uu->req(); k++) {
- if (uu->in(k) == u) {
- Node* m = find_mem(region->in(k), nullptr);
- if (m != mem) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", k); uu->dump(); });
- newmm = clone_merge_mem(u, mem, m, _phase->ctrl_or_self(m), i);
- if (newmm != u) {
- _phase->igvn().replace_input_of(uu, k, newmm);
- nb++;
- --jmax;
- }
- }
- }
- }
- if (nb > 0) {
- --j;
- }
- } else {
- Node* m = find_mem(_phase->ctrl_or_self(uu), uu);
- if (m != mem) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); uu->dump(); });
- newmm = clone_merge_mem(u, mem, m, _phase->ctrl_or_self(m), i);
- if (newmm != u) {
- _phase->igvn().replace_input_of(uu, uu->find_edge(u), newmm);
- --j, --jmax;
- }
- }
- }
- }
- }
- } else if (u->is_Phi()) {
- assert(u->bottom_type() == Type::MEMORY, "what else?");
- if (_phase->C->get_alias_index(u->adr_type()) == alias || u->adr_type() == TypePtr::BOTTOM) {
- Node* region = u->in(0);
- bool replaced = false;
- for (uint j = 1; j < u->req(); j++) {
- if (u->in(j) == mem) {
- Node* m = find_mem(region->in(j), nullptr);
- Node* nnew = m;
- if (m != mem) {
- if (u->adr_type() == TypePtr::BOTTOM) {
- mm = allocate_merge_mem(mem, m, _phase->ctrl_or_self(m));
- nnew = mm;
- }
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", j); u->dump(); });
- _phase->igvn().replace_input_of(u, j, nnew);
- replaced = true;
- }
- }
- }
- if (replaced) {
- --i;
- }
- }
- } else if ((u->adr_type() == TypePtr::BOTTOM && u->Opcode() != Op_StrInflatedCopy) ||
- u->adr_type() == nullptr) {
- assert(u->adr_type() != nullptr ||
- u->Opcode() == Op_Rethrow ||
- u->Opcode() == Op_Return ||
- u->Opcode() == Op_SafePoint ||
- (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) ||
- (u->is_CallStaticJava() && u->as_CallStaticJava()->_entry_point == OptoRuntime::rethrow_stub()) ||
- u->Opcode() == Op_CallLeaf, "");
- Node* m = find_mem(_phase->ctrl_or_self(u), u);
- if (m != mem) {
- mm = allocate_merge_mem(mem, m, _phase->get_ctrl(m));
- _phase->igvn().replace_input_of(u, u->find_edge(mem), mm);
- --i;
- }
- } else if (_phase->C->get_alias_index(u->adr_type()) == alias) {
- Node* m = find_mem(_phase->ctrl_or_self(u), u);
- if (m != mem) {
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
- _phase->igvn().replace_input_of(u, u->find_edge(mem), m);
- --i;
- }
- } else if (u->adr_type() != TypePtr::BOTTOM &&
- _memory_nodes[_phase->ctrl_or_self(u)->_idx] == u) {
- Node* m = find_mem(_phase->ctrl_or_self(u), u);
- assert(m != mem, "");
- // u is on the wrong slice...
- assert(u->is_ClearArray(), "");
- DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
- _phase->igvn().replace_input_of(u, u->find_edge(mem), m);
- --i;
- }
- }
- }
-#ifdef ASSERT
- assert(new_mem->outcnt() > 0, "");
- for (int i = 0; i < phis.length(); i++) {
- Node* n = phis.at(i);
- assert(n->outcnt() > 0, "new phi must have uses now");
- }
-#endif
-}
-
-void MemoryGraphFixer::record_new_ctrl(Node* ctrl, Node* new_ctrl, Node* mem, Node* mem_for_ctrl) {
- if (mem_for_ctrl != mem && new_ctrl != ctrl) {
- _memory_nodes.map(ctrl->_idx, mem);
- _memory_nodes.map(new_ctrl->_idx, mem_for_ctrl);
- }
-}
-
-MergeMemNode* MemoryGraphFixer::allocate_merge_mem(Node* mem, Node* rep_proj, Node* rep_ctrl) const {
- MergeMemNode* mm = MergeMemNode::make(mem);
- mm->set_memory_at(_alias, rep_proj);
- _phase->register_new_node(mm, rep_ctrl);
- return mm;
-}
-
-MergeMemNode* MemoryGraphFixer::clone_merge_mem(Node* u, Node* mem, Node* rep_proj, Node* rep_ctrl, DUIterator& i) const {
- MergeMemNode* newmm = nullptr;
- MergeMemNode* u_mm = u->as_MergeMem();
- Node* c = _phase->get_ctrl(u);
- if (_phase->is_dominator(c, rep_ctrl)) {
- c = rep_ctrl;
- } else {
- assert(_phase->is_dominator(rep_ctrl, c), "one must dominate the other");
- }
- if (u->outcnt() == 1) {
- if (u->req() > (uint)_alias && u->in(_alias) == mem) {
- _phase->igvn().replace_input_of(u, _alias, rep_proj);
- --i;
- } else {
- _phase->igvn().rehash_node_delayed(u);
- u_mm->set_memory_at(_alias, rep_proj);
- }
- newmm = u_mm;
- _phase->set_ctrl_and_loop(u, c);
- } else {
- // can't simply clone u and then change one of its input because
- // it adds and then removes an edge which messes with the
- // DUIterator
- newmm = MergeMemNode::make(u_mm->base_memory());
- for (uint j = 0; j < u->req(); j++) {
- if (j < newmm->req()) {
- if (j == (uint)_alias) {
- newmm->set_req(j, rep_proj);
- } else if (newmm->in(j) != u->in(j)) {
- newmm->set_req(j, u->in(j));
- }
- } else if (j == (uint)_alias) {
- newmm->add_req(rep_proj);
- } else {
- newmm->add_req(u->in(j));
- }
- }
- if ((uint)_alias >= u->req()) {
- newmm->set_memory_at(_alias, rep_proj);
- }
- _phase->register_new_node(newmm, c);
- }
- return newmm;
-}
-
-bool MemoryGraphFixer::should_process_phi(Node* phi) const {
- if (phi->adr_type() == TypePtr::BOTTOM) {
- Node* region = phi->in(0);
- for (DUIterator_Fast jmax, j = region->fast_outs(jmax); j < jmax; j++) {
- Node* uu = region->fast_out(j);
- if (uu->is_Phi() && uu != phi && uu->bottom_type() == Type::MEMORY && _phase->C->get_alias_index(uu->adr_type()) == _alias) {
- return false;
- }
- }
- return true;
- }
- return _phase->C->get_alias_index(phi->adr_type()) == _alias;
-}
-
-void MemoryGraphFixer::fix_memory_uses(Node* mem, Node* replacement, Node* rep_proj, Node* rep_ctrl) const {
- uint last = _phase-> C->unique();
- MergeMemNode* mm = nullptr;
- assert(mem->bottom_type() == Type::MEMORY, "");
- for (DUIterator i = mem->outs(); mem->has_out(i); i++) {
- Node* u = mem->out(i);
- if (u != replacement && u->_idx < last) {
- if (u->is_MergeMem()) {
- MergeMemNode* u_mm = u->as_MergeMem();
- if (u_mm->memory_at(_alias) == mem) {
- MergeMemNode* newmm = nullptr;
- for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) {
- Node* uu = u->fast_out(j);
- assert(!uu->is_MergeMem(), "chain of MergeMems?");
- if (uu->is_Phi()) {
- if (should_process_phi(uu)) {
- Node* region = uu->in(0);
- int nb = 0;
- for (uint k = 1; k < uu->req(); k++) {
- if (uu->in(k) == u && _phase->is_dominator(rep_ctrl, region->in(k))) {
- if (newmm == nullptr) {
- newmm = clone_merge_mem(u, mem, rep_proj, rep_ctrl, i);
- }
- if (newmm != u) {
- _phase->igvn().replace_input_of(uu, k, newmm);
- nb++;
- --jmax;
- }
- }
- }
- if (nb > 0) {
- --j;
- }
- }
- } else {
- if (rep_ctrl != uu && ShenandoahBarrierC2Support::is_dominator(rep_ctrl, _phase->ctrl_or_self(uu), replacement, uu, _phase)) {
- if (newmm == nullptr) {
- newmm = clone_merge_mem(u, mem, rep_proj, rep_ctrl, i);
- }
- if (newmm != u) {
- _phase->igvn().replace_input_of(uu, uu->find_edge(u), newmm);
- --j, --jmax;
- }
- }
- }
- }
- }
- } else if (u->is_Phi()) {
- assert(u->bottom_type() == Type::MEMORY, "what else?");
- Node* region = u->in(0);
- if (should_process_phi(u)) {
- bool replaced = false;
- for (uint j = 1; j < u->req(); j++) {
- if (u->in(j) == mem && _phase->is_dominator(rep_ctrl, region->in(j))) {
- Node* nnew = rep_proj;
- if (u->adr_type() == TypePtr::BOTTOM) {
- if (mm == nullptr) {
- mm = allocate_merge_mem(mem, rep_proj, rep_ctrl);
- }
- nnew = mm;
- }
- _phase->igvn().replace_input_of(u, j, nnew);
- replaced = true;
- }
- }
- if (replaced) {
- --i;
- }
-
- }
- } else if ((u->adr_type() == TypePtr::BOTTOM && u->Opcode() != Op_StrInflatedCopy) ||
- u->adr_type() == nullptr) {
- assert(u->adr_type() != nullptr ||
- u->Opcode() == Op_Rethrow ||
- u->Opcode() == Op_Return ||
- u->Opcode() == Op_SafePoint ||
- (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) ||
- (u->is_CallStaticJava() && u->as_CallStaticJava()->_entry_point == OptoRuntime::rethrow_stub()) ||
- u->Opcode() == Op_CallLeaf, "%s", u->Name());
- if (ShenandoahBarrierC2Support::is_dominator(rep_ctrl, _phase->ctrl_or_self(u), replacement, u, _phase)) {
- if (mm == nullptr) {
- mm = allocate_merge_mem(mem, rep_proj, rep_ctrl);
- }
- _phase->igvn().replace_input_of(u, u->find_edge(mem), mm);
- --i;
- }
- } else if (_phase->C->get_alias_index(u->adr_type()) == _alias) {
- if (ShenandoahBarrierC2Support::is_dominator(rep_ctrl, _phase->ctrl_or_self(u), replacement, u, _phase)) {
- _phase->igvn().replace_input_of(u, u->find_edge(mem), rep_proj);
- --i;
- }
- }
- }
- }
-}
-
-ShenandoahLoadReferenceBarrierNode::ShenandoahLoadReferenceBarrierNode(Node* ctrl, Node* obj, DecoratorSet decorators)
-: Node(ctrl, obj), _decorators(decorators) {
- ShenandoahBarrierSetC2::bsc2()->state()->add_load_reference_barrier(this);
-}
-
-DecoratorSet ShenandoahLoadReferenceBarrierNode::decorators() const {
- return _decorators;
-}
-
-uint ShenandoahLoadReferenceBarrierNode::size_of() const {
- return sizeof(*this);
-}
-
-static DecoratorSet mask_decorators(DecoratorSet decorators) {
- return decorators & (ON_STRONG_OOP_REF | ON_WEAK_OOP_REF | ON_PHANTOM_OOP_REF | ON_UNKNOWN_OOP_REF | IN_NATIVE);
-}
-
-uint ShenandoahLoadReferenceBarrierNode::hash() const {
- uint hash = Node::hash();
- hash += mask_decorators(_decorators);
- return hash;
-}
-
-bool ShenandoahLoadReferenceBarrierNode::cmp( const Node &n ) const {
- return Node::cmp(n) && n.Opcode() == Op_ShenandoahLoadReferenceBarrier &&
- mask_decorators(_decorators) == mask_decorators(((const ShenandoahLoadReferenceBarrierNode&)n)._decorators);
-}
-
-const Type* ShenandoahLoadReferenceBarrierNode::bottom_type() const {
- if (in(ValueIn) == nullptr || in(ValueIn)->is_top()) {
- return Type::TOP;
- }
- const Type* t = in(ValueIn)->bottom_type();
- if (t == TypePtr::NULL_PTR) {
- return t;
- }
-
- if (ShenandoahBarrierSet::is_strong_access(decorators())) {
- return t;
- }
-
- return t->meet(TypePtr::NULL_PTR);
-}
-
-const Type* ShenandoahLoadReferenceBarrierNode::Value(PhaseGVN* phase) const {
- // Either input is TOP ==> the result is TOP
- const Type *t2 = phase->type(in(ValueIn));
- if( t2 == Type::TOP ) return Type::TOP;
-
- if (t2 == TypePtr::NULL_PTR) {
- return t2;
- }
-
- if (ShenandoahBarrierSet::is_strong_access(decorators())) {
- return t2;
- }
-
- return t2->meet(TypePtr::NULL_PTR);
-}
-
-Node* ShenandoahLoadReferenceBarrierNode::Identity(PhaseGVN* phase) {
- Node* value = in(ValueIn);
- if (!needs_barrier(phase, value)) {
- return value;
- }
- return this;
-}
-
-bool ShenandoahLoadReferenceBarrierNode::needs_barrier(PhaseGVN* phase, Node* n) {
- Unique_Node_List visited;
- return needs_barrier_impl(phase, n, visited);
-}
-
-bool ShenandoahLoadReferenceBarrierNode::needs_barrier_impl(PhaseGVN* phase, Node* n, Unique_Node_List &visited) {
- if (n == nullptr) return false;
- if (visited.member(n)) {
- return false; // Been there.
- }
- visited.push(n);
-
- if (n->is_Allocate()) {
- // tty->print_cr("optimize barrier on alloc");
- return false;
- }
- if (n->is_Call()) {
- // tty->print_cr("optimize barrier on call");
- return false;
- }
-
- const Type* type = phase->type(n);
- if (type == Type::TOP) {
- return false;
- }
- if (type->make_ptr()->higher_equal(TypePtr::NULL_PTR)) {
- // tty->print_cr("optimize barrier on null");
- return false;
- }
- if (type->make_oopptr() && type->make_oopptr()->const_oop() != nullptr) {
- // tty->print_cr("optimize barrier on constant");
- return false;
- }
-
- switch (n->Opcode()) {
- case Op_AddP:
- return true; // TODO: Can refine?
- case Op_LoadP:
- case Op_ShenandoahCompareAndExchangeN:
- case Op_ShenandoahCompareAndExchangeP:
- case Op_CompareAndExchangeN:
- case Op_CompareAndExchangeP:
- case Op_GetAndSetN:
- case Op_GetAndSetP:
- return true;
- case Op_Phi: {
- for (uint i = 1; i < n->req(); i++) {
- if (needs_barrier_impl(phase, n->in(i), visited)) return true;
- }
- return false;
- }
- case Op_CheckCastPP:
- case Op_CastPP:
- return needs_barrier_impl(phase, n->in(1), visited);
- case Op_Proj:
- return needs_barrier_impl(phase, n->in(0), visited);
- case Op_ShenandoahLoadReferenceBarrier:
- // tty->print_cr("optimize barrier on barrier");
- return false;
- case Op_Parm:
- // tty->print_cr("optimize barrier on input arg");
- return false;
- case Op_DecodeN:
- case Op_EncodeP:
- return needs_barrier_impl(phase, n->in(1), visited);
- case Op_LoadN:
- return true;
- case Op_CMoveN:
- case Op_CMoveP:
- return needs_barrier_impl(phase, n->in(2), visited) ||
- needs_barrier_impl(phase, n->in(3), visited);
- case Op_CreateEx:
- return false;
- default:
- break;
- }
-#ifdef ASSERT
- tty->print("need barrier on?: ");
- tty->print_cr("ins:");
- n->dump(2);
- tty->print_cr("outs:");
- n->dump(-2);
- ShouldNotReachHere();
-#endif
- return true;
-}
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp
deleted file mode 100644
index 63e8412a307..00000000000
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 2015, 2021, Red Hat, Inc. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_SHENANDOAH_C2_SHENANDOAHSUPPORT_HPP
-#define SHARE_GC_SHENANDOAH_C2_SHENANDOAHSUPPORT_HPP
-
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
-#include "memory/allocation.hpp"
-#include "opto/addnode.hpp"
-#include "opto/graphKit.hpp"
-#include "opto/machnode.hpp"
-#include "opto/memnode.hpp"
-#include "opto/multnode.hpp"
-#include "opto/node.hpp"
-
-class PhaseGVN;
-class MemoryGraphFixer;
-
-class ShenandoahBarrierC2Support : public AllStatic {
-private:
-#ifdef ASSERT
- enum verify_type {
- ShenandoahLoad,
- ShenandoahStore,
- ShenandoahValue,
- ShenandoahOopStore,
- ShenandoahNone
- };
-
- static bool verify_helper(Node* in, Node_Stack& phis, VectorSet& visited, verify_type t, bool trace, Unique_Node_List& barriers_used);
- static void report_verify_failure(const char* msg, Node* n1 = nullptr, Node* n2 = nullptr);
-#endif
- static Node* dom_mem(Node* mem, Node* ctrl, int alias, Node*& mem_ctrl, PhaseIdealLoop* phase);
- static Node* no_branches(Node* c, Node* dom, bool allow_one_proj, PhaseIdealLoop* phase);
- static bool is_gc_state_test(Node* iff, int mask);
- static bool has_safepoint_between(Node* start, Node* stop, PhaseIdealLoop *phase);
- static Node* find_bottom_mem(Node* ctrl, PhaseIdealLoop* phase);
- static void follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses, PhaseIdealLoop* phase);
- static void test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase);
- static void test_gc_state(Node*& ctrl, Node* raw_mem, Node*& heap_stable_ctrl,
- PhaseIdealLoop* phase, int flags);
- static void call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr,
- DecoratorSet decorators, PhaseIdealLoop* phase);
-
- static void collect_nodes_above_barrier(Unique_Node_List &nodes_above_barrier, PhaseIdealLoop* phase, Node* ctrl,
- Node* init_raw_mem);
-
- static void test_in_cset(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase);
- static void fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& nodes_above_barrier, uint last, PhaseIdealLoop* phase);
-
- static Node* get_load_addr(PhaseIdealLoop* phase, VectorSet& visited, Node* lrb);
-public:
- static bool is_dominator(Node* d_c, Node* n_c, Node* d, Node* n, PhaseIdealLoop* phase);
- static bool is_dominator_same_ctrl(Node* c, Node* d, Node* n, PhaseIdealLoop* phase);
-
- static bool is_gc_state_load(Node* n);
- static bool is_heap_stable_test(Node* iff);
-
- static bool expand(Compile* C, PhaseIterGVN& igvn);
- static void pin_and_expand(PhaseIdealLoop* phase);
-
- static void push_data_inputs_at_control(PhaseIdealLoop* phase, Node* n, Node* ctrl,
- Unique_Node_List &wq);
- static bool is_anti_dependent_load_at_control(PhaseIdealLoop* phase, Node* maybe_load, Node* store, Node* control);
-
- static void maybe_push_anti_dependent_loads(PhaseIdealLoop* phase, Node* maybe_store, Node* control, Unique_Node_List &wq);
-#ifdef ASSERT
- static void verify(RootNode* root);
-#endif
-};
-
-class MemoryGraphFixer : public ResourceObj {
-private:
- Node_List _memory_nodes;
- int _alias;
- PhaseIdealLoop* _phase;
- bool _include_lsm;
-
- void collect_memory_nodes();
- Node* get_ctrl(Node* n) const;
- Node* ctrl_or_self(Node* n) const;
- bool mem_is_valid(Node* m, Node* c) const;
- MergeMemNode* allocate_merge_mem(Node* mem, Node* rep_proj, Node* rep_ctrl) const;
- MergeMemNode* clone_merge_mem(Node* u, Node* mem, Node* rep_proj, Node* rep_ctrl, DUIterator& i) const;
- void fix_memory_uses(Node* mem, Node* replacement, Node* rep_proj, Node* rep_ctrl) const;
- bool should_process_phi(Node* phi) const;
- bool has_mem_phi(Node* region) const;
-
-public:
- MemoryGraphFixer(int alias, bool include_lsm, PhaseIdealLoop* phase) :
- _alias(alias), _phase(phase), _include_lsm(include_lsm) {
- assert(_alias != Compile::AliasIdxBot, "unsupported");
- collect_memory_nodes();
- }
-
- Node* find_mem(Node* ctrl, Node* n) const;
- void fix_mem(Node* ctrl, Node* region, Node* mem, Node* mem_for_ctrl, Node* mem_phi, Unique_Node_List& uses);
- int alias() const { return _alias; }
-
- Node* collect_memory_for_infinite_loop(const Node* in);
-
- void record_new_ctrl(Node* ctrl, Node* region, Node* mem, Node* mem_for_ctrl);
-};
-
-class ShenandoahCompareAndSwapPNode : public CompareAndSwapPNode {
-public:
- ShenandoahCompareAndSwapPNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord)
- : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
-
- virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
- if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypePtr::NULL_PTR) {
- return new CompareAndSwapPNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order());
- }
- return nullptr;
- }
-
- virtual int Opcode() const;
-};
-
-class ShenandoahCompareAndSwapNNode : public CompareAndSwapNNode {
-public:
- ShenandoahCompareAndSwapNNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord)
- : CompareAndSwapNNode(c, mem, adr, val, ex, mem_ord) { }
-
- virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
- if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypeNarrowOop::NULL_PTR) {
- return new CompareAndSwapNNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order());
- }
- return nullptr;
- }
-
- virtual int Opcode() const;
-};
-
-class ShenandoahWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode {
-public:
- ShenandoahWeakCompareAndSwapPNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord)
- : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
-
- virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
- if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypePtr::NULL_PTR) {
- return new WeakCompareAndSwapPNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order());
- }
- return nullptr;
- }
-
- virtual int Opcode() const;
-};
-
-class ShenandoahWeakCompareAndSwapNNode : public WeakCompareAndSwapNNode {
-public:
- ShenandoahWeakCompareAndSwapNNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord)
- : WeakCompareAndSwapNNode(c, mem, adr, val, ex, mem_ord) { }
-
- virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
- if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypeNarrowOop::NULL_PTR) {
- return new WeakCompareAndSwapNNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), order());
- }
- return nullptr;
- }
-
- virtual int Opcode() const;
-};
-
-class ShenandoahCompareAndExchangePNode : public CompareAndExchangePNode {
-public:
- ShenandoahCompareAndExchangePNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord)
- : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { }
-
- virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
- if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypePtr::NULL_PTR) {
- return new CompareAndExchangePNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), adr_type(), bottom_type(), order());
- }
- return nullptr;
- }
-
- virtual int Opcode() const;
-};
-
-class ShenandoahCompareAndExchangeNNode : public CompareAndExchangeNNode {
-public:
- ShenandoahCompareAndExchangeNNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord)
- : CompareAndExchangeNNode(c, mem, adr, val, ex, at, t, mem_ord) { }
-
- virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
- if (in(ExpectedIn) != nullptr && phase->type(in(ExpectedIn)) == TypeNarrowOop::NULL_PTR) {
- return new CompareAndExchangeNNode(in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), in(MemNode::ValueIn), in(ExpectedIn), adr_type(), bottom_type(), order());
- }
- return nullptr;
- }
-
- virtual int Opcode() const;
-};
-
-class ShenandoahLoadReferenceBarrierNode : public Node {
-public:
- enum {
- Control,
- ValueIn
- };
-
-private:
- DecoratorSet _decorators;
-
-public:
- ShenandoahLoadReferenceBarrierNode(Node* ctrl, Node* val, DecoratorSet decorators);
-
- DecoratorSet decorators() const;
- virtual int Opcode() const;
- virtual const Type* bottom_type() const;
- virtual const Type* Value(PhaseGVN* phase) const;
- virtual const class TypePtr *adr_type() const { return TypeOopPtr::BOTTOM; }
- virtual uint match_edge(uint idx) const {
- return idx >= ValueIn;
- }
- virtual uint ideal_reg() const { return Op_RegP; }
-
- virtual Node* Identity(PhaseGVN* phase);
-
- virtual uint size_of() const;
- virtual uint hash() const;
- virtual bool cmp( const Node &n ) const;
-
-private:
- bool needs_barrier(PhaseGVN* phase, Node* n);
- bool needs_barrier_impl(PhaseGVN* phase, Node* n, Unique_Node_List &visited);
-};
-
-
-#endif // SHARE_GC_SHENANDOAH_C2_SHENANDOAHSUPPORT_HPP
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp
index ce74e8cf199..11ca6ff3e90 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,31 +24,27 @@
*
*/
-
-#include "gc/shared/gcCause.hpp"
#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.inline.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
-#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
-#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
-#include "runtime/globals.hpp"
+#include "utilities/globalDefinitions.hpp"
#include "utilities/quickSort.hpp"
-// These constants are used to adjust the margin of error for the moving
-// average of the allocation rate and cycle time. The units are standard
-// deviations.
-const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
-const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;
+#include
+
+#define PROPERFMT_F "%.1f %s"
+#define PROPERFMT_F_ARGS(s) byte_size_in_proper_unit(s), proper_unit_for_byte_size(s)
// These are used to decide if we want to make any adjustments at all
// at the end of a successful concurrent cycle.
-const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
-const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
+constexpr double LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
+constexpr double HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
// These values are the confidence interval expressed as standard deviations.
// At the minimum confidence level, there is a 25% chance that the true value of
@@ -57,66 +53,15 @@ const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0
// MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
// that the true value of our estimate is outside the interval. These are used
// as bounds on the adjustments applied at the outcome of a GC cycle.
-const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
-const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
-
-
-// To enable detection of GC time trends, we keep separate track of the recent history of gc time. During initialization,
-// for example, the amount of live memory may be increasing, which is likely to cause the GC times to increase. This history
-// allows us to predict increasing GC times rather than always assuming average recent GC time is the best predictor.
-const size_t ShenandoahAdaptiveHeuristics::GC_TIME_SAMPLE_SIZE = 3;
-
-// We also keep separate track of recently sampled allocation rates for two purposes:
-// 1. The number of samples examined to determine acceleration of allocation is represented by
-// ShenandoahRateAccelerationSampleSize
-// 2. The number of most recent samples averaged to determine a momentary allocation spike is represented by
-// ShenandoahMomentaryAllocationRateSpikeSampleSize
-
-// Allocation rates are sampled by the regulator thread, which typically runs every ms. There may be jitter in the scheduling
-// of the regulator thread. To reduce signal noise and synchronization overhead, we do not sample allocation rate with every
-// iteration of the regulator. We prefer sample time longer than 1 ms so that there can be a statistically significant number
-// of allocations occuring within each sample period. The regulator thread samples allocation rate only if at least
-// ShenandoahAccelerationSamplePeriod ms have passed since it previously sampled the allocation rate.
-//
-// This trigger responds much more quickly than the traditional trigger, which monitors 100 ms spans. When acceleration is
-// detected, the impact of acceleration on anticipated consumption of available memory is also much more impactful
-// than the assumed constant allocation rate consumption of available memory.
+constexpr double MINIMUM_CONFIDENCE = 0.319; // 25%
+constexpr double MAXIMUM_CONFIDENCE = 3.291; // 99.9%
ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info) :
ShenandoahHeuristics(space_info),
_margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
- _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
_last_trigger(OTHER),
- _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor),
- _free_set(nullptr),
- _previous_acceleration_sample_timestamp(0.0),
- _gc_time_first_sample_index(0),
- _gc_time_num_samples(0),
- _gc_time_timestamps(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
- _gc_time_samples(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
- _gc_time_xy(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
- _gc_time_xx(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
- _gc_time_sum_of_timestamps(0),
- _gc_time_sum_of_samples(0),
- _gc_time_sum_of_xy(0),
- _gc_time_sum_of_xx(0),
- _gc_time_m(0.0),
- _gc_time_b(0.0),
- _gc_time_sd(0.0),
- _spike_acceleration_buffer_size(MAX2(ShenandoahRateAccelerationSampleSize, 1+ShenandoahMomentaryAllocationRateSpikeSampleSize)),
- _spike_acceleration_first_sample_index(0),
- _spike_acceleration_num_samples(0),
- _spike_acceleration_rate_samples(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)),
- _spike_acceleration_rate_timestamps(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)) {
- }
-
-ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {
- FREE_C_HEAP_ARRAY(_spike_acceleration_rate_samples);
- FREE_C_HEAP_ARRAY(_spike_acceleration_rate_timestamps);
- FREE_C_HEAP_ARRAY(_gc_time_timestamps);
- FREE_C_HEAP_ARRAY(_gc_time_samples);
- FREE_C_HEAP_ARRAY(_gc_time_xy);
- FREE_C_HEAP_ARRAY(_gc_time_xx);
+ _available(Moving_Average_Samples),
+ _headroom_adjustment(0) {
}
void ShenandoahAdaptiveHeuristics::initialize() {
@@ -125,7 +70,6 @@ void ShenandoahAdaptiveHeuristics::initialize() {
void ShenandoahAdaptiveHeuristics::post_initialize() {
ShenandoahHeuristics::post_initialize();
- _free_set = ShenandoahHeap::heap()->free_set();
assert(!ShenandoahHeap::heap()->mode()->is_generational(), "ShenandoahGenerationalHeuristics overrides this method");
compute_headroom_adjustment();
}
@@ -136,9 +80,9 @@ void ShenandoahAdaptiveHeuristics::compute_headroom_adjustment() {
// intend to finish GC before the amount of available memory is less than the allocation headroom. Headroom is the planned
// safety buffer to allow a small amount of additional allocation to take place in case we were overly optimistic in delaying
// our trigger.
- size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
- size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
- size_t penalties = capacity / 100 * _gc_time_penalties;
+ const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
+ const size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
+ const size_t penalties = capacity / 100 * _gc_time_penalties;
_headroom_adjustment = spike_headroom + penalties;
}
@@ -172,17 +116,14 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
// we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
// ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.
- size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
- size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
- size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset;
- size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0);
+ const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
+ const size_t max_cset = shenandoah_safe_size_cast(1.0 * capacity / 100 * ShenandoahEvacReserve / ShenandoahEvacWaste);
+ const size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset;
+ const size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0);
- log_info(gc, ergo)("Adaptive CSet Selection. Target Free: %zu%s, Actual Free: "
- "%zu%s, Max Evacuation: %zu%s, Min Garbage: %zu%s",
- byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
- byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
- byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
- byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));
+ log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " PROPERFMT ", Actual Free: " PROPERFMT
+ ", Max Evacuation: " PROPERFMT ", Min Garbage: " PROPERFMT ,
+ PROPERFMTARGS(free_target), PROPERFMTARGS(actual_free), PROPERFMTARGS(max_cset), PROPERFMTARGS(min_garbage));
// Better select garbage-first regions
QuickSort::sort(data, size, compare_by_garbage);
@@ -194,8 +135,8 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
for (size_t idx = 0; idx < size; idx++) {
ShenandoahHeapRegion* r = data[idx].get_region();
- size_t new_cset = cur_cset + r->get_live_data_bytes();
- size_t new_garbage = cur_garbage + r->garbage();
+ const size_t new_cset = cur_cset + r->get_live_data_bytes();
+ const size_t new_garbage = cur_garbage + r->garbage();
if (new_cset > max_cset) {
break;
@@ -210,122 +151,33 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
}
void ShenandoahAdaptiveHeuristics::add_degenerated_gc_time(double time_at_start, double gc_time) {
- // Conservatively add sample into linear model If this time is above the predicted concurrent gc time
- if (predict_gc_time(time_at_start) < gc_time) {
- add_gc_time(time_at_start, gc_time);
+ // Conservatively add sample into linear model, if this time is above the predicted concurrent gc time
+ if (_cycles.predict_duration(time_at_start, _margin_of_error_sd) < gc_time) {
+ _cycles.record_duration(time_at_start, gc_time);
}
}
-void ShenandoahAdaptiveHeuristics::add_gc_time(double time_at_start, double gc_time) {
- // Update best-fit linear predictor of GC time
- uint index = (_gc_time_first_sample_index + _gc_time_num_samples) % GC_TIME_SAMPLE_SIZE;
- if (_gc_time_num_samples == GC_TIME_SAMPLE_SIZE) {
- _gc_time_sum_of_timestamps -= _gc_time_timestamps[index];
- _gc_time_sum_of_samples -= _gc_time_samples[index];
- _gc_time_sum_of_xy -= _gc_time_xy[index];
- _gc_time_sum_of_xx -= _gc_time_xx[index];
- }
- _gc_time_timestamps[index] = time_at_start;
- _gc_time_samples[index] = gc_time;
- _gc_time_xy[index] = time_at_start * gc_time;
- _gc_time_xx[index] = time_at_start * time_at_start;
-
- _gc_time_sum_of_timestamps += _gc_time_timestamps[index];
- _gc_time_sum_of_samples += _gc_time_samples[index];
- _gc_time_sum_of_xy += _gc_time_xy[index];
- _gc_time_sum_of_xx += _gc_time_xx[index];
-
- if (_gc_time_num_samples < GC_TIME_SAMPLE_SIZE) {
- _gc_time_num_samples++;
- } else {
- _gc_time_first_sample_index = (_gc_time_first_sample_index + 1) % GC_TIME_SAMPLE_SIZE;
- }
-
- if (_gc_time_num_samples == 1) {
- // The predictor is constant (horizontal line)
- _gc_time_m = 0;
- _gc_time_b = gc_time;
- _gc_time_sd = 0.0;
- } else if (_gc_time_num_samples == 2) {
-
- assert(time_at_start > _gc_time_timestamps[_gc_time_first_sample_index],
- "Two GC cycles cannot finish at same time: %.6f vs %.6f, with GC times %.6f and %.6f", time_at_start,
- _gc_time_timestamps[_gc_time_first_sample_index], gc_time, _gc_time_samples[_gc_time_first_sample_index]);
-
- // Two points define a line
- double delta_x = time_at_start - _gc_time_timestamps[_gc_time_first_sample_index];
- double delta_y = gc_time - _gc_time_samples[_gc_time_first_sample_index];
- _gc_time_m = delta_y / delta_x;
- // y = mx + b
- // so b = y0 - mx0
- _gc_time_b = gc_time - _gc_time_m * time_at_start;
- _gc_time_sd = 0.0;
- } else {
- // Since timestamps are monotonically increasing, denominator does not equal zero.
- double denominator = _gc_time_num_samples * _gc_time_sum_of_xx - _gc_time_sum_of_timestamps * _gc_time_sum_of_timestamps;
- assert(denominator != 0.0, "Invariant: samples: %u, sum_of_xx: %.6f, sum_of_timestamps: %.6f",
- _gc_time_num_samples, _gc_time_sum_of_xx, _gc_time_sum_of_timestamps);
- _gc_time_m = ((_gc_time_num_samples * _gc_time_sum_of_xy - _gc_time_sum_of_timestamps * _gc_time_sum_of_samples) /
- denominator);
- _gc_time_b = (_gc_time_sum_of_samples - _gc_time_m * _gc_time_sum_of_timestamps) / _gc_time_num_samples;
- double sum_of_squared_deviations = 0.0;
- for (size_t i = 0; i < _gc_time_num_samples; i++) {
- uint index = (_gc_time_first_sample_index + i) % GC_TIME_SAMPLE_SIZE;
- double x = _gc_time_timestamps[index];
- double predicted_y = _gc_time_m * x + _gc_time_b;
- double deviation = predicted_y - _gc_time_samples[index];
- sum_of_squared_deviations += deviation * deviation;
- }
- _gc_time_sd = sqrt(sum_of_squared_deviations / _gc_time_num_samples);
- }
-}
-
-double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) {
- return _gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd;
-}
-
-void ShenandoahAdaptiveHeuristics::add_rate_to_acceleration_history(double timestamp, double rate) {
- uint new_sample_index =
- (_spike_acceleration_first_sample_index + _spike_acceleration_num_samples) % _spike_acceleration_buffer_size;
- _spike_acceleration_rate_timestamps[new_sample_index] = timestamp;
- _spike_acceleration_rate_samples[new_sample_index] = rate;
- if (_spike_acceleration_num_samples == _spike_acceleration_buffer_size) {
- _spike_acceleration_first_sample_index++;
- if (_spike_acceleration_first_sample_index == _spike_acceleration_buffer_size) {
- _spike_acceleration_first_sample_index = 0;
- }
- } else {
- _spike_acceleration_num_samples++;
- }
-}
-
-void ShenandoahAdaptiveHeuristics::record_cycle_start() {
- ShenandoahHeuristics::record_cycle_start();
- _allocation_rate.allocation_counter_reset();
-}
-
void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
ShenandoahHeuristics::record_success_concurrent();
- double now = os::elapsedTime();
- // Should we not add GC time if this was an abbreviated cycle?
- add_gc_time(_cycle_start, elapsed_cycle_time());
-
- size_t available = _space_info->available();
+ // We add this time even if it is a shortened cycle. There is a risk that this pulls
+ // the gc time trend down, but it is still a more accurate view than excluding times
+ // from shortened cycles. If we excluded the shortened times, the risk would then
+ // be running the collector more often than necessary because it continues to believe
+ // the average cycle time is much higher than it otherwise would be.
+ _cycles.record_duration(_cycle_start, elapsed_cycle_time());
double z_score = 0.0;
- double available_sd = _available.sd();
+ const double available = static_cast<double>(_space_info->available());
+ const double available_sd = _available.sd();
if (available_sd > 0) {
- double available_avg = _available.avg();
- z_score = (double(available) - available_avg) / available_sd;
- log_debug(gc, ergo)("Available: %zu %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
- byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
- z_score,
- byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg),
- byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd));
+ const double available_avg = _available.avg();
+ z_score = (available - available_avg) / available_sd;
+ log_debug(gc, ergo)("Available: " PROPERFMT_F "B, z-score=%.3f. Average available: " PROPERFMT_F "B +/- " PROPERFMT_F "B.",
+ PROPERFMT_F_ARGS(available), z_score, PROPERFMT_F_ARGS(available_avg), PROPERFMT_F_ARGS(available_sd));
}
- _available.add(double(available));
+ _available.add(available);
// In the case when a concurrent GC cycle completes successfully but with an
// unusually small amount of available memory we will adjust our trigger
@@ -352,90 +204,24 @@ void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
// property allows us to adjust the trigger parameters proportionally.
//
// The `100` here is used to attenuate the size of our adjustments. This
- // number was chosen empirically. It also means the adjustments at the end of
- // a concurrent cycle are an order of magnitude smaller than the adjustments
- // made for a degenerated or full GC cycle (which themselves were also
- // chosen empirically).
- adjust_last_trigger_parameters(z_score / -100);
+ // number was chosen empirically.
+ if (_last_trigger == RATE) {
+ adjust_margin_of_error(z_score / -100);
+ }
}
}
void ShenandoahAdaptiveHeuristics::record_degenerated() {
ShenandoahHeuristics::record_degenerated();
add_degenerated_gc_time(_precursor_cycle_start, elapsed_degenerated_cycle_time());
- // Adjust both trigger's parameters in the case of a degenerated GC because
- // either of them should have triggered earlier to avoid this case.
- adjust_margin_of_error(DEGENERATE_PENALTY_SD);
- adjust_spike_threshold(DEGENERATE_PENALTY_SD);
}
-void ShenandoahAdaptiveHeuristics::record_success_full() {
- ShenandoahHeuristics::record_success_full();
- // Adjust both trigger's parameters in the case of a full GC because
- // either of them should have triggered earlier to avoid this case.
- adjust_margin_of_error(FULL_PENALTY_SD);
- adjust_spike_threshold(FULL_PENALTY_SD);
-}
-
-static double saturate(double value, double min, double max) {
- return MAX2(MIN2(value, max), min);
-}
-
-// Rationale:
-// The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of
-// allocations that exceed the average allocation rate. What do these spikes look like?
-//
-// 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly
-// allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec
-// allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike"
-// accommodation to give us enough runway to recalibrate our "average allocation rate".
-//
-// 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means
-// our average allocation rate needs to be adjusted. Once again, we need the "spike" accomodation to give us
-// enough runway to recalibrate our "average allocation rate".
-//
-// 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We
-// allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for
-// the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the
-// period of time between consecutive evaluation of the should_start_gc() service. Here's the thinking:
-//
-// a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing
-// the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately.
-// b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations.
-// If there are more than one of these bursts, we can "approximate" that these will be separated by spans of
-// time with very little or no allocations so the "average" allocation rate should be a suitable approximation
-// of how this will behave.
-//
-// For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change
-// in operation mode. We want some way to decide that the average rate has changed, while keeping average
-// allocation rate computation independent.
bool ShenandoahAdaptiveHeuristics::should_start_gc() {
- size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
- size_t available = _space_info->soft_mutator_available();
- size_t allocated = _space_info->bytes_allocated_since_gc_start();
+ const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
+ const size_t available = _space_info->soft_mutator_available();
- double avg_cycle_time = 0;
- double avg_alloc_rate = 0;
- double now = get_most_recent_wake_time();
- size_t allocatable_words = this->allocatable(available);
- double predicted_future_accelerated_gc_time = 0.0;
- size_t allocated_bytes_since_last_sample = 0;
- double instantaneous_rate_words_per_second = 0.0;
- size_t consumption_accelerated = 0;
- double acceleration = 0.0;
- double current_rate_by_acceleration = 0.0;
- size_t min_threshold = min_free_threshold();
- double predicted_future_gc_time = 0;
- double future_planned_gc_time = 0;
- bool future_planned_gc_time_is_average = false;
- bool is_spiking = false;
-
- log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", "
- "allocated_since_gc_start: " PROPERFMT,
- PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated));
-
- // Track allocation rate even if we decide to start a cycle for other reasons.
- double rate = _allocation_rate.sample(allocated);
+ log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT,
+ PROPERFMTARGS(available), PROPERFMTARGS(capacity));
if (_start_gc_is_pending) {
log_trigger("GC start is already pending");
@@ -444,467 +230,199 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() {
_last_trigger = OTHER;
- if (available < min_threshold) {
- log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")",
- PROPERFMTARGS(available), PROPERFMTARGS(min_threshold));
- accept_trigger_with_type(OTHER);
+ if (trigger_min_free_threshold(available, capacity)) {
return true;
}
- // Check if we need to learn a bit about the application
- const size_t max_learn = ShenandoahLearningSteps;
- if (_gc_times_learned < max_learn) {
- size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
- if (available < init_threshold) {
- log_trigger("Learning %zu of %zu. Free (%zu%s) is below initial threshold (%zu%s)",
- _gc_times_learned + 1, max_learn,
- byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
- byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
- accept_trigger_with_type(OTHER);
- return true;
- }
- }
-
- // The test (3 * allocated > available) below is intended to prevent triggers from firing so quickly that there
- // has not been sufficient time to create garbage that can be reclaimed during the triggered GC cycle. If we trigger before
- // garbage has been created, the concurrent GC will find no garbage. This has been observed to result in degens which
- // experience OOM during evac or that experience "bad progress", both of which escalate to Full GC. Note that garbage that
- // was allocated following the start of the current GC cycle cannot be reclaimed in this GC cycle. Here is the derivation
- // of the expression:
- //
- // Let R (runway) represent the total amount of memory that can be allocated following the start of GC(N). The runway
- // represents memory available at the start of the current GC plus garbage reclaimed by the current GC. In a balanced,
- // fully utilized configuration, we will be starting each new GC cycle immediately following completion of the preceding
- // GC cycle. In this configuration, we would expect half of R to be consumed during concurrent cycle GC(N) and half
- // to be consumed during concurrent GC(N+1).
- //
- // Assume we want to delay GC trigger until: A/V > 0.33
- // This is equivalent to enforcing that: A > 0.33V
- // which is: 3A > V
- // Since A+V equals R, we have: A + 3A > A + V = R
- // which is to say that: A > R/4
- //
- // Postponing the trigger until at least 1/4 of the runway has been consumed helps to improve the efficiency of the
- // triggered GC. Under heavy steady state workload, this delay condition generally has no effect: if the allocation
- // runway is divided "equally" between the current GC and the next GC, then at any potential trigger point (which cannot
- // happen any sooner than completion of the first GC), it is already the case that roughly A > R/2.
- if (3 * allocated <= available) {
- // Even though we will not issue an adaptive trigger unless a minimum threshold of memory has been allocated,
- // we still allow more generic triggers, such as guaranteed GC intervals, to act.
- return ShenandoahHeuristics::should_start_gc();
- }
-
- avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
- avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
- if ((now - _previous_acceleration_sample_timestamp) >= (ShenandoahAccelerationSamplePeriod / 1000.0)) {
- predicted_future_accelerated_gc_time =
- predict_gc_time(now + MAX2(get_planned_sleep_interval(), ShenandoahAccelerationSamplePeriod / 1000.0));
- double future_accelerated_planned_gc_time;
- bool future_accelerated_planned_gc_time_is_average;
- if (predicted_future_accelerated_gc_time > avg_cycle_time) {
- future_accelerated_planned_gc_time = predicted_future_accelerated_gc_time;
- future_accelerated_planned_gc_time_is_average = false;
- } else {
- future_accelerated_planned_gc_time = avg_cycle_time;
- future_accelerated_planned_gc_time_is_average = true;
- }
- allocated_bytes_since_last_sample = _free_set->get_bytes_allocated_since_previous_sample();
- instantaneous_rate_words_per_second =
- (allocated_bytes_since_last_sample / HeapWordSize) / (now - _previous_acceleration_sample_timestamp);
-
- _previous_acceleration_sample_timestamp = now;
- add_rate_to_acceleration_history(now, instantaneous_rate_words_per_second);
- current_rate_by_acceleration = instantaneous_rate_words_per_second;
- consumption_accelerated =
- accelerated_consumption(acceleration, current_rate_by_acceleration, avg_alloc_rate / HeapWordSize,
- (ShenandoahAccelerationSamplePeriod / 1000.0) + future_accelerated_planned_gc_time);
-
- // Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation
- // rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later,
- // and then twice as big again after another short delay. When a phase change causes many threads to increase their
- // allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience
- // the phase change.
- //
- // The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before
- // introduction of accelerated allocation rate detection. Though the allocation rate is seen to be increasing at times
- // 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is
- // still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates.
- //
- // Sample Time (s) Allocation Rate (MB/s) Headroom (GB)
- // 101.807 0.0 26.93
- // <--- accelerated spike can trigger here, around time 101.9s
- // 101.907 477.6 26.85
- // 102.007 3,206.0 26.35
- // 102.108 23,797.8 24.19
- // 102.208 24,164.5 21.83
- // 102.309 23,965.0 19.47
- // 102.409 24,624.35 17.05 <--- without accelerated rate detection, we trigger here
- //
- // Though the above measurements are from actual workload, the following details regarding sampled allocation rates at 3ms
- // period were not measured directly for this run-time sample. These are hypothetical, though they represent a plausible
- // result that correlates with the actual measurements.
- //
- // For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The phase
- // change that causes increasing allocations occurs near the end ot this time segment. When sampled with a 3 ms period,
- // acceration of allocation can be triggered at approximately time 101.88s.
- //
- // In the default configuration, accelerated allocation rate is detected by examining a sequence of 8 allocation rate samples.
- //
- // Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. For example, the
- // the best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger GC,
- // especially given the abundance of Headroom at this moment in time.
- //
- // TimeStamp (s) Alloc rate (MB/s)
- // 101.857 0
- // 101.860 0
- // 101.863 0
- // 101.866 0
- // 101.869 53.3
- //
- // At the next sample time, we will compute a slightly higher acceration, 9,150 MB/s/s. This is also insufficient to trigger
- // GC.
- //
- // TimeStamp (s) Alloc rate (MB/s)
- // 101.860 0
- // 101.863 0
- // 101.866 0
- // 101.869 53.3
- // 101.872 110.6
- //
- // Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This will
- // trigger GC over 500 ms earlier than was previously possible.
- //
- // TimeStamp (s) Alloc rate (MB/s)
- // 101.866 0
- // 101.869 53.3
- // 101.872 110.6
- // 101.875 165.9
- // 101.878 221.2
- //
- // The accelerated rate heuristic is based on the following idea:
- //
- // Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent
- // sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC
- // cycle? If not, we should trigger right now.
- //
- // Outline of this heuristic triggering technique:
- //
- // 1. We remember the N (e.g. N=3) most recent samples of spike allocation rate r0, r1, r2 samples at t0, t1, and t2
- // 2. if r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2)
- // 3. Otherwise, use least squares method to compute best-fit line of rate vs time
- // 4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate"
- // 5. Use r2 to rrpresent CurrentRate
- // 6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime
- // (See High School physics discussions on constant acceleration: D = v0 * t + 1/2 * a * t^2)
- // 7. if Consumption exceeds headroom, trigger now
- //
- // Though larger sample size may improve quality of predictor, it also delays trigger response. Smaller sample sizes
- // are more susceptible to false triggers based on random noise. The default configuration uses a sample size of 8 and
- // a sample period of roughly 15 ms, spanning approximately 120 ms of execution.
- if (consumption_accelerated > allocatable_words) {
- size_t size_t_alloc_rate = (size_t) current_rate_by_acceleration * HeapWordSize;
- if (acceleration > 0) {
- size_t size_t_acceleration = (size_t) acceleration * HeapWordSize;
- log_trigger("Accelerated consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at "
- "current rate (" PROPERFMT "/s) with acceleration (" PROPERFMT "/s/s) for planned %s GC time (%.2f ms)",
- PROPERFMTARGS(consumption_accelerated * HeapWordSize),
- PROPERFMTARGS(allocatable_words * HeapWordSize),
- PROPERFMTARGS(size_t_alloc_rate),
- PROPERFMTARGS(size_t_acceleration),
- future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)",
- future_accelerated_planned_gc_time * 1000);
- } else {
- log_trigger("Momentary spike consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at "
- "current rate (" PROPERFMT "/s) for planned %s GC time (%.2f ms) (spike threshold = %.2f)",
- PROPERFMTARGS(consumption_accelerated * HeapWordSize),
- PROPERFMTARGS(allocatable_words * HeapWordSize),
- PROPERFMTARGS(size_t_alloc_rate),
- future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)",
- future_accelerated_planned_gc_time * 1000, _spike_threshold_sd);
-
-
- }
- _spike_acceleration_num_samples = 0;
- _spike_acceleration_first_sample_index = 0;
-
- // Count this as a form of RATE trigger for purposes of adjusting heuristic triggering configuration because this
- // trigger is influenced more by margin_of_error_sd than by spike_threshold_sd.
- accept_trigger_with_type(RATE);
- return true;
- }
- }
-
- // Suppose we don't trigger now, but decide to trigger in the next regulator cycle. What will be the GC time then?
- predicted_future_gc_time = predict_gc_time(now + get_planned_sleep_interval());
- if (predicted_future_gc_time > avg_cycle_time) {
- future_planned_gc_time = predicted_future_gc_time;
- future_planned_gc_time_is_average = false;
- } else {
- future_planned_gc_time = avg_cycle_time;
- future_planned_gc_time_is_average = true;
- }
-
- log_debug(gc)("%s: average GC time: %.2f ms, predicted GC time: %.2f ms, allocation rate: %.0f %s/s",
- _space_info->name(), avg_cycle_time * 1000, predicted_future_gc_time * 1000,
- byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));
- size_t allocatable_bytes = allocatable_words * HeapWordSize;
-
- if (future_planned_gc_time * avg_alloc_rate > allocatable_bytes) {
- log_trigger("%s GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)"
- " to deplete free headroom (%zu%s) (margin of error = %.2f)",
- future_planned_gc_time_is_average? "Average": "Linear prediction of", future_planned_gc_time * 1000,
- byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
- byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes),
- _margin_of_error_sd);
-
- size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
- size_t penalties = capacity / 100 * _gc_time_penalties;
- size_t allocation_headroom = available;
- allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
- allocation_headroom -= MIN2(allocation_headroom, penalties);
- log_info(gc, ergo)("Free headroom: " PROPERFMT " (free) - " PROPERFMT "(spike) - " PROPERFMT " (penalties) = " PROPERFMT,
- PROPERFMTARGS(available),
- PROPERFMTARGS(spike_headroom),
- PROPERFMTARGS(penalties),
- PROPERFMTARGS(allocation_headroom));
- accept_trigger_with_type(RATE);
+ if (trigger_learning(available, capacity)) {
return true;
}
- is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
- if (is_spiking && (future_planned_gc_time * rate > allocatable_bytes)) {
- log_trigger("%s GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)"
- " to deplete free headroom (%zu%s) (spike threshold = %.2f)",
- future_planned_gc_time_is_average? "Average": "Linear prediction of", future_planned_gc_time * 1000,
- byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
- byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes),
- _spike_threshold_sd);
- accept_trigger_with_type(SPIKE);
+ const double anticipated_gc_start_time = get_most_recent_wake_time() + get_planned_sleep_interval();
+ const double anticipated_gc_duration = _cycles.predict_duration(anticipated_gc_start_time, _margin_of_error_sd);
+ ShenandoahAllocationRate& alloc_rate = ShenandoahHeap::heap()->alloc_rate();
+ const ShenandoahAnticipatedConsumption consumption = alloc_rate.snapshot(anticipated_gc_duration, _margin_of_error_sd);
+ const size_t allocatable_bytes = allocatable(available);
+ maybe_log_rate_trigger_parameters(consumption, allocatable_bytes);
+
+ if (trigger_accelerating_allocation_rate(consumption, allocatable_bytes)) {
return true;
}
+
+ if (trigger_average_allocation_rate(consumption, allocatable_bytes)) {
+ return true;
+ }
+
return ShenandoahHeuristics::should_start_gc();
}
-void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
- switch (_last_trigger) {
- case RATE:
- adjust_margin_of_error(amount);
- break;
- case SPIKE:
- adjust_spike_threshold(amount);
- break;
- case OTHER:
- // nothing to adjust here.
- break;
- default:
- ShouldNotReachHere();
+bool ShenandoahAdaptiveHeuristics::trigger_min_free_threshold(size_t available, size_t capacity) {
+ const size_t min_threshold = min_free_threshold(capacity);
+ if (available < min_threshold) {
+ log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")",
+ PROPERFMTARGS(available), PROPERFMTARGS(min_threshold));
+ accept_trigger_with_type(OTHER);
+ return true;
}
+ return false;
}
-void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
- _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
- log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
-}
-
-void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) {
- _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
- log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd);
-}
-
-size_t ShenandoahAdaptiveHeuristics::min_free_threshold() {
- return ShenandoahHeap::heap()->soft_max_capacity() / 100 * ShenandoahMinFreeThreshold;
-}
-
-// This is called each time a new rate sample has been gathered, as governed by ShenandoahAccelerationSamplePeriod.
-// Unlike traditional calculation of average allocation rate, there is no adjustment for standard deviation of the
-// accelerated rate prediction.
-size_t ShenandoahAdaptiveHeuristics::accelerated_consumption(double& acceleration, double& current_rate,
- double avg_alloc_rate_words_per_second,
- double predicted_cycle_time) const
-{
- double *x_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
- double *y_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
- double x_sum = 0.0;
- double y_sum = 0.0;
-
- assert(_spike_acceleration_num_samples > 0, "At minimum, we should have sample from this period");
-
- double weighted_average_alloc;
- if (_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) {
- double weighted_y_sum = 0;
- double total_weight = 0;
- double previous_x = 0;
- uint delta = _spike_acceleration_num_samples - ShenandoahRateAccelerationSampleSize;
- for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) {
- uint index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size;
- x_array[i] = _spike_acceleration_rate_timestamps[index];
- x_sum += x_array[i];
- y_array[i] = _spike_acceleration_rate_samples[index];
- if (i > 0) {
- // first sample not included in weighted average because it has no weight.
- double sample_weight = x_array[i] - x_array[i-1];
- weighted_y_sum += y_array[i] * sample_weight;
- total_weight += sample_weight;
- }
- y_sum += y_array[i];
- }
- weighted_average_alloc = (total_weight > 0)? weighted_y_sum / total_weight: 0;
- } else {
- weighted_average_alloc = 0;
- }
-
- double momentary_rate;
- if (_spike_acceleration_num_samples > ShenandoahMomentaryAllocationRateSpikeSampleSize) {
- // Num samples must be strictly greater than sample size, because we need one extra sample to compute rate and weights
- // In this context, the weight of a y value (an allocation rate) is the duration for which this allocation rate was
- // active (the time since previous y value was reported). An allocation rate measured over a span of 300 ms (e.g. during
- // concurrent GC) has much more "weight" than an allocation rate measured over a span of 15 s.
- double weighted_y_sum = 0;
- double total_weight = 0;
- double sum_for_average = 0.0;
- uint delta = _spike_acceleration_num_samples - ShenandoahMomentaryAllocationRateSpikeSampleSize;
- for (uint i = 0; i < ShenandoahMomentaryAllocationRateSpikeSampleSize; i++) {
- uint sample_index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size;
- uint preceding_index = (sample_index == 0)? _spike_acceleration_buffer_size - 1: sample_index - 1;
- double sample_weight = (_spike_acceleration_rate_timestamps[sample_index]
- - _spike_acceleration_rate_timestamps[preceding_index]);
- weighted_y_sum += _spike_acceleration_rate_samples[sample_index] * sample_weight;
- total_weight += sample_weight;
- }
- momentary_rate = weighted_y_sum / total_weight;
- bool is_spiking = _allocation_rate.is_spiking(momentary_rate, _spike_threshold_sd);
- if (!is_spiking) {
- // Disable momentary spike trigger unless allocation rate delta from average exceeds sd
- momentary_rate = 0.0;
- }
- } else {
- momentary_rate = 0.0;
- }
-
- // By default, use momentary_rate for current rate and zero acceleration. Overwrite iff best-fit line has positive slope.
- current_rate = momentary_rate;
- acceleration = 0.0;
- if ((_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize)
- && (weighted_average_alloc >= avg_alloc_rate_words_per_second)) {
- // If the average rate across the acceleration samples is below the overall average, this sample is not eligible to
- // represent acceleration of allocation rate. We may just be catching up with allocations after a lull.
-
- double *xy_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
- double *x2_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
- double xy_sum = 0.0;
- double x2_sum = 0.0;
- for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) {
- xy_array[i] = x_array[i] * y_array[i];
- xy_sum += xy_array[i];
- x2_array[i] = x_array[i] * x_array[i];
- x2_sum += x2_array[i];
- }
- // Find the best-fit least-squares linear representation of rate vs time
- double m; /* slope */
- double b; /* y-intercept */
-
- m = ((ShenandoahRateAccelerationSampleSize * xy_sum - x_sum * y_sum)
- / (ShenandoahRateAccelerationSampleSize * x2_sum - x_sum * x_sum));
- b = (y_sum - m * x_sum) / ShenandoahRateAccelerationSampleSize;
-
- if (m > 0) {
- double proposed_current_rate = m * x_array[ShenandoahRateAccelerationSampleSize - 1] + b;
- acceleration = m;
- current_rate = proposed_current_rate;
- }
- // else, leave current_rate = momentary_rate, acceleration = 0
- }
- // and here also, leave current_rate = momentary_rate, acceleration = 0
-
- double time_delta = get_planned_sleep_interval() + predicted_cycle_time;
- size_t words_to_be_consumed = (size_t) (current_rate * time_delta + 0.5 * acceleration * time_delta * time_delta);
- return words_to_be_consumed;
-}
-
-ShenandoahAllocationRate::ShenandoahAllocationRate() :
- _last_sample_time(os::elapsedTime()),
- _last_sample_value(0),
- _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz),
- _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor),
- _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) {
-}
-
-double ShenandoahAllocationRate::force_sample(size_t allocated, size_t &unaccounted_bytes_allocated) {
- const double MinSampleTime = 0.002; // Do not sample if time since last update is less than 2 ms
- double now = os::elapsedTime();
- double time_since_last_update = now - _last_sample_time;
- double rate = 0.0;
- if (time_since_last_update < MinSampleTime) {
- // If we choose not to sample right now, the unaccounted_bytes_allocated will be added
- // into the next sample taken. These unaccounted_bytes_allocated will be added to
- // any additional bytes that are allocated during this GC cycle at the time the rate is
- // next sampled. We do not overwrite _last_sample_time on this path, because the
- // unaccounted_bytes_allocated were allocated following _last_sample_time.
- unaccounted_bytes_allocated = allocated - _last_sample_value;
- } else {
- rate = instantaneous_rate(now, allocated);
- _rate.add(rate);
- _rate_avg.add(_rate.avg());
- _last_sample_time = now;
- unaccounted_bytes_allocated = 0;
- }
- // force_sample() is called when resetting bytes allocated since gc start. All subsequent
- // requests to sample allocated bytes during this GC cycle are measured as a delta from
- // _last_sample_value. In the case that we choose not to sample now, we will count the
- // unaccounted_bytes_allocated as if they were allocated following the start of this GC
- // cycle (but the time span over which these bytes were allocated begins at
- // _last_sample_time, which we do not overwrite).
- _last_sample_value = 0;
- return rate;
-}
-
-double ShenandoahAllocationRate::sample(size_t allocated) {
- double now = os::elapsedTime();
- double rate = 0.0;
- if (now - _last_sample_time > _interval_sec) {
- rate = instantaneous_rate(now, allocated);
- _rate.add(rate);
- _rate_avg.add(_rate.avg());
- _last_sample_time = now;
- _last_sample_value = allocated;
- }
- return rate;
-}
-
-double ShenandoahAllocationRate::upper_bound(double sds) const {
- // Here we are using the standard deviation of the computed running
- // average, rather than the standard deviation of the samples that went
- // into the moving average. This is a much more stable value and is tied
- // to the actual statistic in use (moving average over samples of averages).
- return _rate.davg() + (sds * _rate_avg.dsd());
-}
-
-void ShenandoahAllocationRate::allocation_counter_reset() {
- _last_sample_time = os::elapsedTime();
- _last_sample_value = 0;
-}
-
-bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
- if (rate <= 0.0) {
- return false;
- }
-
- double sd = _rate.sd();
- if (sd > 0) {
- // There is a small chance that that rate has already been sampled, but it seems not to matter in practice.
- // Note that z_score reports how close the rate is to the average. A value between -1 and 1 means we are within one
- // standard deviation. A value between -3 and +3 means we are within 3 standard deviations. We only check for z_score
- // greater than threshold because we are looking for an allocation spike which is greater than the mean.
- double z_score = (rate - _rate.avg()) / sd;
- if (z_score > threshold) {
+bool ShenandoahAdaptiveHeuristics::trigger_learning(size_t available, size_t capacity) {
+ // Check if we need to learn a bit about the application
+ if (_gc_times_learned < ShenandoahLearningSteps) {
+ const size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
+ if (available < init_threshold) {
+ log_trigger("Learning %zu of %zu. Free (" PROPERFMT ") is below initial threshold (" PROPERFMT ")",
+ _gc_times_learned + 1, ShenandoahLearningSteps, PROPERFMTARGS(available), PROPERFMTARGS(init_threshold));
+ accept_trigger_with_type(OTHER);
return true;
}
}
return false;
}
-double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
- assert(allocated >= _last_sample_value, "Must be");
- assert(time > _last_sample_time, "Must be");
- return (allocated - _last_sample_value) / (time - _last_sample_time);
+bool ShenandoahAdaptiveHeuristics::trigger_average_allocation_rate(const ShenandoahAnticipatedConsumption& rate, const size_t allocatable_bytes) {
+ if (rate.baseline_consumption() > allocatable_bytes) {
+ log_trigger("Anticipated GC duration (%.2f ms) is above the time for average allocation rate (" PROPERFMT_F "/s)"
+ " to deplete free headroom (" PROPERFMT ") (margin of error = %.2f)",
+ rate.duration_seconds() * 1000,
+ PROPERFMT_F_ARGS(rate.baseline_rate()), PROPERFMTARGS(allocatable_bytes), _margin_of_error_sd);
+ accept_trigger_with_type(RATE);
+ return true;
+ }
+ return false;
}
+
+// Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation
+// rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later,
+// and then twice as big again after another short delay. When a phase change causes many threads to increase their
+// allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience
+// the phase change.
+//
+// The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before
+// introduction of accelerated allocation rate detection. Though the allocation rate is seen to be increasing at times
+// 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is
+// still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates.
+//
+// Sample Time (s) Allocation Rate (MB/s) Headroom (GB)
+// 101.807 0.0 26.93
+// <--- accelerated spike can trigger here, around time 101.9s
+// 101.907 477.6 26.85
+// 102.007 3,206.0 26.35
+// 102.108 23,797.8 24.19
+// 102.208 24,164.5 21.83
+// 102.309 23,965.0 19.47
+// 102.409 24,624.35 17.05 <--- without accelerated rate detection, we trigger here
+//
+// Though the above measurements are from an actual workload, the following details regarding sampled allocation rates at 3ms
+// period were not measured directly for this run-time sample. These are hypothetical, though they represent a plausible
+// result that correlates with the actual measurements.
+//
+// For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The phase
+// change that causes increasing allocations occurs near the end of this time segment. When sampled with a 3 ms period,
+// acceleration of allocation can be triggered at approximately time 101.88s.
+//
+// In the default configuration, accelerated allocation rate is detected by examining a sequence of 8 allocation rate samples.
+//
+// Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. For example,
+// the best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger GC,
+// especially given the abundance of Headroom at this moment in time.
+//
+// TimeStamp (s) Alloc rate (MB/s)
+// 101.857 0
+// 101.860 0
+// 101.863 0
+// 101.866 0
+// 101.869 53.3
+//
+// At the next sample time, we will compute a slightly higher acceleration, 9,150 MB/s/s. This is also insufficient to trigger
+// GC.
+//
+// TimeStamp (s) Alloc rate (MB/s)
+// 101.860 0
+// 101.863 0
+// 101.866 0
+// 101.869 53.3
+// 101.872 110.6
+//
+// Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This will
+// trigger GC over 500 ms earlier than was previously possible.
+//
+// TimeStamp (s) Alloc rate (MB/s)
+// 101.866 0
+// 101.869 53.3
+// 101.872 110.6
+// 101.875 165.9
+// 101.878 221.2
+//
+// The accelerated rate heuristic is based on the following idea:
+//
+// Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent
+// sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC
+// cycle? If not, we should trigger right now.
+//
+// Outline of this heuristic triggering technique:
+//
+// 1. We remember the N (e.g. N=3) most recent samples of spike allocation rate r0, r1, and r2, sampled at t0, t1, and t2
+// 2. if r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2)
+// 3. Otherwise, use least squares method to compute best-fit line of rate vs time
+// 4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate"
+// 5. Use r2 to represent CurrentRate
+// 6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime
+// (See High School physics discussions on constant acceleration: D = v0 * t + 1/2 * a * t^2)
+// 7. if Consumption exceeds headroom, trigger now
+//
+// Though larger sample size may improve quality of predictor, it also delays trigger response. Smaller sample sizes
+// are more susceptible to false triggers based on random noise. The default configuration uses a sample size of 8 and
+// a sample period of roughly 15 ms, spanning approximately 120 ms of execution.
+bool ShenandoahAdaptiveHeuristics::trigger_accelerating_allocation_rate(const ShenandoahAnticipatedConsumption& rate, const size_t allocatable_bytes) {
+ if (rate.momentary_consumption() > allocatable_bytes) {
+ assert(rate.accelerated_consumption() == 0, "Momentary trigger is meant to exclude acceleration trigger");
+ log_trigger("Momentary spike consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at "
+ "current rate (" PROPERFMT_F "/s) for anticipated GC duration (%.2f ms)",
+ PROPERFMTARGS(rate.momentary_consumption()), PROPERFMTARGS(allocatable_bytes),
+ PROPERFMT_F_ARGS(rate.momentary_rate()), rate.duration_seconds() * 1000);
+ accept_trigger_with_type(RATE);
+ return true;
+ }
+
+ if (rate.accelerated_consumption() > allocatable_bytes) {
+ assert(rate.momentary_consumption() == 0, "Acceleration trigger is meant to exclude momentary trigger");
+ log_trigger("Accelerated consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at "
+ "current rate (" PROPERFMT_F "/s) with acceleration (" PROPERFMT_F "/s/s) for anticipated GC duration (%.2f ms)",
+ PROPERFMTARGS(rate.accelerated_consumption()), PROPERFMTARGS(allocatable_bytes),
+ PROPERFMT_F_ARGS(rate.predicted_rate()), PROPERFMT_F_ARGS(rate.acceleration()), rate.duration_seconds() * 1000);
+ accept_trigger_with_type(RATE);
+ return true;
+ }
+
+ return false;
+}
+
+void ShenandoahAdaptiveHeuristics::maybe_log_rate_trigger_parameters(const ShenandoahAnticipatedConsumption &consumption,
+ size_t allocatable_bytes) const {
+ if (log_is_enabled(Debug, gc, sampling)) {
+ log_debug(gc, sampling)(
+ "%s: Anticipated cycle duration: %.3fs, head room: " PROPERFMT ", margin of error: %.3f "
+ "Baseline consumption: " PROPERFMT ", Baseline rate: " PROPERFMT_F "/s, "
+ "Momentary consumption: " PROPERFMT ", Momentary rate: " PROPERFMT_F "/s, "
+ "Accelerated consumption: " PROPERFMT ", Predicted rate: " PROPERFMT_F "/s, Acceleration: %.3f",
+ _space_info->name(), consumption.duration_seconds(), PROPERFMTARGS(allocatable_bytes), _margin_of_error_sd,
+ PROPERFMTARGS(consumption.baseline_consumption()), PROPERFMT_F_ARGS(consumption.baseline_rate()),
+ PROPERFMTARGS(consumption.momentary_consumption()), PROPERFMT_F_ARGS(consumption.momentary_rate()),
+ PROPERFMTARGS(consumption.accelerated_consumption()), PROPERFMT_F_ARGS(consumption.predicted_rate()), consumption.acceleration()
+ );
+ }
+}
+
+void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
+ _margin_of_error_sd = clamp(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
+ log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
+}
+
+size_t ShenandoahAdaptiveHeuristics::min_free_threshold(size_t capacity) const {
+ return capacity / 100 * ShenandoahMinFreeThreshold;
+}
+
+#undef PROPERFMT_F
+#undef PROPERFMT_F_ARGS
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp
index c761f2a82f3..75eb3a7facb 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -27,71 +27,11 @@
#define SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
-#include "gc/shenandoah/shenandoahFreeSet.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.hpp"
+#include "gc/shenandoah/shenandoahCycleDuration.hpp"
#include "gc/shenandoah/shenandoahPhaseTimings.hpp"
-#include "gc/shenandoah/shenandoahRegulatorThread.hpp"
-#include "gc/shenandoah/shenandoahSharedVariables.hpp"
-#include "memory/allocation.hpp"
#include "utilities/numberSeq.hpp"
-/**
- * ShenandoahAllocationRate maintains a truncated history of recently sampled allocation rates for the purpose of providing
- * informed estimates of current and future allocation rates based on weighted averages and standard deviations of the
- * truncated history. More recently sampled allocations are weighted more heavily than older samples when computing
- * averages and standard deviations.
- */
-class ShenandoahAllocationRate : public CHeapObj {
- public:
- explicit ShenandoahAllocationRate();
-
- // Reset the _last_sample_value to zero, _last_sample_time to current time.
- void allocation_counter_reset();
-
- // Force an allocation rate sample to be taken, even if the time since last sample is not greater than
- // 1s/ShenandoahAdaptiveSampleFrequencyHz, except when current_time - _last_sample_time < MinSampleTime (2 ms).
- // The sampled allocation rate is computed from (allocated - _last_sample_value) / (current_time - _last_sample_time).
- // Return the newly computed rate if the sample is taken, zero if it is not an appropriate time to add a sample.
- // In the case that a new sample is not taken, overwrite unaccounted_bytes_allocated with bytes allocated since
- // the previous sample was taken (allocated - _last_sample_value). Otherwise, overwrite unaccounted_bytes_allocated
- // with 0.
- double force_sample(size_t allocated, size_t &unaccounted_bytes_allocated);
-
- // Add an allocation rate sample if the time since last sample is greater than 1s/ShenandoahAdaptiveSampleFrequencyHz.
- // The sampled allocation rate is computed from (allocated - _last_sample_value) / (current_time - _last_sample_time).
- // Return the newly computed rate if the sample is taken, zero if it is not an appropriate time to add a sample.
- double sample(size_t allocated);
-
- // Return an estimate of the upper bound on allocation rate, with the upper bound computed as the weighted average
- // of recently sampled instantaneous allocation rates added to sds times the standard deviation computed for the
- // sequence of recently sampled average allocation rates.
- double upper_bound(double sds) const;
-
- // Test whether rate significantly diverges from the computed average allocation rate. If so, return true.
- // Otherwise, return false. Significant divergence is recognized if (rate - _rate.avg()) / _rate.sd() > threshold.
- bool is_spiking(double rate, double threshold) const;
-
- private:
-
- // Return the instantaneous rate calculated from (allocated - _last_sample_value) / (time - _last_sample_time).
- // Return Sentinel value 0.0 if (time - _last_sample_time) == 0 or if (allocated <= _last_sample_value).
- double instantaneous_rate(double time, size_t allocated) const;
-
- // Time at which previous allocation rate sample was collected.
- double _last_sample_time;
-
- // Bytes allocated as of the time at which previous allocation rate sample was collected.
- size_t _last_sample_value;
-
- // The desired interval of time between consecutive samples of the allocation rate.
- double _interval_sec;
-
- // Holds a sequence of the most recently sampled instantaneous allocation rates
- TruncatedSeq _rate;
-
- // Holds a sequence of the most recently computed weighted average of allocation rates, with each weighted average
- // computed immediately after an instantaneous rate was sampled
- TruncatedSeq _rate_avg;
-};
/*
* The adaptive heuristic tracks the allocation behavior and average cycle
@@ -106,38 +46,18 @@ class ShenandoahAllocationRate : public CHeapObj {
*/
class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {
public:
- ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info);
+ explicit ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info);
- virtual ~ShenandoahAdaptiveHeuristics();
+ void initialize() override;
- virtual void initialize() override;
-
- virtual void post_initialize() override;
-
- virtual void adjust_penalty(intx step) override;
+ void post_initialize() override;
// At the end of GC(N), we idle GC until necessary to start the next GC. Compute the threshold of memory that can be allocated
// before we need to start the next GC.
void start_idle_span() override;
- // Having observed a new allocation rate sample, add this to the acceleration history so that we can determine if allocation
- // rate is accelerating.
- void add_rate_to_acceleration_history(double timestamp, double rate);
-
- // Compute and return the current allocation rate, the current rate of acceleration, and the amount of memory that we expect
- // to consume if we start GC right now and gc takes predicted_cycle_time to complete.
- size_t accelerated_consumption(double& acceleration, double& current_rate,
- double avg_rate_words_per_sec, double predicted_cycle_time) const;
-
-
- void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
- RegionData* data, size_t size,
- size_t actual_free) override;
-
- void record_cycle_start() override;
void record_success_concurrent() override;
void record_degenerated() override;
- void record_success_full() override;
bool should_start_gc() override;
@@ -145,47 +65,33 @@ public:
bool is_diagnostic() override { return false; }
bool is_experimental() override { return false; }
+ // In preparation for a span during which GC will be idle, compute the headroom adjustment that will be used to
+ // detect when GC needs to trigger.
+ void compute_headroom_adjustment() override;
+
private:
- // These are used to adjust the margin of error and the spike threshold
- // in response to GC cycle outcomes. These values are shared, but the
- // margin of error and spike threshold trend in opposite directions.
- const static double FULL_PENALTY_SD;
- const static double DEGENERATE_PENALTY_SD;
-
- const static double MINIMUM_CONFIDENCE;
- const static double MAXIMUM_CONFIDENCE;
-
- const static double LOWEST_EXPECTED_AVAILABLE_AT_END;
- const static double HIGHEST_EXPECTED_AVAILABLE_AT_END;
-
- const static size_t GC_TIME_SAMPLE_SIZE;
-
- friend class ShenandoahAllocationRate;
-
- // Used to record the last trigger that signaled to start a GC.
- // This itself is used to decide whether or not to adjust the margin of
- // error for the average cycle time and allocation rate or the allocation
- // spike detection threshold.
- enum Trigger {
- SPIKE, RATE, OTHER
- };
-
- void adjust_last_trigger_parameters(double amount);
void adjust_margin_of_error(double amount);
- void adjust_spike_threshold(double amount);
- // Returns number of words that can be allocated before we need to trigger next GC, given available in bytes.
- inline size_t allocatable(size_t available) const {
- return (available > _headroom_adjustment)? (available - _headroom_adjustment) / HeapWordSize: 0;
+ // Returns number of bytes that can be allocated before we need to trigger next GC, given available in bytes.
+ size_t allocatable(size_t available) const {
+ return available > _headroom_adjustment ? available - _headroom_adjustment : 0;
}
protected:
- ShenandoahAllocationRate _allocation_rate;
+ void adjust_penalty(intx step) override;
+ void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
+ RegionData* data, size_t size,
+ size_t actual_free) override;
- // Invocations of should_start_gc() happen approximately once per ms. Queries of allocation rate only happen if a
- // a certain amount of time has passed since the previous query.
- size_t _allocated_at_previous_query;
- double _time_of_previous_allocation_query;
+
+ ShenandoahCycleDuration _cycles;
+
+ // Used to record the last trigger that signaled to start a GC.
+ // This itself is used to decide whether to adjust the margin of
+ // error for the average cycle time.
+ enum Trigger {
+ RATE, OTHER
+ };
// The margin of error expressed in standard deviations to add to our
// average cycle time and allocation rate. As this value increases we
@@ -194,18 +100,9 @@ protected:
// concurrent GCs.
double _margin_of_error_sd;
- // The allocation spike threshold is expressed in standard deviations.
- // If the standard deviation of the most recent sample of the allocation
- // rate exceeds this threshold, a GC cycle is started. As this value
- // decreases the sensitivity to allocation spikes increases. In other
- // words, lowering the spike threshold will tend to increase the number
- // of concurrent GCs.
- double _spike_threshold_sd;
-
// Remember which trigger is responsible for the last GC cycle. When the
// outcome of the cycle is evaluated we will adjust the parameters for the
- // corresponding triggers. Note that successful outcomes will raise
- // the spike threshold and lower the margin of error.
+ // corresponding triggers.
Trigger _last_trigger;
// Keep track of the available memory at the end of a GC cycle. This
@@ -213,67 +110,29 @@ protected:
// source of feedback to adjust trigger parameters.
TruncatedSeq _available;
- ShenandoahFreeSet* _free_set;
-
- // This represents the time at which the allocation rate was most recently sampled for the purpose of detecting acceleration.
- double _previous_acceleration_sample_timestamp;
- size_t _total_allocations_at_start_of_idle;
-
// bytes of headroom at which we should trigger GC
size_t _headroom_adjustment;
- // Keep track of GC_TIME_SAMPLE_SIZE most recent concurrent GC cycle times
- uint _gc_time_first_sample_index;
- uint _gc_time_num_samples;
- double* const _gc_time_timestamps;
- double* const _gc_time_samples;
- double* const _gc_time_xy; // timestamp * sample
- double* const _gc_time_xx; // timestamp squared
- double _gc_time_sum_of_timestamps;
- double _gc_time_sum_of_samples;
- double _gc_time_sum_of_xy;
- double _gc_time_sum_of_xx;
-
- double _gc_time_m; // slope
- double _gc_time_b; // y-intercept
- double _gc_time_sd; // sd on deviance from prediction
-
- // In preparation for a span during which GC will be idle, compute the headroom adjustment that will be used to
- // detect when GC needs to trigger.
- void compute_headroom_adjustment() override;
-
- void add_gc_time(double timestamp_at_start, double duration);
void add_degenerated_gc_time(double timestamp_at_start, double duration);
- double predict_gc_time(double timestamp_at_start);
-
- // Keep track of SPIKE_ACCELERATION_SAMPLE_SIZE most recent spike allocation rate measurements. Note that it is
- // typical to experience a small spike following end of GC cycle, as mutator threads refresh their TLABs. But
- // there is generally an abundance of memory at this time as well, so this will not generally trigger GC.
- uint _spike_acceleration_buffer_size;
- uint _spike_acceleration_first_sample_index;
- uint _spike_acceleration_num_samples;
- double* const _spike_acceleration_rate_samples; // holds rates in words/second
- double* const _spike_acceleration_rate_timestamps;
// A conservative minimum threshold of free space that we'll try to maintain when possible.
// For example, we might trigger a concurrent gc if we are likely to drop below
// this threshold, or we might consider this when dynamically resizing generations
// in the generational case. Controlled by global flag ShenandoahMinFreeThreshold.
- size_t min_free_threshold();
+ size_t min_free_threshold(size_t capacity) const;
void accept_trigger_with_type(Trigger trigger_type) {
_last_trigger = trigger_type;
- ShenandoahHeuristics::accept_trigger();
+ accept_trigger();
}
-public:
- // Sample the allocation rate at GC trigger time if possible. Return the number of allocated bytes that were
- // not accounted for in the sample. This must be called before resetting bytes allocated since gc start.
- size_t force_alloc_rate_sample(size_t bytes_allocated) override {
- size_t unaccounted_bytes;
- _allocation_rate.force_sample(bytes_allocated, unaccounted_bytes);
- return unaccounted_bytes;
- }
+ bool trigger_min_free_threshold(size_t available, size_t capacity);
+ bool trigger_learning(size_t available, size_t capacity);
+ bool trigger_average_allocation_rate(const ShenandoahAnticipatedConsumption& rate, size_t allocatable_bytes);
+ bool trigger_accelerating_allocation_rate(const ShenandoahAnticipatedConsumption& rate, size_t allocatable_bytes);
+
+private:
+ void maybe_log_rate_trigger_parameters(const ShenandoahAnticipatedConsumption & consumption, size_t allocatable_bytes) const;
};
#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp
index 28673b28612..26a2363d4d5 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp
@@ -23,16 +23,17 @@
*
*/
-
#include "gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
+#include "gc/shenandoah/shenandoahUtils.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
ShenandoahCompactHeuristics::ShenandoahCompactHeuristics(ShenandoahSpaceInfo* space_info) :
- ShenandoahHeuristics(space_info) {
+ ShenandoahHeuristics(space_info),
+ _bytes_used_at_end_of_gc(0) {
SHENANDOAH_ERGO_ENABLE_FLAG(ExplicitGCInvokesConcurrent);
SHENANDOAH_ERGO_ENABLE_FLAG(ShenandoahImplicitGCInvokesConcurrent);
SHENANDOAH_ERGO_ENABLE_FLAG(ShenandoahUncommit);
@@ -46,29 +47,28 @@ ShenandoahCompactHeuristics::ShenandoahCompactHeuristics(ShenandoahSpaceInfo* sp
}
bool ShenandoahCompactHeuristics::should_start_gc() {
- size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
- size_t available = _space_info->soft_mutator_available();
- size_t bytes_allocated = _space_info->bytes_allocated_since_gc_start();
+ const size_t used = _space_info->used();
+ const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
+ const size_t available = _space_info->soft_mutator_available();
+ const size_t bytes_allocated = used > _bytes_used_at_end_of_gc ? used - _bytes_used_at_end_of_gc : 0;
log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", "
"allocated_since_gc_start: " PROPERFMT,
PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(bytes_allocated));
- size_t threshold_bytes_allocated = capacity / 100 * ShenandoahAllocationThreshold;
- size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold;
+ const size_t threshold_bytes_allocated = capacity / 100 * ShenandoahAllocationThreshold;
+ const size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold;
if (available < min_threshold) {
- log_trigger("Free (Soft) (%zu%s) is below minimum threshold (%zu%s)",
- byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
- byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
+ log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")",
+ PROPERFMTARGS(available), PROPERFMTARGS(min_threshold));
accept_trigger();
return true;
}
if (bytes_allocated > threshold_bytes_allocated) {
- log_trigger("Allocated since last cycle (%zu%s) is larger than allocation threshold (%zu%s)",
- byte_size_in_proper_unit(bytes_allocated), proper_unit_for_byte_size(bytes_allocated),
- byte_size_in_proper_unit(threshold_bytes_allocated), proper_unit_for_byte_size(threshold_bytes_allocated));
+ log_trigger("Allocated since last cycle started (" PROPERFMT ") is larger than allocation threshold (" PROPERFMT ")",
+ PROPERFMTARGS(bytes_allocated), PROPERFMTARGS(threshold_bytes_allocated));
accept_trigger();
return true;
}
@@ -76,22 +76,25 @@ bool ShenandoahCompactHeuristics::should_start_gc() {
return ShenandoahHeuristics::should_start_gc();
}
+void ShenandoahCompactHeuristics::record_cycle_end() {
+ ShenandoahHeuristics::record_cycle_end();
+ _bytes_used_at_end_of_gc = _space_info->used();
+}
+
void ShenandoahCompactHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
RegionData* data, size_t size,
size_t actual_free) {
// Do not select too large CSet that would overflow the available free space
- size_t max_cset = actual_free * 3 / 4;
+ const size_t max_cset = actual_free * 3 / 4;
- log_info(gc, ergo)("CSet Selection. Actual Free: %zu%s, Max CSet: %zu%s",
- byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
- byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset));
-
- size_t threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
+ log_info(gc, ergo)("CSet Selection. Actual Free: " PROPERFMT ", Max CSet: " PROPERFMT,
+ PROPERFMTARGS(actual_free), PROPERFMTARGS(max_cset));
+ const size_t threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
size_t live_cset = 0;
for (size_t idx = 0; idx < size; idx++) {
ShenandoahHeapRegion* r = data[idx].get_region();
- size_t new_cset = live_cset + r->get_live_data_bytes();
+ const size_t new_cset = live_cset + r->get_live_data_bytes();
if (new_cset < max_cset && r->garbage() > threshold) {
live_cset = new_cset;
cset->add_region(r);
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp
index a32c9c88478..3670cdec9f0 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,14 +37,19 @@ public:
explicit ShenandoahCompactHeuristics(ShenandoahSpaceInfo* space_info);
bool should_start_gc() override;
+ const char* name() override { return "Compact"; }
+ bool is_diagnostic() override { return false; }
+ bool is_experimental() override { return false; }
+ void record_cycle_end() override;
+
+protected:
void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
RegionData* data, size_t size,
size_t actual_free) override;
- const char* name() override { return "Compact"; }
- bool is_diagnostic() override { return false; }
- bool is_experimental() override { return false; }
+private:
+ size_t _bytes_used_at_end_of_gc;
};
#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHCOMPACTHEURISTICS_HPP
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
index 840459288c3..ca4dfc71c61 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
@@ -1,6 +1,6 @@
/*
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
*/
#include "gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.inline.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahGeneration.hpp"
@@ -32,6 +33,7 @@
#include "gc/shenandoah/shenandoahInPlacePromoter.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahTrace.hpp"
+#include "gc/shenandoah/shenandoahUtils.hpp"
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "logging/log.hpp"
#include "utilities/quickSort.hpp"
@@ -48,10 +50,16 @@ static int compare_by_aged_live(AgedRegionData a, AgedRegionData b) {
void ShenandoahGenerationalHeuristics::post_initialize() {
ShenandoahHeuristics::post_initialize();
- _free_set = ShenandoahHeap::heap()->free_set();
compute_headroom_adjustment();
}
+void ShenandoahGenerationalHeuristics::record_cycle_end() {
+ ShenandoahAdaptiveHeuristics::record_cycle_end();
+
+ ShenandoahAllocationRate& alloc_rate = ShenandoahHeap::heap()->alloc_rate();
+ alloc_rate.update_minimum_sample_size(_space_info->soft_mutator_available());
+}
+
inline void assert_no_in_place_promotions() {
#ifdef ASSERT
class ShenandoahNoInPlacePromotions : public ShenandoahHeapRegionClosure {
@@ -350,7 +358,7 @@ size_t ShenandoahGenerationalHeuristics::select_aged_regions(ShenandoahInPlacePr
// Having chosen the collection set, adjust the budgets for generational mode based on its composition. Note
// that young_generation->available() now knows about recently discovered immediate garbage.
-void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap* const heap,
+void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahGenerationalHeap* const heap,
ShenandoahCollectionSet* const collection_set) {
shenandoah_assert_generational();
// We may find that old_evacuation_reserve and/or loaned_for_young_evacuation are not fully consumed, in which case we may
@@ -373,7 +381,7 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap*
ShenandoahYoungGeneration* const young_generation = heap->young_generation();
const size_t old_evacuated = collection_set->get_live_bytes_in_old_regions();
- size_t old_evacuated_committed = (size_t) (ShenandoahOldEvacWaste * double(old_evacuated));
+ size_t old_evacuated_committed = shenandoah_safe_size_cast(ShenandoahOldEvacWaste * static_cast<double>(old_evacuated));
size_t old_evacuation_reserve = old_generation->get_evacuation_reserve();
if (old_evacuated_committed > old_evacuation_reserve) {
@@ -391,11 +399,11 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap*
old_generation->set_evacuation_reserve(old_evacuation_reserve);
}
- size_t young_advance_promoted = collection_set->get_live_bytes_in_tenurable_regions();
- size_t young_advance_promoted_reserve_used = (size_t) (ShenandoahPromoEvacWaste * double(young_advance_promoted));
+ const double young_advance_promoted = collection_set->get_live_bytes_in_tenurable_regions();
+ size_t young_advance_promoted_reserve_used = shenandoah_safe_size_cast(ShenandoahPromoEvacWaste * young_advance_promoted);
- size_t young_evacuated = collection_set->get_live_bytes_in_untenurable_regions();
- size_t young_evacuated_reserve_used = (size_t) (ShenandoahEvacWaste * double(young_evacuated));
+ const double young_evacuated = collection_set->get_live_bytes_in_untenurable_regions();
+ const size_t young_evacuated_reserve_used = shenandoah_safe_size_cast(ShenandoahEvacWaste * young_evacuated);
// In top_off_collection_set(), we shrunk planned future reserve by _add_regions_to_old * region_size_bytes, but we
// didn't shrink available. The current reserve is not affected by the planned future reserve. Current available is
@@ -473,6 +481,16 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap*
if (add_regions_to_young > 0) {
assert(excess_old >= add_regions_to_young * region_size_bytes, "Cannot xfer more than excess old");
+ if (heap->age_census()->is_always_tenure()) {
+ // Cap excess_old at one min-PLAB per worker so this much stays in old's promotion reserve
+ // instead of being transferred to young.
+ const size_t min_plab_total = heap->plab_min_size() * HeapWordSize * heap->workers()->max_workers();
+ if (excess_old > min_plab_total) {
+ excess_old = min_plab_total;
+ // Avoid underflowing excess_old when we subtract below.
+ add_regions_to_young = 0;
+ }
+ }
excess_old -= add_regions_to_young * region_size_bytes;
log_debug(gc, ergo)("Before start of evacuation, total_promotion reserve is young_advance_promoted_reserve: %zu "
"plus excess: old: %zu", young_advance_promoted_reserve_used, excess_old);
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
index 8ea5cdb36c8..1860e3d4c0f 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
@@ -55,6 +55,9 @@ public:
void post_initialize() override;
+ void record_cycle_end() override;
+
+protected:
// Wraps budget computation, subclass region selection, budget adjustment, and tracing.
void choose_collection_set_from_regiondata(ShenandoahCollectionSet* set,
RegionData* data, size_t data_size,
@@ -89,7 +92,7 @@ private:
// Adjust evacuation budgets after choosing collection set. On entry, the instance variable _regions_to_xfer
// represents regions to be transferred to old based on decisions made in top_off_collection_set()
- void adjust_evacuation_budgets(ShenandoahHeap* const heap,
+ void adjust_evacuation_budgets(ShenandoahGenerationalHeap* const heap,
ShenandoahCollectionSet* const collection_set);
protected:
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
index 9452e8b28cb..d9f3bdee828 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
@@ -25,10 +25,11 @@
#include "gc/shenandoah/heuristics/shenandoahGlobalHeuristics.hpp"
#include "gc/shenandoah/shenandoahAsserts.hpp"
-#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahGenerationalHeap.inline.hpp"
#include "gc/shenandoah/shenandoahGlobalGeneration.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
+#include "gc/shenandoah/shenandoahUtils.hpp"
+#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "utilities/quickSort.hpp"
bool ShenandoahEvacuationBudget::try_reserve(size_t bytes) {
@@ -178,6 +179,12 @@ void ShenandoahGlobalHeuristics::choose_global_collection_set(ShenandoahCollecti
size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + original_young_evac_reserve;
size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
+ // Admit every region with any garbage so every live object gets a chance to be promoted.
+ if (heap->age_census()->is_always_tenure()) {
+ ignore_threshold = 0;
+ min_garbage = SIZE_MAX;
+ }
+
ShenandoahGlobalCSetBudget budget(region_size_bytes,
shared_reserve_regions * region_size_bytes,
garbage_threshold, ignore_threshold, min_garbage,
@@ -248,17 +255,17 @@ void ShenandoahGlobalCSetBudget::assert_budget_constraints_hold(size_t original_
assert(young_evac.live_bytes() * young_evac.waste_factor() <=
young_evac.reserve() + young_evac.region_count(),
"Young evac consumption (%zu) exceeds reserve (%zu) + region count (%zu)",
- (size_t)(young_evac.live_bytes() * young_evac.waste_factor()),
+ shenandoah_safe_size_cast(young_evac.live_bytes() * young_evac.waste_factor()),
young_evac.reserve(), young_evac.region_count());
assert(old_evac.live_bytes() * old_evac.waste_factor() <=
old_evac.reserve() + old_evac.region_count(),
"Old evac consumption (%zu) exceeds reserve (%zu) + region count (%zu)",
- (size_t)(old_evac.live_bytes() * old_evac.waste_factor()),
+ shenandoah_safe_size_cast(old_evac.live_bytes() * old_evac.waste_factor()),
old_evac.reserve(), old_evac.region_count());
assert(promo.live_bytes() * promo.waste_factor() <=
promo.reserve() + promo.region_count(),
"Promo consumption (%zu) exceeds reserve (%zu) + region count (%zu)",
- (size_t)(promo.live_bytes() * promo.waste_factor()),
+ shenandoah_safe_size_cast(promo.live_bytes() * promo.waste_factor()),
promo.reserve(), promo.region_count());
size_t total_post_reserves = young_evac.reserve() + old_evac.reserve() + promo.reserve();
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp
index d2010d921b1..2f247db2951 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -26,10 +26,12 @@
#include "gc/shared/gcCause.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.inline.hpp"
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
#include "gc/shenandoah/shenandoahTrace.hpp"
+#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
#include "runtime/globals_extension.hpp"
@@ -60,7 +62,6 @@ ShenandoahHeuristics::ShenandoahHeuristics(ShenandoahSpaceInfo* space_info) :
_last_cycle_end(0),
_gc_times_learned(0),
_gc_time_penalties(0),
- _gc_cycle_time_history(new TruncatedSeq(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor)),
_metaspace_oom()
{
size_t num_regions = ShenandoahHeap::heap()->num_regions();
@@ -174,6 +175,12 @@ void ShenandoahHeuristics::record_cycle_start() {
void ShenandoahHeuristics::record_cycle_end() {
_last_cycle_end = os::elapsedTime();
+
+ ShenandoahHeap* heap = ShenandoahHeap::heap();
+ if (!heap->mode()->is_generational()) {
+ const size_t available = _space_info->soft_mutator_available();
+ heap->alloc_rate().update_minimum_sample_size(available);
+ }
}
bool ShenandoahHeuristics::should_start_gc() {
@@ -247,7 +254,6 @@ void ShenandoahHeuristics::log_trigger(const char* fmt, ...) {
}
void ShenandoahHeuristics::record_success_concurrent() {
- _gc_cycle_time_history->add(elapsed_cycle_time());
_gc_times_learned++;
adjust_penalty(Concurrent_Adjust);
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp
index 9066cdfccac..3f3b3898f54 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -83,7 +83,7 @@ private:
double _most_recent_planned_sleep_interval;
protected:
- static const uint Moving_Average_Samples = 10; // Number of samples to store in moving averages
+ static constexpr uint Moving_Average_Samples = 10; // Number of samples to store in moving averages
bool _start_gc_is_pending; // True denotes that GC has been triggered, so no need to trigger again.
size_t _declined_trigger_count; // This counts how many times since previous GC finished that this
@@ -181,7 +181,6 @@ protected:
size_t _gc_times_learned;
intx _gc_time_penalties;
- TruncatedSeq* _gc_cycle_time_history;
// There may be many threads that contend to set this flag
ShenandoahSharedFlag _metaspace_oom;
@@ -230,6 +229,10 @@ public:
// Default implementation does nothing.
}
+ double cycle_start_time_seconds() const {
+ return _cycle_start;
+ }
+
virtual void record_cycle_start();
void record_degenerated_cycle_start(bool out_of_cycle);
@@ -278,11 +281,6 @@ public:
double elapsed_cycle_time() const;
double elapsed_degenerated_cycle_time() const;
- virtual size_t force_alloc_rate_sample(size_t bytes_allocated) {
- // do nothing
- return 0;
- }
-
// Format prefix and emit log message indicating a GC cycle hs been triggered
void log_trigger(const char* fmt, ...) ATTRIBUTE_PRINTF(2, 3);
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp
index 765061a43ed..85c5d9fb2fb 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp
@@ -44,11 +44,6 @@ public:
virtual size_t available() const = 0;
virtual size_t used() const = 0;
- // Return an approximation of the bytes allocated since GC start. The value returned is monotonically non-decreasing
- // in time within each GC cycle. For certain GC cycles, the value returned may include some bytes allocated before
- // the start of the current GC cycle.
- virtual size_t bytes_allocated_since_gc_start() const = 0;
-
// Return true if this region belongs to this space.
virtual bool contains(ShenandoahHeapRegion* region) const = 0;
};
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp
index 5f384f3dc73..98d679f86d9 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp
@@ -40,14 +40,11 @@ ShenandoahStaticHeuristics::ShenandoahStaticHeuristics(ShenandoahSpaceInfo* spac
bool ShenandoahStaticHeuristics::should_start_gc() {
size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
size_t available = _space_info->soft_mutator_available();
- size_t allocated = _space_info->bytes_allocated_since_gc_start();
- log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", "
- "allocated_since_gc_start: " PROPERFMT,
- PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated));
+ log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT,
+ PROPERFMTARGS(available), PROPERFMTARGS(capacity));
size_t threshold_available = capacity / 100 * ShenandoahMinFreeThreshold;
-
if (available < threshold_available) {
log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")",
PROPERFMTARGS(available), PROPERFMTARGS(threshold_available));
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp
index 27aa9a47510..28007637759 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp
@@ -1,6 +1,6 @@
/*
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,13 +23,17 @@
*
*/
+#include "gc/shared/gc_globals.hpp"
+#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahOldHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp"
-#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.inline.hpp"
#include "gc/shenandoah/shenandoahGenerationalHeap.inline.hpp"
+#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
+#include "utilities/globalDefinitions.hpp"
#include "utilities/quickSort.hpp"
ShenandoahYoungHeuristics::ShenandoahYoungHeuristics(ShenandoahYoungGeneration* generation)
@@ -80,7 +84,7 @@ void ShenandoahYoungHeuristics::choose_young_collection_set(ShenandoahCollection
// If this is mixed evacuation, the old-gen candidate regions have already been added.
size_t cur_cset = 0;
size_t cur_young_garbage = cset->garbage();
- const size_t max_cset = (size_t) (heap->young_generation()->get_evacuation_reserve() / ShenandoahEvacWaste);
+ const size_t max_cset = shenandoah_safe_size_cast(heap->young_generation()->get_evacuation_reserve() / ShenandoahEvacWaste);
const size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
const size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
@@ -113,38 +117,24 @@ void ShenandoahYoungHeuristics::choose_young_collection_set(ShenandoahCollection
}
}
-
-bool ShenandoahYoungHeuristics::should_start_gc() {
- auto heap = ShenandoahGenerationalHeap::heap();
- ShenandoahOldGeneration* old_generation = heap->old_generation();
- ShenandoahOldHeuristics* old_heuristics = old_generation->heuristics();
-
- // Checks that an old cycle has run for at least ShenandoahMinimumOldTimeMs before allowing a young cycle.
+bool ShenandoahYoungHeuristics::old_collection_needs_more_time(ShenandoahOldGeneration* old_generation, ShenandoahOldHeuristics* old_heuristics) {
if (ShenandoahMinimumOldTimeMs > 0) {
if (old_generation->is_preparing_for_mark() || old_generation->is_concurrent_mark_in_progress()) {
- size_t old_time_elapsed = size_t(old_heuristics->elapsed_cycle_time() * 1000);
- if (old_time_elapsed < ShenandoahMinimumOldTimeMs) {
- // Do not decline_trigger() when waiting for minimum quantum of Old-gen marking. It is not at our discretion
- // to trigger at this time.
- log_debug(gc)("Young heuristics declines to trigger because old_time_elapsed < ShenandoahMinimumOldTimeMs");
- return false;
- }
+ const auto old_time_elapsed = shenandoah_safe_size_cast(old_heuristics->elapsed_cycle_time() * 1000);
+ return old_time_elapsed < ShenandoahMinimumOldTimeMs;
}
}
+ return false;
+}
- // inherited triggers have already decided to start a cycle, so no further evaluation is required
- if (ShenandoahAdaptiveHeuristics::should_start_gc()) {
- // ShenandoahAdaptiveHeuristics::should_start_gc() has already accepted trigger, or declined it.
- return true;
- }
-
+bool ShenandoahYoungHeuristics::trigger_expedite_promotions(ShenandoahGenerationalHeap* heap, ShenandoahOldGeneration* old_generation) {
// Get through promotions and mixed evacuations as quickly as possible. These cycles sometimes require significantly
// more time than traditional young-generation cycles so start them up as soon as possible. This is a "mitigation"
// for the reality that old-gen and young-gen activities are not truly "concurrent". If there is old-gen work to
// be done, we start up the young-gen GC threads so they can do some of this old-gen work. As implemented, promotion
// gets priority over old-gen marking.
- size_t promo_expedite_threshold = percent_of(heap->young_generation()->max_capacity(), ShenandoahExpeditePromotionsThreshold);
- size_t promo_potential = old_generation->get_promotion_potential();
+ const size_t promo_expedite_threshold = percent_of(heap->young_generation()->max_capacity(), ShenandoahExpeditePromotionsThreshold);
+ const size_t promo_potential = old_generation->get_promotion_potential();
if (promo_potential > promo_expedite_threshold) {
// Detect unsigned arithmetic underflow
assert(promo_potential < heap->capacity(), "Sanity");
@@ -152,8 +142,11 @@ bool ShenandoahYoungHeuristics::should_start_gc() {
accept_trigger();
return true;
}
+ return false;
+}
- size_t mixed_candidates = old_heuristics->unprocessed_old_collection_candidates();
+bool ShenandoahYoungHeuristics::trigger_expedite_mixed(ShenandoahGenerationalHeap* heap, ShenandoahOldHeuristics* old_heuristics) {
+ const size_t mixed_candidates = old_heuristics->unprocessed_old_collection_candidates();
if (mixed_candidates > ShenandoahExpediteMixedThreshold && !heap->is_concurrent_weak_root_in_progress()) {
// We need to run young GC in order to open up some free heap regions so we can finish mixed evacuations.
// If concurrent weak root processing is in progress, it means the old cycle has chosen mixed collection
@@ -163,6 +156,33 @@ bool ShenandoahYoungHeuristics::should_start_gc() {
accept_trigger();
return true;
}
+ return false;
+}
+
+bool ShenandoahYoungHeuristics::should_start_gc() {
+ auto heap = ShenandoahGenerationalHeap::heap();
+ ShenandoahOldGeneration* old_generation = heap->old_generation();
+ ShenandoahOldHeuristics* old_heuristics = old_generation->heuristics();
+
+ // Checks that an old cycle has run for at least ShenandoahMinimumOldTimeMs before allowing a young cycle.
+ if (old_collection_needs_more_time(old_generation, old_heuristics)) {
+ log_debug(gc)("Young heuristics declines to trigger because old_time_elapsed < ShenandoahMinimumOldTimeMs");
+ return false;
+ }
+
+ if (ShenandoahAdaptiveHeuristics::should_start_gc()) {
+ // Inherited triggers have already decided to start a cycle, so no further evaluation is required.
+ // ShenandoahAdaptiveHeuristics::should_start_gc() has already accepted or declined the trigger.
+ return true;
+ }
+
+ if (trigger_expedite_promotions(heap, old_generation)) {
+ return true;
+ }
+
+ if (trigger_expedite_mixed(heap, old_heuristics)) {
+ return true;
+ }
// Don't decline_trigger() here That was done in ShenandoahAdaptiveHeuristics::should_start_gc()
return false;
@@ -173,20 +193,16 @@ bool ShenandoahYoungHeuristics::should_start_gc() {
// generation at the end of the current cycle (as represented by young_regions_to_be_reclaimed) and on the anticipated
// amount of time required to perform a GC.
size_t ShenandoahYoungHeuristics::bytes_of_allocation_runway_before_gc_trigger(size_t young_regions_to_be_reclaimed) {
- size_t capacity = _space_info->max_capacity();
- size_t usage = _space_info->used();
- size_t available = (capacity > usage)? capacity - usage: 0;
- size_t allocated = _free_set->get_bytes_allocated_since_gc_start();
- size_t anticipated_available = available + young_regions_to_be_reclaimed * ShenandoahHeapRegion::region_size_bytes();
+ const size_t capacity = _space_info->max_capacity();
+ const size_t usage = _space_info->used();
+ const size_t available = (capacity > usage) ? capacity - usage: 0;
+ const size_t anticipated_available = available + young_regions_to_be_reclaimed * ShenandoahHeapRegion::region_size_bytes();
- size_t spike_headroom = capacity * ShenandoahAllocSpikeFactor / 100;
- size_t penalties = capacity * _gc_time_penalties / 100;
+ const size_t spike_headroom = capacity * ShenandoahAllocSpikeFactor / 100;
+ const size_t penalties = capacity * _gc_time_penalties / 100;
- double rate = _allocation_rate.sample(allocated);
-
- // At what value of available, would avg and spike triggers occur?
+ // At what value of available would the average-rate trigger occur?
// if allocation_headroom < avg_cycle_time * avg_alloc_rate, then we experience avg trigger
- // if allocation_headroom < avg_cycle_time * rate, then we experience spike trigger if is_spiking
//
// allocation_headroom =
// 0, if penalties > available or if penalties + spike_headroom > available
@@ -199,34 +215,19 @@ size_t ShenandoahYoungHeuristics::bytes_of_allocation_runway_before_gc_trigger(s
// since avg_cycle_time * avg_alloc_rate > 0, the first test is sufficient to test both conditions
//
// thus, evac_slack_avg is MIN2(0, available - avg_cycle_time * avg_alloc_rate + penalties + spike_headroom)
- //
- // similarly, evac_slack_spiking is MIN2(0, available - avg_cycle_time * rate + penalties + spike_headroom)
- // but evac_slack_spiking is only relevant if is_spiking, as defined below.
- double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
- double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
+ const double avg_cycle_time = _cycles.predict_duration(os::elapsedTime(), _margin_of_error_sd);
+ const double avg_alloc_rate = ShenandoahHeap::heap()->alloc_rate().upper_bound(_margin_of_error_sd);
+ const double remaining_before_gc = avg_cycle_time * avg_alloc_rate + penalties + spike_headroom;
size_t evac_slack_avg;
- if (anticipated_available > avg_cycle_time * avg_alloc_rate + penalties + spike_headroom) {
- evac_slack_avg = anticipated_available - (avg_cycle_time * avg_alloc_rate + penalties + spike_headroom);
+ if (anticipated_available > remaining_before_gc) {
+ evac_slack_avg = shenandoah_safe_size_cast(anticipated_available - remaining_before_gc);
} else {
// we have no slack because it's already time to trigger
evac_slack_avg = 0;
}
- bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
- size_t evac_slack_spiking;
- if (is_spiking) {
- if (anticipated_available > avg_cycle_time * rate + penalties + spike_headroom) {
- evac_slack_spiking = anticipated_available - (avg_cycle_time * rate + penalties + spike_headroom);
- } else {
- // we have no slack because it's already time to trigger
- evac_slack_spiking = 0;
- }
- } else {
- evac_slack_spiking = evac_slack_avg;
- }
-
- size_t threshold = min_free_threshold();
- size_t evac_min_threshold = (anticipated_available > threshold)? anticipated_available - threshold: 0;
- return MIN3(evac_slack_spiking, evac_slack_avg, evac_min_threshold);
+ const size_t threshold = min_free_threshold(capacity);
+ const size_t evac_min_threshold = anticipated_available > threshold ? anticipated_available - threshold : 0;
+ return MIN2(evac_slack_avg, evac_min_threshold);
}
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp
index 8fabc40693c..723fb631e75 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp
@@ -27,6 +27,8 @@
#include "gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp"
class ShenandoahYoungGeneration;
+class ShenandoahOldGeneration;
+class ShenandoahOldHeuristics;
/*
* This is a specialization of the generational heuristic which chooses
@@ -37,20 +39,26 @@ class ShenandoahYoungHeuristics : public ShenandoahGenerationalHeuristics {
public:
explicit ShenandoahYoungHeuristics(ShenandoahYoungGeneration* generation);
-
- void select_collection_set_regions(ShenandoahCollectionSet* cset,
- RegionData* data, size_t size,
- size_t actual_free) override;
-
bool should_start_gc() override;
size_t bytes_of_allocation_runway_before_gc_trigger(size_t young_regions_to_be_reclaimed);
+protected:
+ void select_collection_set_regions(ShenandoahCollectionSet* cset,
+ RegionData* data, size_t size,
+ size_t actual_free) override;
+
private:
void choose_young_collection_set(ShenandoahCollectionSet* cset,
const RegionData* data,
size_t size, size_t actual_free) const;
+ bool old_collection_needs_more_time(ShenandoahOldGeneration* old_generation,
+ ShenandoahOldHeuristics* old_heuristics);
+
+ bool trigger_expedite_promotions(ShenandoahGenerationalHeap* heap, ShenandoahOldGeneration* old_generation);
+
+ bool trigger_expedite_mixed(ShenandoahGenerationalHeap* heap, ShenandoahOldHeuristics* old_heuristics);
};
#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHYOUNGHEURISTICS_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAffiliation.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAffiliation.hpp
index 6b3c846f5fe..3dc8becfb62 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAffiliation.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAffiliation.hpp
@@ -25,6 +25,8 @@
#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHAFFILIATION_HPP
#define SHARE_GC_SHENANDOAH_SHENANDOAHAFFILIATION_HPP
+#include "utilities/debug.hpp"
+
enum ShenandoahAffiliation {
FREE,
YOUNG_GENERATION,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp
index 4989c929b32..8fa497802fd 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp
@@ -34,7 +34,7 @@ ShenandoahAgeCensus::ShenandoahAgeCensus()
}
ShenandoahAgeCensus::ShenandoahAgeCensus(uint max_workers)
- : _max_workers(max_workers)
+ : _max_workers(max_workers), _always_tenure(false)
{
if (ShenandoahGenerationalMinTenuringAge > ShenandoahGenerationalMaxTenuringAge) {
vm_exit_during_initialization(
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp
index c140f445e21..5636dee3ae2 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp
@@ -121,6 +121,8 @@ class ShenandoahAgeCensus: public CHeapObj {
uint _max_workers; // Maximum number of workers for parallel tasks
+ bool _always_tenure; // When true, every age is tenurable.
+
// Mortality rate of a cohort, given its population in
// previous and current epochs
double mortality_rate(size_t prev_pop, size_t cur_pop);
@@ -150,9 +152,9 @@ class ShenandoahAgeCensus: public CHeapObj {
return _tenuring_threshold[prev];
}
- // Override the tenuring threshold for the current epoch. This is used to
- // cause everything to be promoted for a whitebox full gc request.
- void set_tenuring_threshold(uint threshold) { _tenuring_threshold[_epoch] = threshold; }
+ // Set always tenure mode. Currently only used by ShenandoahTenuringOverride
+ // to force is_tenurable() to be true for every age during WB.fullGC tests.
+ void set_always_tenure(bool always_tenure) { _always_tenure = always_tenure; }
#ifndef PRODUCT
// Return the sum of size of objects of all ages recorded in the
@@ -187,11 +189,13 @@ class ShenandoahAgeCensus: public CHeapObj {
// Visible for testing. Use is_tenurable for consistent tenuring comparisons.
uint tenuring_threshold() const { return _tenuring_threshold[_epoch]; }
- // Return true if this age is at or above the tenuring threshold.
+ // Return true if this age is at or above the tenuring threshold, or if always tenure is enabled.
bool is_tenurable(uint age) const {
- return age >= tenuring_threshold();
+ return age >= tenuring_threshold() || _always_tenure;
}
+ bool is_always_tenure() const { return _always_tenure; }
+
// Update the local age table for worker_id by size for
// given obj_age, region_age, and region_youth
CENSUS_NOISE(void add(uint obj_age, uint region_age, uint region_youth, size_t size, uint worker_id);)
@@ -244,24 +248,22 @@ class ShenandoahAgeCensus: public CHeapObj {
void print();
};
-// RAII object that temporarily overrides the tenuring threshold for the
-// duration of a scope, restoring the original value on destruction.
-// Used to force promotion of all young objects during whitebox full GCs.
+// RAII object that enables ShenandoahAgeCensus always tenure mode for the
+// duration of a scope and disables it on destruction. Used to force promotion
+// of all young objects during whitebox full GCs.
class ShenandoahTenuringOverride : public StackObj {
ShenandoahAgeCensus* _census;
- uint _saved_threshold;
bool _active;
public:
ShenandoahTenuringOverride(bool active, ShenandoahAgeCensus* census) :
- _census(census), _saved_threshold(0), _active(active) {
+ _census(census), _active(active) {
if (_active) {
- _saved_threshold = _census->tenuring_threshold();
- _census->set_tenuring_threshold(0);
+ _census->set_always_tenure(true);
}
}
~ShenandoahTenuringOverride() {
if (_active) {
- _census->set_tenuring_threshold(_saved_threshold);
+ _census->set_always_tenure(false);
}
}
};
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp
new file mode 100644
index 00000000000..ff0a425e260
--- /dev/null
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP
+#define SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP
+
+#include "gc/shenandoah/shenandoahWeightedSeq.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/os.hpp"
+#include "runtime/task.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class ShenandoahAllocationClock {
+public:
+ static jlong elapsed_counter() {
+ return os::elapsed_counter();
+ }
+
+ static jlong elapsed_frequency() {
+ return os::elapsed_frequency();
+ }
+};
+
+// Snapshot values used by heuristic triggers to avoid lock contention
+struct ShenandoahAnticipatedConsumption {
+ template friend class ShenandoahAllocRate;
+ explicit ShenandoahAnticipatedConsumption(double duration_seconds)
+ : _duration_seconds(duration_seconds)
+ , _baseline(0.0)
+ , _momentary(0.0)
+ , _acceleration(0.0)
+ , _predicted_rate(0.0) {
+ }
+
+ // Anticipated duration in seconds of next gc cycle
+ double duration_seconds() const {
+ return _duration_seconds;
+ }
+
+ // Consumption in bytes based on baseline allocation rate for the next gc cycle
+ size_t baseline_consumption() const;
+ double baseline_rate() const {
+ return _baseline;
+ }
+
+ // Consumption in bytes based on momentary allocation rate for the next gc cycle
+ size_t momentary_consumption() const;
+ double momentary_rate() const {
+ return _momentary;
+ }
+
+ // Consumption in bytes based on an accelerating allocation rate for the next gc cycle
+ size_t accelerated_consumption() const;
+
+ // The acceleration of the allocation rate (based on slope of linear regression)
+ double acceleration() const {
+ return _acceleration;
+ }
+
+ // Predicted allocation rate based on weighted linear regression
+ double predicted_rate() const {
+ return _predicted_rate;
+ }
+
+private:
+ double _duration_seconds;
+ double _baseline;
+ double _momentary;
+ double _acceleration;
+ double _predicted_rate;
+};
+
+// This class tracks three moving averages of the allocation rate:
+// 1. Momentary: this is the shortest and acts as a sort of 'spike' detector
+// 2. Recent: larger than momentary, these samples are used to detect 'acceleration' of the rate
+// 3. Baseline: the largest sample window, this is meant to establish the baseline allocation rate
+//
+// Samples are taken whenever the accumulating count of bytes allocated exceeds the
+// minimum sample size. The minimum sample size is generally derived from the heap
+// capacity. The thinking is that larger heaps require less frequent sampling. Note
+// that because sampling is driven by bytes allocated, a higher allocation rate yields
+// more frequent samples, so the averages and other estimates become more timely.
+template
+class ShenandoahAllocRate {
+ static constexpr size_t ALLOC_SAMPLE_PORTION = 128;
+ static constexpr size_t ALLOC_SAMPLE_MIN = M;
+ static constexpr size_t ALLOC_SAMPLE_MAX = G;
+
+ PaddedMonitor _sample_lock;
+ Atomic _allocated_bytes_since_last_sample;
+ Atomic _minimum_sample_size; // bytes, read by mutator, updated by gc
+ jlong _last_sample_time;
+
+ ShenandoahWeightedSeq _baseline;
+ ShenandoahWeightedSeq _recent;
+ ShenandoahWeightedSeq _momentary;
+
+public:
+ explicit ShenandoahAllocRate(const uint minimum_sample_size = ALLOC_SAMPLE_MIN,
+ const uint baseline_window_size = ShenandoahAllocRateSampleWindow,
+ const uint recent_window_size = ShenandoahRecentAllocRateSampleWindow,
+ const uint momentary_window_size = ShenandoahMomentaryAllocRateSampleWindow)
+ : _sample_lock(Mutex::nosafepoint - 2, "ShenandoahAllocSample_lock", true)
+ , _allocated_bytes_since_last_sample(0)
+ , _minimum_sample_size(minimum_sample_size)
+ , _last_sample_time(Clock::elapsed_counter())
+ , _baseline(baseline_window_size)
+ , _recent(recent_window_size)
+ , _momentary(momentary_window_size)
+ {
+ }
+
+ // Update minimum sample size based on the given available bytes
+ void update_minimum_sample_size(size_t available);
+
+ // Set minimum sample size in bytes
+ void set_minimum_sample_size(const size_t minimum_sample_size) {
+ _minimum_sample_size.store_relaxed(minimum_sample_size);
+ }
+
+ // Indicate that this many bytes have been allocated (by the mutator).
+ void allocated(size_t allocated_bytes);
+
+ // Shenandoah currently evaluates triggers on a dedicated thread to lighten the workload
+ // for allocators. However, this means that when there aren't enough allocations to update
+ // the rate, the heuristics will continue to see a high allocation rate. This method is
+ // for heuristics to periodically force the rate to update and decay the allocation rate.
+ void force_update();
+
+ // Returns a snapshot of the parameters necessary to evaluate allocation rate triggers.
+ // Note that momentary consumption and accelerated consumption may both be zero, but may
+ // not both be non-zero. The `time_delta` parameter is the anticipated duration of the
+ // next gc cycle. The `standard_deviations` parameter is the margin of error applied to
+ // the baseline allocation rate expressed as a multiple of the standard deviation.
+ ShenandoahAnticipatedConsumption snapshot(double time_delta, double standard_deviations);
+
+ // Returns the weighted average of the samples.
+ double weighted_average() {
+ MonitorLocker locker(&_sample_lock, Mutex::_no_safepoint_check_flag);
+ return _baseline.weighted_average();
+ }
+
+ // Returns the upper bound of the confidence interval about the mean in terms of the given deviation.
+ double upper_bound(const double standard_deviations) {
+ MonitorLocker locker(&_sample_lock, Mutex::_no_safepoint_check_flag);
+ return upper_bound_no_lock(standard_deviations);
+ }
+
+private:
+ // Record the sample under the sample lock
+ void take_sample(jlong now, jlong elapsed, size_t unsampled);
+
+ double upper_bound_no_lock(const double standard_deviations) const {
+ assert(_sample_lock.is_locked(), "Caller must hold lock");
+ return _baseline.weighted_average() + standard_deviations * _baseline.weighted_sd();
+ }
+};
+
+typedef ShenandoahAllocRate<> ShenandoahAllocationRate;
+
+// See description of `force_update`
+class ShenandoahDecayAllocRate : public PeriodicTask {
+ static constexpr size_t DECAY_INTERVAL_MS = 100;
+ ShenandoahAllocationRate* _rate;
+public:
+ ShenandoahDecayAllocRate(ShenandoahAllocationRate* rate) : PeriodicTask(DECAY_INTERVAL_MS), _rate(rate) {}
+ void task() override;
+};
+
+#endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp
new file mode 100644
index 00000000000..eedda19b86e
--- /dev/null
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_INLINE_HPP
+#define SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_INLINE_HPP
+
+#include "gc/shenandoah/shenandoahAllocRate.hpp"
+
+#include "gc/shenandoah/shenandoahUtils.hpp"
+#include "logging/log.hpp"
+
+
+inline size_t ShenandoahAnticipatedConsumption::baseline_consumption() const {
+ return shenandoah_safe_size_cast(_baseline * _duration_seconds);
+}
+
+inline size_t ShenandoahAnticipatedConsumption::momentary_consumption() const {
+ return shenandoah_safe_size_cast(_momentary * _duration_seconds);
+}
+
+inline size_t ShenandoahAnticipatedConsumption::accelerated_consumption() const {
+ const double consumption = _predicted_rate * _duration_seconds + 0.5 * _acceleration * _duration_seconds * _duration_seconds;
+ return shenandoah_safe_size_cast(consumption);
+}
+
+inline void ShenandoahDecayAllocRate::task() {
+ _rate->force_update();
+}
+
+template
+void ShenandoahAllocRate::update_minimum_sample_size(const size_t available) {
+ const size_t min_sample_size = clamp(available / ALLOC_SAMPLE_PORTION, ALLOC_SAMPLE_MIN, ALLOC_SAMPLE_MAX);
+ log_info(gc, ergo)("Adjust minimum allocation sample size to: " PROPERFMT, PROPERFMTARGS(min_sample_size));
+ set_minimum_sample_size(min_sample_size);
+}
+
+template
+void ShenandoahAllocRate::allocated(const size_t allocated_bytes) {
+ size_t unsampled = _allocated_bytes_since_last_sample.add_then_fetch(allocated_bytes);
+ const size_t minimum_sample_size = _minimum_sample_size.load_relaxed();
+ if (unsampled < minimum_sample_size) {
+ // Not enough to sample yet
+ return;
+ }
+
+ if (!_sample_lock.try_lock()) {
+ // Another thread has the lock and will take the sample
+ return;
+ }
+
+ unsampled = _allocated_bytes_since_last_sample.load_relaxed();
+ if (unsampled < minimum_sample_size) {
+ // Another thread has sampled and reset the allocated bytes under the lock
+ _sample_lock.unlock();
+ return;
+ }
+
+ const jlong now = Clock::elapsed_counter();
+ const jlong elapsed = now - _last_sample_time;
+
+ if (elapsed <= 0) {
+ // Avoid sampling nonsense allocation rates
+ _sample_lock.unlock();
+ return;
+ }
+
+ take_sample(now, elapsed, unsampled);
+
+ _sample_lock.unlock();
+}
+
+template
+void ShenandoahAllocRate::force_update() {
+ if (!_sample_lock.try_lock()) {
+ // Another thread holds the lock (taking a sample or reading the rate); skip this update
+ return;
+ }
+
+ const size_t unsampled = _allocated_bytes_since_last_sample.load_relaxed();
+ const jlong now = Clock::elapsed_counter();
+ const jlong elapsed = now - _last_sample_time;
+
+ if (elapsed <= 0) {
+ // Avoid sampling nonsense allocation rates
+ _sample_lock.unlock();
+ return;
+ }
+
+ take_sample(now, elapsed, unsampled);
+
+ _sample_lock.unlock();
+}
+
+template
+void ShenandoahAllocRate::take_sample(jlong now, jlong elapsed, size_t unsampled) {
+ assert(_sample_lock.is_locked(), "Caller must hold lock");
+
+ _last_sample_time = now;
+
+ // We are recording this sample, deduct it from the counter. It may be increased
+ // concurrently by other threads outside the lock, so we still use an atomic access.
+ _allocated_bytes_since_last_sample.sub_then_fetch(unsampled);
+
+ const double timestamp = static_cast(_last_sample_time) / Clock::elapsed_frequency();
+ const double rate_seconds = static_cast(unsampled) * Clock::elapsed_frequency() / elapsed;
+
+ _baseline.add(timestamp, rate_seconds);
+ _recent.add(timestamp, rate_seconds);
+ _momentary.add(timestamp, rate_seconds);
+
+ // Careful, still under a lock here
+ log_develop_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp);
+}
+
+template
+ShenandoahAnticipatedConsumption ShenandoahAllocRate::snapshot(const double time_delta, const double standard_deviations) {
+ ShenandoahAnticipatedConsumption result(time_delta);
+ MonitorLocker locker(&_sample_lock, Mutex::_no_safepoint_check_flag);
+
+ result._baseline = upper_bound_no_lock(standard_deviations);
+
+ if (_recent.weighted_average() <= _baseline.weighted_average()) {
+ // We are not accelerating, just use the momentary average.
+ result._momentary = _momentary.weighted_average();
+ } else {
+ result._acceleration = _recent.slope();
+ result._predicted_rate = _recent.predict_y(_recent.last());
+ }
+
+ return result;
+}
+
+#endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_INLINE_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
index 5ce131b3c80..3ccb7cc336b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
@@ -159,20 +159,6 @@ void ShenandoahArguments::initialize() {
FLAG_SET_DEFAULT(LoopStripMiningIter, 1000);
}
}
-#ifdef ASSERT
- // C2 barrier verification is only reliable when all default barriers are enabled
- if (ShenandoahVerifyOptoBarriers &&
- (!FLAG_IS_DEFAULT(ShenandoahSATBBarrier) ||
- !FLAG_IS_DEFAULT(ShenandoahLoadRefBarrier) ||
- !FLAG_IS_DEFAULT(ShenandoahCASBarrier) ||
- !FLAG_IS_DEFAULT(ShenandoahCloneBarrier)
- )) {
- warning("Unusual barrier configuration, disabling C2 barrier verification");
- FLAG_SET_DEFAULT(ShenandoahVerifyOptoBarriers, false);
- }
-#else
- guarantee(!ShenandoahVerifyOptoBarriers, "Should be disabled");
-#endif // ASSERT
#endif // COMPILER2
// Record more information about previous cycles for improved debugging pleasure
@@ -193,7 +179,7 @@ void ShenandoahArguments::initialize() {
// TLAB sizing policy makes resizing decisions before each GC cycle. It averages
// historical data, assigning more recent data the weight according to TLABAllocationWeight.
// Current default is good for generational collectors that run frequent young GCs.
- // With Shenandoah, GC cycles are much less frequent, so we need we need sizing policy
+ // With Shenandoah, GC cycles are much less frequent, so we need sizing policy
// to converge faster over smaller number of resizing decisions.
if (strcmp(ShenandoahGCMode, "generational") && FLAG_IS_DEFAULT(TLABAllocationWeight)) {
FLAG_SET_DEFAULT(TLABAllocationWeight, 90);
@@ -202,7 +188,7 @@ void ShenandoahArguments::initialize() {
if (GCCardSizeInBytes < ShenandoahMinCardSizeInBytes) {
vm_exit_during_initialization(
- err_msg("GCCardSizeInBytes ( %u ) must be >= %u\n", GCCardSizeInBytes, (unsigned int) ShenandoahMinCardSizeInBytes));
+ err_msg("GCCardSizeInBytes ( %u ) must be >= %u\n", GCCardSizeInBytes, ShenandoahMinCardSizeInBytes));
}
// Gen shen does not support any ShenandoahGCHeuristics value except for the default "adaptive"
@@ -213,6 +199,16 @@ void ShenandoahArguments::initialize() {
FLAG_SET_ERGO(ShenandoahGCHeuristics, "adaptive");
}
+ if (ShenandoahMomentaryAllocRateSampleWindow > ShenandoahRecentAllocRateSampleWindow
+ || ShenandoahRecentAllocRateSampleWindow > ShenandoahAllocRateSampleWindow) {
+ vm_exit_during_initialization(
+ err_msg("Relation must hold: ShenandoahMomentaryAllocRateSampleWindow (%u) "
+ "<= ShenandoahRecentAllocRateSampleWindow (%u) "
+ "<= ShenandoahAllocRateSampleWindow (%u)",
+ ShenandoahMomentaryAllocRateSampleWindow, ShenandoahRecentAllocRateSampleWindow,
+ ShenandoahAllocRateSampleWindow));
+ }
+
FullGCForwarding::initialize_flags(MaxHeapSize);
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp
index 268f5b13035..278c04b35d6 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp
@@ -466,6 +466,17 @@ void ShenandoahAsserts::assert_not_in_cset_loc(void* interior_loc, const char* f
}
}
+void ShenandoahAsserts::assert_in_young(void* interior_loc, oop obj, const char* file, int line) {
+ assert_correct(interior_loc, obj, file, line);
+
+ ShenandoahHeap* heap = ShenandoahHeap::heap();
+ if (!heap->heap_region_containing(obj)->is_young()) {
+ print_failure(_safe_all, obj, interior_loc, nullptr, "Shenandoah assert_in_young failed",
+ "Object should be in young region",
+ file, line);
+ }
+}
+
void ShenandoahAsserts::print_rp_failure(const char *label, BoolObjectClosure* actual,
const char *file, int line) {
ShenandoahMessageBuffer msg("%s\n", label);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.hpp
index d0fc3e213c8..545415a6531 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.hpp
@@ -80,6 +80,8 @@ public:
static void assert_control_or_vm_thread_at_safepoint(bool at_safepoint, const char* file, int line);
static void assert_generational(const char* file, int line);
+ static void assert_in_young(void* interior_loc, oop obj, const char* file, int line);
+
// Given a possibly invalid oop, extract narrowKlass (if UCCP) and Klass*
// from it safely.
// Note: For -UCCP, returned nk is always 0.
@@ -159,6 +161,13 @@ public:
#define shenandoah_assert_not_in_cset_loc(interior_loc) \
ShenandoahAsserts::assert_not_in_cset_loc(interior_loc, __FILE__, __LINE__)
+#define shenandoah_assert_in_young_if(interior_loc, obj, condition) \
+ if (condition) ShenandoahAsserts::assert_in_young(interior_loc, obj, __FILE__, __LINE__)
+#define shenandoah_assert_in_young_except(interior_loc, obj, exception) \
+ if (!(exception)) ShenandoahAsserts::assert_in_young(interior_loc, obj, __FILE__, __LINE__)
+#define shenandoah_assert_in_young(interior_loc, obj) \
+ ShenandoahAsserts::assert_in_young(interior_loc, obj, __FILE__, __LINE__)
+
#define shenandoah_assert_rp_isalive_installed() \
ShenandoahAsserts::assert_rp_isalive_installed(__FILE__, __LINE__)
#define shenandoah_assert_rp_isalive_not_installed() \
@@ -245,6 +254,10 @@ public:
#define shenandoah_assert_control_or_vm_thread_at_safepoint()
#define shenandoah_assert_generational()
+#define shenandoah_assert_in_young_if(interior_loc, obj, condition)
+#define shenandoah_assert_in_young_except(interior_loc, obj, exception)
+#define shenandoah_assert_in_young(interior_loc, obj)
+
#endif
#define shenandoah_not_implemented \
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp
index 0949959b042..9605eb524cd 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp
@@ -25,7 +25,6 @@
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-#include "gc/shenandoah/shenandoahBarrierSetClone.inline.hpp"
#include "gc/shenandoah/shenandoahBarrierSetNMethod.hpp"
#include "gc/shenandoah/shenandoahBarrierSetStackChunk.hpp"
#include "gc/shenandoah/shenandoahCardTable.hpp"
@@ -100,7 +99,7 @@ bool ShenandoahBarrierSet::need_card_barrier(DecoratorSet decorators, BasicType
}
void ShenandoahBarrierSet::on_slowpath_allocation_exit(JavaThread* thread, oop new_obj) {
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
if (ReduceInitialCardMarks && ShenandoahCardBarrier && !ShenandoahHeap::heap()->is_in_young(new_obj)) {
log_debug(gc)("Newly allocated object (" PTR_FORMAT ") is not in the young generation", p2i(new_obj));
// This can happen when an object is newly allocated, but we come to a safepoint before returning
@@ -114,7 +113,7 @@ void ShenandoahBarrierSet::on_slowpath_allocation_exit(JavaThread* thread, oop n
cast_from_oop(new_obj), new_obj->size()
);
}
-#endif // COMPILER2_OR_JVMCI
+#endif // COMPILER2
}
void ShenandoahBarrierSet::on_thread_create(Thread* thread) {
@@ -180,12 +179,6 @@ void ShenandoahBarrierSet::on_thread_detach(Thread *thread) {
}
}
-void ShenandoahBarrierSet::clone_barrier_runtime(oop src) {
- if (_heap->has_forwarded_objects()) {
- clone_barrier(src);
- }
-}
-
void ShenandoahBarrierSet::write_ref_array(HeapWord* start, size_t count) {
assert(ShenandoahCardBarrier, "Should have been checked by caller");
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
index 06e16af24c6..8f8dbc9ac83 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
@@ -83,8 +83,6 @@ public:
template
inline void arraycopy_barrier(T* src, T* dst, size_t count);
- inline void clone_barrier(oop src);
- void clone_barrier_runtime(oop src);
// Support for optimizing compilers to call the barrier set on slow path allocations
// that did not enter a TLAB. Used for e.g. ReduceInitialCardMarks to take any
@@ -104,7 +102,7 @@ public:
inline void keep_alive_if_weak(DecoratorSet decorators, oop value);
- inline void enqueue(oop obj);
+ inline void enqueue(oop obj, bool filter = true);
inline oop load_reference_barrier(oop obj);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
index f4a6d2b9038..e7ddfdb0f6a 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
@@ -167,14 +167,15 @@ inline oop ShenandoahBarrierSet::load_reference_barrier(DecoratorSet decorators,
return fwd;
}
-inline void ShenandoahBarrierSet::enqueue(oop obj) {
+inline void ShenandoahBarrierSet::enqueue(oop obj, bool filter) {
assert(obj != nullptr, "checked by caller");
+ shenandoah_assert_correct(nullptr, obj);
assert(_satb_mark_queue_set.is_active(), "only get here when SATB active");
// Filter marked objects before hitting the SATB queues. The same predicate would
// be used by SATBMQ::filter to eliminate already marked objects downstream, but
// filtering here helps to avoid wasteful SATB queueing work to begin with.
- if (!_heap->requires_marking(obj)) return;
+ if (filter && !_heap->requires_marking(obj)) return;
SATBMarkQueue& queue = ShenandoahThreadLocalData::satb_mark_queue(Thread::current());
_satb_mark_queue_set.enqueue_known_active(queue, obj);
@@ -434,12 +435,76 @@ inline oop ShenandoahBarrierSet::AccessBarrier::oop_ato
}
// Clone barrier support
+// Visits the reference fields of one object and redirects every field that points into
+// the collection set to the referent's forwardee. When EVAC is true, a referent whose
+// forwardee is still the object itself is evacuated first.
+template <bool EVAC>
+class ShenandoahUpdateEvacForCloneOopClosure : public BasicOopIterateClosure {
+private:
+  ShenandoahHeap* const _heap;
+  const ShenandoahCollectionSet* const _cset;
+  Thread* const _thread;
+
+  template <class T>
+  inline void do_oop_work(T* p) {
+    T o = RawAccess<>::oop_load(p);
+    if (!CompressedOops::is_null(o)) {
+      oop obj = CompressedOops::decode_not_null(o);
+      if (_cset->is_in(obj)) {
+        oop fwd = ShenandoahForwarding::get_forwardee(obj);
+        if (EVAC && obj == fwd) {
+          // get_forwardee() returned the object itself: evacuate it now.
+          fwd = _heap->evacuate_object(obj, _thread);
+        }
+        shenandoah_assert_forwarded_except(p, obj, _heap->cancelled_gc());
+        // Store the forwardee into the field, passing the value 'o' loaded above.
+        ShenandoahHeap::atomic_update_oop(fwd, p, o);
+      }
+    }
+  }
+
+public:
+  ShenandoahUpdateEvacForCloneOopClosure() :
+    _heap(ShenandoahHeap::heap()),
+    _cset(_heap->collection_set()),
+    _thread(Thread::current()) {}
+
+  virtual void do_oop(oop* p) { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+// Clone barrier slow path used while evacuation is in progress: when the object needs a
+// bulk update, walk its reference fields with the evacuating flavor of the clone closure.
+void ShenandoahBarrierSet::clone_evacuation(oop obj) {
+  assert(_heap->is_evacuation_in_progress(), "only during evacuation");
+  if (need_bulk_update(cast_from_oop<HeapWord*>(obj))) {
+    ShenandoahUpdateEvacForCloneOopClosure</* evac = */ true> cl;
+    obj->oop_iterate(&cl);
+  }
+}
+
+// Clone barrier slow path used while update-refs is in progress: when the object needs a
+// bulk update, walk its reference fields with the non-evacuating flavor of the clone closure.
+void ShenandoahBarrierSet::clone_update(oop obj) {
+  assert(_heap->is_update_refs_in_progress(), "only during update-refs");
+  if (need_bulk_update(cast_from_oop<HeapWord*>(obj))) {
+    ShenandoahUpdateEvacForCloneOopClosure</* evac = */ false> cl;
+    obj->oop_iterate(&cl);
+  }
+}
+
template
void ShenandoahBarrierSet::AccessBarrier::clone_in_heap(oop src, oop dst, size_t size) {
- if (ShenandoahCloneBarrier) {
- ShenandoahBarrierSet::barrier_set()->clone_barrier_runtime(src);
+ // Hot code path, called from compiler/runtime. Make sure fast path is fast.
+
+ // Fix up src before doing the copy, if needed.
+ const char gc_state = ShenandoahThreadLocalData::gc_state(Thread::current());
+ if (gc_state != 0 && ShenandoahCloneBarrier) {
+ ShenandoahBarrierSet* bs = ShenandoahBarrierSet::barrier_set();
+ if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
+ bs->clone_evacuation(src);
+ } else if ((gc_state & ShenandoahHeap::UPDATE_REFS) != 0) {
+ bs->clone_update(src);
+ }
}
+
Raw::clone(src, dst, size);
+
+ // Current allocator never allocates in old, so clone destination is guaranteed to be in young.
+ // Otherwise we need card barriers.
+ shenandoah_assert_in_young_if(nullptr, dst, ShenandoahCardBarrier);
}
template
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetClone.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetClone.inline.hpp
deleted file mode 100644
index 487fbd9ef62..00000000000
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSetClone.inline.hpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2013, 2021, Red Hat, Inc. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHBARRIERSETCLONE_INLINE_HPP
-#define SHARE_GC_SHENANDOAH_SHENANDOAHBARRIERSETCLONE_INLINE_HPP
-
-// No shenandoahBarrierSetClone.hpp
-
-#include "gc/shenandoah/shenandoahBarrierSet.inline.hpp"
-#include "gc/shenandoah/shenandoahCollectionSet.inline.hpp"
-#include "gc/shenandoah/shenandoahHeap.inline.hpp"
-#include "memory/iterator.inline.hpp"
-#include "oops/access.hpp"
-#include "oops/compressedOops.hpp"
-
-template
-class ShenandoahUpdateRefsForOopClosure: public BasicOopIterateClosure {
-private:
- ShenandoahHeap* const _heap;
- ShenandoahBarrierSet* const _bs;
- const ShenandoahCollectionSet* const _cset;
- Thread* const _thread;
-
- template
- inline void do_oop_work(T* p) {
- T o = RawAccess<>::oop_load(p);
- if (!CompressedOops::is_null(o)) {
- oop obj = CompressedOops::decode_not_null(o);
- if (HAS_FWD && _cset->is_in(obj)) {
- oop fwd = _bs->resolve_forwarded_not_null(obj);
- if (EVAC && obj == fwd) {
- fwd = _heap->evacuate_object(obj, _thread);
- }
- shenandoah_assert_forwarded_except(p, obj, _heap->cancelled_gc());
- ShenandoahHeap::atomic_update_oop(fwd, p, o);
- obj = fwd;
- }
- if (ENQUEUE) {
- _bs->enqueue(obj);
- }
- }
- }
-public:
- ShenandoahUpdateRefsForOopClosure() :
- _heap(ShenandoahHeap::heap()),
- _bs(ShenandoahBarrierSet::barrier_set()),
- _cset(_heap->collection_set()),
- _thread(Thread::current()) {
- }
-
- virtual void do_oop(oop* p) { do_oop_work(p); }
- virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-};
-
-void ShenandoahBarrierSet::clone_evacuation(oop obj) {
- assert(_heap->is_evacuation_in_progress(), "only during evacuation");
- if (need_bulk_update(cast_from_oop(obj))) {
- ShenandoahUpdateRefsForOopClosure* has_fwd = */ true, /* evac = */ true, /* enqueue */ false> cl;
- obj->oop_iterate(&cl);
- }
-}
-
-void ShenandoahBarrierSet::clone_update(oop obj) {
- assert(_heap->is_update_refs_in_progress(), "only during update-refs");
- if (need_bulk_update(cast_from_oop(obj))) {
- ShenandoahUpdateRefsForOopClosure* has_fwd = */ true, /* evac = */ false, /* enqueue */ false> cl;
- obj->oop_iterate(&cl);
- }
-}
-
-void ShenandoahBarrierSet::clone_barrier(oop obj) {
- assert(ShenandoahCloneBarrier, "only get here with clone barriers enabled");
- shenandoah_assert_correct(nullptr, obj);
-
- if (_heap->is_evacuation_in_progress()) {
- clone_evacuation(obj);
- } else {
- clone_update(obj);
- }
-}
-
-#endif // SHARE_GC_SHENANDOAH_SHENANDOAHBARRIERSETCLONE_INLINE_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp b/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp
index 9ab45380c61..976a505c713 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahClosures.hpp
@@ -253,4 +253,16 @@ public:
};
#endif // ASSERT
+// Thread closure that forwards every visited thread to two other thread closures, in order.
+class ShenandoahMultiThreadClosure : public ThreadClosure {
+private:
+  ThreadClosure& _first;
+  ThreadClosure& _second;
+
+public:
+  ShenandoahMultiThreadClosure(ThreadClosure& cl1, ThreadClosure& cl2)
+    : _first(cl1)
+    , _second(cl2) {}
+
+  // Applies the first closure, then the second, to the given thread.
+  void do_thread(Thread* thread) override {
+    _first.do_thread(thread);
+    _second.do_thread(thread);
+  }
+};
+
#endif // SHARE_GC_SHENANDOAH_SHENANDOAHCLOSURES_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp
index cfa79fc055e..e3267517e05 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp
@@ -205,8 +205,15 @@ void ShenandoahCollectorPolicy::print_gc_stats(outputStream* out) const {
out->print_cr("enough regions with no live objects to skip evacuation.");
out->cr();
+ size_t gc_attempts = 0;
+ for (int c = 0; c < GCCause::_last_gc_cause; c++) {
+ gc_attempts += _collection_cause_counts[c];
+ }
+
size_t completed_gcs = _success_full_gcs + _success_degenerated_gcs + _success_concurrent_gcs + _success_old_gcs;
- out->print_cr("%5zu Completed GCs", completed_gcs);
+ size_t cancelled_gcs = gc_attempts - completed_gcs;
+ out->print_cr("%5zu GC attempts. %zu Completed GCs (%.2f%%).",
+ gc_attempts, completed_gcs, percent_of(completed_gcs, gc_attempts));
size_t explicit_requests = 0;
size_t implicit_requests = 0;
@@ -220,7 +227,7 @@ void ShenandoahCollectorPolicy::print_gc_stats(outputStream* out) const {
implicit_requests += cause_count;
}
const char* desc = GCCause::to_string(cause);
- out->print_cr(" %5zu caused by %s (%.2f%%)", cause_count, desc, percent_of(cause_count, completed_gcs));
+ out->print_cr(" %5zu caused by %s (%.2f%%)", cause_count, desc, percent_of(cause_count, gc_attempts));
}
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
index 929f3b30afe..07eb653bc94 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
@@ -991,7 +991,7 @@ public:
// state is cached, therefore, during concurrent class unloading phase,
// we will not touch the metadata of unloading nmethods
{
- ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCacheRoots, worker_id);
+ ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCache, worker_id);
ShenandoahIsNMethodAliveClosure is_nmethod_alive;
_nmethod_itr.nmethods_do(&is_nmethod_alive);
}
@@ -1004,9 +1004,8 @@ void ShenandoahConcurrentGC::op_weak_roots() {
assert(heap->is_concurrent_weak_root_in_progress(), "Only during this phase");
{
// Concurrent weak root processing
- ShenandoahTimingsTracker t(ShenandoahPhaseTimings::conc_weak_roots_work);
- ShenandoahGCWorkerPhase worker_phase(ShenandoahPhaseTimings::conc_weak_roots_work);
- ShenandoahConcurrentWeakRootsEvacUpdateTask task(_generation, ShenandoahPhaseTimings::conc_weak_roots_work);
+ ShenandoahGCWorkerPhase worker_phase(ShenandoahPhaseTimings::conc_weak_roots);
+ ShenandoahConcurrentWeakRootsEvacUpdateTask task(_generation, ShenandoahPhaseTimings::conc_weak_roots);
heap->workers()->run_task(&task);
}
@@ -1080,7 +1079,7 @@ public:
}
if (!ShenandoahHeap::heap()->unload_classes()) {
- ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCacheRoots, worker_id);
+ ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCache, worker_id);
ShenandoahEvacUpdateCodeCacheClosure cl;
_nmethod_itr.nmethods_do(&cl);
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
index be0da3e54ba..31ffbc817f1 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
@@ -36,7 +36,6 @@
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahRootProcessor.inline.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
-#include "gc/shenandoah/shenandoahStringDedup.hpp"
#include "gc/shenandoah/shenandoahTaskqueue.inline.hpp"
#include "gc/shenandoah/shenandoahUtils.hpp"
#include "memory/iterator.inline.hpp"
@@ -57,12 +56,13 @@ public:
void work(uint worker_id) {
ShenandoahConcurrentWorkerSession worker_session(worker_id);
- ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::conc_mark, ShenandoahPhaseTimings::ParallelMark, worker_id, true);
+ ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::conc_mark, ShenandoahPhaseTimings::Work, worker_id, true);
SuspendibleThreadSetJoiner stsj;
- StringDedup::Requests requests;
- _cm->mark_loop(worker_id, _terminator, GENERATION, true /*cancellable*/,
- ShenandoahStringDedup::is_enabled() ? ENQUEUE_DEDUP : NO_DEDUP,
- &requests);
+ _cm->mark_loop(worker_id, _terminator, GENERATION, true /*cancellable*/);
+ // Concurrent marking loop flushes Java thread buffers, coordinating with a handshake.
+ // Here, a GC worker has completed marking work, so it is a good time to flush its SATB buffers too.
+ SATBMarkQueueSet& satb_mq_set = ShenandoahBarrierSet::satb_mark_queue_set();
+ satb_mq_set.flush_queue(ShenandoahThreadLocalData::satb_mark_queue(Thread::current()));
}
};
@@ -71,36 +71,15 @@ class ShenandoahFinalMarkingTask : public WorkerTask {
private:
ShenandoahConcurrentMark* _cm;
TaskTerminator* _terminator;
- bool _dedup_string;
- ThreadsClaimTokenScope _threads_claim_token_scope; // needed for Threads::possibly_parallel_threads_do
public:
- ShenandoahFinalMarkingTask(ShenandoahConcurrentMark* cm, TaskTerminator* terminator, bool dedup_string) :
- WorkerTask("Shenandoah Final Mark"), _cm(cm), _terminator(terminator), _dedup_string(dedup_string),
- _threads_claim_token_scope() {
- }
+ ShenandoahFinalMarkingTask(ShenandoahConcurrentMark* cm, TaskTerminator* terminator) :
+ WorkerTask("Shenandoah Final Mark"), _cm(cm), _terminator(terminator) {}
void work(uint worker_id) {
- ShenandoahHeap* heap = ShenandoahHeap::heap();
-
+ ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::finish_mark, ShenandoahPhaseTimings::Work, worker_id, true);
ShenandoahParallelWorkerSession worker_session(worker_id);
- StringDedup::Requests requests;
- // First drain remaining SATB buffers.
- {
- ShenandoahObjToScanQueue* q = _cm->get_queue(worker_id);
- ShenandoahObjToScanQueue* old_q = _cm->get_old_queue(worker_id);
-
- ShenandoahSATBBufferClosure cl(q, old_q);
- SATBMarkQueueSet& satb_mq_set = ShenandoahBarrierSet::satb_mark_queue_set();
- while (satb_mq_set.apply_closure_to_completed_buffer(&cl)) {}
- assert(!heap->has_forwarded_objects(), "Not expected");
-
- ShenandoahFlushSATB tc(satb_mq_set);
- Threads::possibly_parallel_threads_do(true /* is_par */, &tc);
- }
- _cm->mark_loop(worker_id, _terminator, GENERATION, false /*not cancellable*/,
- _dedup_string ? ENQUEUE_DEDUP : NO_DEDUP,
- &requests);
+ _cm->mark_loop(worker_id, _terminator, GENERATION, false /*not cancellable*/);
assert(_cm->task_queues()->is_empty(), "Should be empty");
}
};
@@ -262,49 +241,54 @@ void ShenandoahConcurrentMark::finish_mark() {
}
void ShenandoahConcurrentMark::finish_mark_work() {
- // Finally mark everything else we've got in our queues during the previous steps.
- // It does two different things for concurrent vs. mark-compact GC:
- // - For concurrent GC, it starts with empty task queues, drains the remaining
- // SATB buffers, and then completes the marking closure.
- // - For mark-compact GC, it starts out with the task queues seeded by initial
- // root scan, and completes the closure, thus marking through all live objects
- // The implementation is the same, so it's shared here.
ShenandoahHeap* const heap = ShenandoahHeap::heap();
- ShenandoahGCPhase phase(ShenandoahPhaseTimings::finish_mark);
- uint nworkers = heap->workers()->active_workers();
- task_queues()->reserve(nworkers);
+ SATBMarkQueueSet& satb_mq_set = ShenandoahBarrierSet::satb_mark_queue_set();
- TaskTerminator terminator(nworkers, task_queues());
-
- switch (_generation->type()) {
- case YOUNG:{
- ShenandoahFinalMarkingTask<YOUNG> task(this, &terminator, ShenandoahStringDedup::is_enabled());
- heap->workers()->run_task(&task);
- break;
- }
- case OLD:{
- ShenandoahFinalMarkingTask<OLD> task(this, &terminator, ShenandoahStringDedup::is_enabled());
- heap->workers()->run_task(&task);
- break;
- }
- case GLOBAL:{
- ShenandoahFinalMarkingTask<GLOBAL> task(this, &terminator, ShenandoahStringDedup::is_enabled());
- heap->workers()->run_task(&task);
- break;
- }
- case NON_GEN:{
- ShenandoahFinalMarkingTask<NON_GEN> task(this, &terminator, ShenandoahStringDedup::is_enabled());
- heap->workers()->run_task(&task);
- break;
- }
- default:
- ShouldNotReachHere();
+ // First drain all remaining SATB buffers and put them to SATB MQ.
+ // Also, while we are iterating threads, mark the invisible roots.
+ {
+ ShenandoahTimingsTracker t(ShenandoahPhaseTimings::final_mark_flush_satb_roots);
+ ShenandoahInvisibleRootsMarkClosure invisible_cl;
+ ShenandoahFlushSATB flush_cl(satb_mq_set);
+ ShenandoahMultiThreadClosure mux(flush_cl, invisible_cl);
+ Threads::threads_do(&mux); // per thread: flush_cl first, then invisible_cl
}
- if (!generation()->is_old() && heap->is_concurrent_young_mark_in_progress()) {
- // Lastly, ensure all the invisible roots are marked.
- ShenandoahInvisibleRootsMarkClosure cl;
- Threads::java_threads_do(&cl);
+
+ // There is a very high chance we have already completed the marking.
+ // But if there is outstanding work, finish it now.
+ if (!task_queues()->is_empty() || satb_mq_set.completed_buffers_num() > 0) {
+ ShenandoahGCPhase phase(ShenandoahPhaseTimings::finish_mark);
+
+ uint nworkers = heap->workers()->active_workers();
+ task_queues()->reserve(nworkers);
+ TaskTerminator terminator(nworkers, task_queues());
+
+ switch (_generation->type()) { // pick the task instantiation matching the generation type
+ case YOUNG:{
+ ShenandoahFinalMarkingTask<YOUNG> task(this, &terminator);
+ heap->workers()->run_task(&task);
+ break;
+ }
+ case OLD:{
+ ShenandoahFinalMarkingTask<OLD> task(this, &terminator);
+ heap->workers()->run_task(&task);
+ break;
+ }
+ case GLOBAL:{
+ ShenandoahFinalMarkingTask<GLOBAL> task(this, &terminator);
+ heap->workers()->run_task(&task);
+ break;
+ }
+ case NON_GEN:{
+ ShenandoahFinalMarkingTask<NON_GEN> task(this, &terminator);
+ heap->workers()->run_task(&task);
+ break;
+ }
+ default:
+ ShouldNotReachHere();
+ }
}
assert(task_queues()->is_empty(), "Should be empty");
+ assert(satb_mq_set.completed_buffers_num() == 0, "Should be empty");
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
index 6175f15676c..f2447db8210 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
@@ -68,7 +68,7 @@ void ShenandoahControlThread::run_service() {
// Figure out if we have pending requests.
const bool alloc_failure_pending = ShenandoahCollectorPolicy::is_allocation_failure(cancelled_cause);
- const bool is_gc_requested = _gc_requested.is_set();
+ const bool is_gc_requested = _gc_requested.try_unset();
const GCCause::Cause requested_gc_cause = _requested_gc_cause;
// Choose which GC mode to run in. The block below should select a single mode.
@@ -138,11 +138,6 @@ void ShenandoahControlThread::run_service() {
heuristics->cancel_trigger_request();
- if (mode != stw_degenerated) {
- // If mode is stw_degenerated, count bytes allocated from the start of the conc GC that experienced alloc failure.
- heap->reset_bytes_allocated_since_gc_start();
- }
-
MetaspaceCombinedStats meta_sizes = MetaspaceUtils::get_combined_statistics();
// If GC was requested, we are sampling the counters even without actual triggers
@@ -406,7 +401,6 @@ void ShenandoahControlThread::handle_requested_gc(GCCause::Cause cause) {
}
void ShenandoahControlThread::notify_gc_waiters() {
- _gc_requested.unset();
MonitorLocker ml(&_gc_waiters_lock);
ml.notify_all();
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.cpp
new file mode 100644
index 00000000000..86d0a08bafe
--- /dev/null
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "gc/shenandoah/shenandoahCycleDuration.hpp"
+#include "logging/log.hpp"
+#include "logging/logTag.hpp"
+#include "runtime/mutexLocker.hpp"
+
+#include <cmath>
+
+
+// Creates the lock protecting the cycle time history and constructs the history with 'size'.
+ShenandoahCycleDuration::ShenandoahCycleDuration(uint size) :
+  _gc_times_lock(Mutex::nosafepoint - 2, "ShenandoahCycleTimes_lock", true),
+  _gc_times(size) {
+}
+
+// Logs the cycle's start timestamp and duration, then adds the pair to the history
+// while holding the history lock.
+void ShenandoahCycleDuration::record_duration(double timestamp_at_start, double duration) {
+  log_debug(gc, sampling)("Cycle started at: %.3f, completed in %.3fs", timestamp_at_start, duration);
+  {
+    MonitorLocker ml(&_gc_times_lock, Mutex::_no_safepoint_check_flag);
+    _gc_times.add(timestamp_at_start, duration);
+  }
+}
+
+// Predicts the duration of a cycle starting at 'timestamp_at_start' from the recorded
+// history, padded by 'margin_of_error' times the history's residual sd. When the
+// prediction is not a finite positive number, the history's average padded by
+// 'margin_of_error' times its sd is returned instead.
+double ShenandoahCycleDuration::predict_duration(double timestamp_at_start, double margin_of_error) {
+  MonitorLocker ml(&_gc_times_lock, Mutex::_no_safepoint_check_flag);
+
+  const double predicted = _gc_times.predict_y(timestamp_at_start);
+  const bool usable = std::isfinite(predicted) && predicted > 0.0;
+  if (!usable) {
+    // Return average time, rather than negative or zero time.
+    return _gc_times.average() + _gc_times.sd() * margin_of_error;
+  }
+
+  return predicted + _gc_times.residual_sd() * margin_of_error;
+}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.hpp
new file mode 100644
index 00000000000..8bfcc7c3748
--- /dev/null
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHCYCLEDURATION_HPP
+#define SHARE_GC_SHENANDOAH_SHENANDOAHCYCLEDURATION_HPP
+
+#include "gc/shenandoah/shenandoahWeightedSeq.hpp"
+#include "runtime/mutex.hpp"
+
+class ShenandoahCycleDuration { // Records GC cycle durations and predicts the duration of a future cycle.
+ // To enable detection of GC time trends, we keep a separate record of the recent history of GC times. During
+ // initialization, for example, the amount of live memory may be increasing, which is likely to cause GC times to
+ // increase. This history lets us predict increasing GC times rather than always assuming the recent average is the best predictor.
+ static constexpr uint GC_TIME_SAMPLE_SIZE = 15; // default 'size' argument of the constructor
+
+ // Written by control thread, read by regulator thread
+ Monitor _gc_times_lock; // held around every access to _gc_times
+ ShenandoahWeightedSeq _gc_times; // history of (cycle start timestamp, cycle duration) samples
+
+public:
+ explicit ShenandoahCycleDuration(uint size = GC_TIME_SAMPLE_SIZE); // 'size' is passed to the sample history
+ void record_duration(double timestamp_at_start, double duration); // adds one sample to the history
+ double predict_duration(double timestamp_at_start, double margin_of_error); // predicted duration plus a margin scaled by margin_of_error
+};
+
+#endif // SHARE_GC_SHENANDOAH_SHENANDOAHCYCLEDURATION_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp
index 7be3141a4fa..b2d578c599d 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp
@@ -347,7 +347,7 @@ void ShenandoahDegenGC::op_reset() {
void ShenandoahDegenGC::op_mark() {
assert(!_generation->is_concurrent_mark_in_progress(), "Should be reset");
- ShenandoahGCPhase phase(ShenandoahPhaseTimings::degen_gc_stw_mark);
+ ShenandoahGCPhase phase(ShenandoahPhaseTimings::degen_gc_mark);
ShenandoahSTWMark mark(_generation, false /*full gc*/);
mark.mark();
}
@@ -410,7 +410,7 @@ void ShenandoahDegenGC::op_cleanup_early() {
}
void ShenandoahDegenGC::op_evacuate() {
- ShenandoahGCPhase phase(ShenandoahPhaseTimings::degen_gc_stw_evac);
+ ShenandoahGCPhase phase(ShenandoahPhaseTimings::degen_gc_evac);
ShenandoahHeap::heap()->evacuate_collection_set(_generation, false /* concurrent*/);
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
index fdce385e0f6..4c474f2f86c 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp
@@ -289,32 +289,6 @@ void ShenandoahFreeSet::resize_old_collector_capacity(size_t regions) {
// else, old generation is already appropriately sized
}
-
-void ShenandoahFreeSet::reset_bytes_allocated_since_gc_start(size_t initial_bytes_allocated) {
- shenandoah_assert_heaplocked();
- // Future inquiries of get_total_bytes_allocated() will return the sum of
- // _total_bytes_previously_allocated and _mutator_bytes_allocated_since_gc_start.
- // Since _mutator_bytes_allocated_since_gc_start does not start at zero, we subtract initial_bytes_allocated so as
- // to not double count these allocated bytes.
- size_t original_mutator_bytes_allocated_since_gc_start = _mutator_bytes_allocated_since_gc_start;
-
- // Setting _mutator_bytes_allocated_since_gc_start before _total_bytes_previously_allocated reduces the damage
- // in the case that the control or regulator thread queries get_bytes_allocated_since_previous_sample() between
- // the two assignments.
- //
- // These are not declared as volatile so the compiler or hardware may reorder the assignments. The implementation of
- // get_bytes_allocated_since_previous_cycle() is robust to this possibility, as are triggering heuristics. The current
- // implementation assumes we are better off to tolerate the very rare race rather than impose a synchronization penalty
- // on every update and fetch. (Perhaps it would be better to make the opposite tradeoff for improved maintainability.)
- _mutator_bytes_allocated_since_gc_start = initial_bytes_allocated;
- _total_bytes_previously_allocated += original_mutator_bytes_allocated_since_gc_start - initial_bytes_allocated;
-}
-
-void ShenandoahFreeSet::increase_bytes_allocated(size_t bytes) {
- shenandoah_assert_heaplocked();
- _mutator_bytes_allocated_since_gc_start += bytes;
-}
-
inline idx_t ShenandoahRegionPartitions::leftmost(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "selected free partition must be valid");
idx_t idx = _leftmosts[int(which_partition)];
@@ -1229,8 +1203,6 @@ inline void ShenandoahRegionPartitions::assert_bounds_sanity() {
ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) :
_heap(heap),
_partitions(max_regions, this),
- _total_bytes_previously_allocated(0),
- _mutator_bytes_at_last_sample(0),
_total_humongous_waste(0),
_alloc_bias_weight(0),
_total_young_used(0),
@@ -1242,8 +1214,7 @@ ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) :
_young_unaffiliated_regions(0),
_global_unaffiliated_regions(0),
_total_young_regions(0),
- _total_global_regions(0),
- _mutator_bytes_allocated_since_gc_start(0)
+ _total_global_regions(0)
{
clear_internal();
}
@@ -1507,55 +1478,6 @@ HeapWord* ShenandoahFreeSet::try_allocate_from_mutator(ShenandoahAllocRequest& r
return nullptr;
}
-// This work method takes an argument corresponding to the number of bytes
-// free in a region, and returns the largest amount in heapwords that can be allocated
-// such that both of the following conditions are satisfied:
-//
-// 1. it is a multiple of card size
-// 2. any remaining shard may be filled with a filler object
-//
-// The idea is that the allocation starts and ends at card boundaries. Because
-// a region ('s end) is card-aligned, the remainder shard that must be filled is
-// at the start of the free space.
-//
-// This is merely a helper method to use for the purpose of such a calculation.
-size_t ShenandoahFreeSet::get_usable_free_words(size_t free_bytes) const {
- // e.g. card_size is 512, card_shift is 9, min_fill_size() is 8
- // free is 514
- // usable_free is 512, which is decreased to 0
- size_t usable_free = (free_bytes / CardTable::card_size()) << CardTable::card_shift();
- assert(usable_free <= free_bytes, "Sanity check");
- if ((free_bytes != usable_free) && (free_bytes - usable_free < ShenandoahHeap::min_fill_size() * HeapWordSize)) {
- // After aligning to card multiples, the remainder would be smaller than
- // the minimum filler object, so we'll need to take away another card's
- // worth to construct a filler object.
- if (usable_free >= CardTable::card_size()) {
- usable_free -= CardTable::card_size();
- } else {
- assert(usable_free == 0, "usable_free is a multiple of card_size and card_size > min_fill_size");
- }
- }
-
- return usable_free / HeapWordSize;
-}
-
-// Given a size argument, which is a multiple of card size, a request struct
-// for a PLAB, and an old region, return a pointer to the allocated space for
-// a PLAB which is card-aligned and where any remaining shard in the region
-// has been suitably filled by a filler object.
-// It is assumed (and assertion-checked) that such an allocation is always possible.
-HeapWord* ShenandoahFreeSet::allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r) {
- assert(_heap->mode()->is_generational(), "PLABs are only for generational mode");
- assert(r->is_old(), "All PLABs reside in old-gen");
- assert(!req.is_mutator_alloc(), "PLABs should not be allocated by mutators.");
- assert(is_aligned(size, CardTable::card_size_in_words()), "Align by design");
-
- HeapWord* result = r->allocate_aligned(size, req, CardTable::card_size());
- assert(result != nullptr, "Allocation cannot fail");
- assert(r->top() <= r->end(), "Allocation cannot span end of region");
- assert(is_aligned(result, CardTable::card_size_in_words()), "Align by design");
- return result;
-}
HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, ShenandoahAllocRequest& req, bool& in_new_region) {
assert (has_alloc_capacity(r), "Performance: should avoid full regions on this path: %zu", r->index());
@@ -1607,44 +1529,17 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
// req.size() is in words, r->free() is in bytes.
if (req.is_lab_alloc()) {
size_t adjusted_size = req.size();
- size_t free = r->free(); // free represents bytes available within region r
- if (req.is_old()) {
- // This is a PLAB allocation(lab alloc in old gen)
- assert(_heap->mode()->is_generational(), "PLABs are only for generational mode");
- assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::OldCollector, r->index()),
- "PLABS must be allocated in old_collector_free regions");
-
- // Need to assure that plabs are aligned on multiple of card region
- // Convert free from unaligned bytes to aligned number of words
- size_t usable_free = get_usable_free_words(free);
- if (adjusted_size > usable_free) {
- adjusted_size = usable_free;
- }
- adjusted_size = align_down(adjusted_size, CardTable::card_size_in_words());
- if (adjusted_size >= req.min_size()) {
- result = allocate_aligned_plab(adjusted_size, req, r);
- assert(result != nullptr, "allocate must succeed");
- req.set_actual_size(adjusted_size);
- } else {
- // Otherwise, leave result == nullptr because the adjusted size is smaller than min size.
- log_trace(gc, free)("Failed to shrink PLAB request (%zu) in region %zu to %zu"
- " because min_size() is %zu", req.size(), r->index(), adjusted_size, req.min_size());
- }
+ size_t free = align_down(r->free() >> LogHeapWordSize, MinObjAlignment);
+ if (adjusted_size > free) {
+ adjusted_size = free;
+ }
+ if (adjusted_size >= req.min_size()) {
+ result = r->allocate(adjusted_size, req);
+ assert (result != nullptr, "Allocation must succeed: free %zu, actual %zu", free, adjusted_size);
+ req.set_actual_size(adjusted_size);
} else {
- // This is a GCLAB or a TLAB allocation
- // Convert free from unaligned bytes to aligned number of words
- free = align_down(free >> LogHeapWordSize, MinObjAlignment);
- if (adjusted_size > free) {
- adjusted_size = free;
- }
- if (adjusted_size >= req.min_size()) {
- result = r->allocate(adjusted_size, req);
- assert (result != nullptr, "Allocation must succeed: free %zu, actual %zu", free, adjusted_size);
- req.set_actual_size(adjusted_size);
- } else {
- log_trace(gc, free)("Failed to shrink TLAB or GCLAB request (%zu) in region %zu to %zu"
- " because min_size() is %zu", req.size(), r->index(), adjusted_size, req.min_size());
- }
+ log_trace(gc, free)("Failed to shrink LAB request (%zu) in region %zu to %zu"
+ " because min_size() is %zu", req.size(), r->index(), adjusted_size, req.min_size());
}
} else {
size_t size = req.size();
@@ -1660,7 +1555,6 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
if (req.is_mutator_alloc()) {
assert(req.is_young(), "Mutator allocations always come from young generation.");
_partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize);
- increase_bytes_allocated(req.actual_size() * HeapWordSize);
} else {
assert(req.is_gc_alloc(), "Should be gc_alloc since req wasn't mutator alloc");
@@ -1699,7 +1593,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
size_t waste_bytes = _partitions.retire_from_partition(orig_partition, idx, r->used());
DEBUG_ONLY(boundary_changed = true;)
if (req.is_mutator_alloc() && (waste_bytes > 0)) {
- increase_bytes_allocated(waste_bytes);
+ req.set_waste(waste_bytes / HeapWordSize);
}
}
@@ -1871,15 +1765,9 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req, bo
}
}
_partitions.decrease_empty_region_counts(ShenandoahFreeSetPartitionId::Mutator, num);
- if (waste_bytes > 0) {
- // For humongous allocations, waste_bytes are included in total_used. Since this is not humongous,
- // we need to account separately for the waste_bytes.
- increase_bytes_allocated(waste_bytes);
- }
}
_partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, total_used);
- increase_bytes_allocated(total_used);
req.set_actual_size(words_size);
// If !is_humongous, the "waste" is made availabe for new allocation
if (waste_bytes > 0) {
@@ -3053,25 +2941,27 @@ void ShenandoahFreeSet::log_status_under_lock() {
}
void ShenandoahFreeSet::log_freeset_stats(ShenandoahFreeSetPartitionId partition_id, LogStream& ls) {
- size_t max = 0;
- size_t total_free = 0;
- size_t total_used = 0;
+ size_t max_free_in_single_region = 0;
+ size_t freeset_free = 0;
+ size_t freeset_total_used = 0;
for (idx_t idx = _partitions.leftmost(partition_id);
idx <= _partitions.rightmost(partition_id); idx++) {
if (_partitions.in_free_set(partition_id, idx)) {
ShenandoahHeapRegion *r = _heap->get_region(idx);
size_t free = alloc_capacity(r);
- max = MAX2(max, free);
- total_free += free;
- total_used += r->used();
+ max_free_in_single_region = MAX2(max_free_in_single_region, free);
+ freeset_free += free;
+ freeset_total_used += ShenandoahHeapRegion::region_size_bytes() - free;
}
}
- ls.print(" %s freeset stats: Partition count: %zu, Reserved: " PROPERFMT ", Max free available in a single region: " PROPERFMT ";",
- partition_name(partition_id),
- _partitions.count(partition_id),
- PROPERFMTARGS(total_free), PROPERFMTARGS(max)
+ ls.print_cr(" %s partition stats: regions in capacity: %zu, regions in freeset: %zu. "
+ "Used size including retired regions: " PROPERFMT ", used size in freeset: " PROPERFMT
+ ". Free available size: " PROPERFMT ". Max free available in a single region: " PROPERFMT ".",
+ partition_name(partition_id), _partitions.get_capacity_region_count(partition_id), _partitions.count(partition_id),
+ PROPERFMTARGS(_partitions.get_used(partition_id)), PROPERFMTARGS(freeset_total_used),
+ PROPERFMTARGS(freeset_free), PROPERFMTARGS(max_free_in_single_region)
);
}
@@ -3153,12 +3043,10 @@ void ShenandoahFreeSet::log_status() {
{
idx_t last_idx = 0;
- size_t max = 0;
size_t max_contig = 0;
size_t empty_contig = 0;
- size_t total_used = 0;
- size_t total_free = 0;
+ size_t total_used_in_freeset = 0;
size_t total_free_ext = 0;
for (idx_t idx = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator);
@@ -3166,7 +3054,6 @@ void ShenandoahFreeSet::log_status() {
if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, idx)) {
ShenandoahHeapRegion *r = _heap->get_region(idx);
size_t free = alloc_capacity(r);
- max = MAX2(max, free);
size_t used_in_region = r->used();
if (r->is_empty_or_trash()) {
used_in_region = 0;
@@ -3179,42 +3066,35 @@ void ShenandoahFreeSet::log_status() {
} else {
empty_contig = 0;
}
- total_used += used_in_region;
- total_free += free;
+ total_used_in_freeset += used_in_region;
max_contig = MAX2(max_contig, empty_contig);
last_idx = idx;
}
}
size_t max_humongous = max_contig * ShenandoahHeapRegion::region_size_bytes();
- // capacity() is capacity of mutator
- // used() is used of mutator
- size_t free = capacity_holding_lock() - used_holding_lock();
- // Since certain regions that belonged to the Mutator free partition at the time of most recent rebuild may have been
- // retired, the sum of used and capacities within regions that are still in the Mutator free partition may not match
- // my internally tracked values of used() and free().
- assert(free == total_free, "Free memory (%zu) should match calculated memory (%zu)", free, total_free);
- ls.print("Whole heap stats: Total free: " PROPERFMT ", Total used: " PROPERFMT ", Max free in a single region: " PROPERFMT
- ", Max humongous: " PROPERFMT "; ",
- PROPERFMTARGS(total_free), PROPERFMTARGS(total_used), PROPERFMTARGS(max), PROPERFMTARGS(max_humongous));
- ls.print("Frag stats: ");
- size_t frag_ext;
+ size_t total_free = available_locked() + collector_available_locked();
+ total_free += old_collector_available_locked();
+ ls.print("Whole heap stats: Total free: " PROPERFMT ", Total used: " PROPERFMT
+ ", Max humongous allocatable: " PROPERFMT "; ",
+ PROPERFMTARGS(total_free), PROPERFMTARGS(global_used()), PROPERFMTARGS(max_humongous));
+
+ double frag_ext;
if (total_free_ext > 0) {
- frag_ext = 100 - (100 * max_humongous / total_free_ext);
+ frag_ext = 100 - (100.0 * max_humongous / total_free_ext);
} else {
frag_ext = 0;
}
- ls.print("External: %zu%%, ", frag_ext);
+ ls.print("External fragmentation: %.2f%%; ", frag_ext);
- size_t frag_int;
- if (_partitions.count(ShenandoahFreeSetPartitionId::Mutator) > 0) {
- frag_int = (100 * (total_used / _partitions.count(ShenandoahFreeSetPartitionId::Mutator))
- / ShenandoahHeapRegion::region_size_bytes());
- } else {
- frag_int = 0;
+ double mutator_filling_percentage = 0;
+ size_t mutator_partition = _partitions.count(ShenandoahFreeSetPartitionId::Mutator);
+ if (mutator_partition > 0) {
+ mutator_filling_percentage = 100 * (1.0 * total_used_in_freeset / mutator_partition)
+ / ShenandoahHeapRegion::region_size_bytes();
}
- ls.print("Internal: %zu%%; ", frag_int);
+ ls.print_cr("Mutator freeset filling percentage: %.2f%%", mutator_filling_percentage);
}
log_freeset_stats(ShenandoahFreeSetPartitionId::Mutator, ls);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
index f7ba1f05f47..7481b81c9c6 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp
@@ -302,11 +302,15 @@ public:
inline void increase_capacity(ShenandoahFreeSetPartitionId which_partition, size_t bytes);
inline void decrease_capacity(ShenandoahFreeSetPartitionId which_partition, size_t bytes);
- inline size_t get_capacity(ShenandoahFreeSetPartitionId which_partition) {
+ inline size_t get_capacity(ShenandoahFreeSetPartitionId which_partition) const {
assert (which_partition < NumPartitions, "Partition must be valid");
return _capacity[int(which_partition)];
}
+ inline size_t get_capacity_region_count(ShenandoahFreeSetPartitionId which_partition) const {
+ return get_capacity(which_partition) / ShenandoahHeapRegion::region_size_bytes();
+ }
+
inline void increase_available(ShenandoahFreeSetPartitionId which_partition, size_t bytes);
inline void decrease_available(ShenandoahFreeSetPartitionId which_partition, size_t bytes);
inline size_t get_available(ShenandoahFreeSetPartitionId which_partition);
@@ -437,9 +441,6 @@ private:
ShenandoahHeap* const _heap;
ShenandoahRegionPartitions _partitions;
- size_t _total_bytes_previously_allocated;
- size_t _mutator_bytes_at_last_sample;
-
// Temporarily holds mutator_Free allocatable bytes between prepare_to_rebuild() and finish_rebuild()
size_t _prepare_to_rebuild_mutator_free;
@@ -452,7 +453,6 @@ private:
// locks will acquire them in the same order: first the global heap lock and then the rebuild lock.
ShenandoahRebuildLock _rebuild_lock;
- HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r);
size_t _total_humongous_waste;
@@ -515,8 +515,6 @@ private:
size_t _total_young_regions;
size_t _total_global_regions;
- size_t _mutator_bytes_allocated_since_gc_start;
-
// If only affiliation changes are promote-in-place and generation sizes have not changed,
// we have AffiliatedChangesAreGlobalNeutral
// If only affiliation changes are non-empty regions moved from Mutator to Collector and young size has not changed,
@@ -640,7 +638,6 @@ private:
// Determine whether we prefer to allocate from left to right or from right to left within the OldCollector free-set.
void establish_old_collector_alloc_bias();
- size_t get_usable_free_words(size_t free_bytes) const;
void reduce_young_reserve(size_t adjusted_young_reserve, size_t requested_young_reserve);
void reduce_old_reserve(size_t adjusted_old_reserve, size_t requested_old_reserve);
@@ -664,37 +661,6 @@ public:
return _partitions.shrink_interval_if_range_modifies_either_boundary(partition, low_idx, high_idx, num_regions);
}
- void reset_bytes_allocated_since_gc_start(size_t initial_bytes_allocated);
-
- void increase_bytes_allocated(size_t bytes);
-
- // Return an approximation of the bytes allocated since GC start. The value returned is monotonically non-decreasing
- // in time within each GC cycle. For certain GC cycles, the value returned may include some bytes allocated before
- // the start of the current GC cycle.
- inline size_t get_bytes_allocated_since_gc_start() const {
- return _mutator_bytes_allocated_since_gc_start;
- }
-
- inline size_t get_total_bytes_allocated() {
- return _mutator_bytes_allocated_since_gc_start + _total_bytes_previously_allocated;
- }
-
- inline size_t get_bytes_allocated_since_previous_sample() {
- const size_t total_bytes_allocated = get_total_bytes_allocated();
- // total_bytes_allocated could overflow (wraps around) size_t in rare condition, we are relying on
- // wrap-around arithmetic of size_t type to produce meaningful result when total_bytes_allocated overflows
- // its 64-bit counter. The expression below is equivalent to code:
- // if (total_bytes < _mutator_bytes_at_last_sample) {
- // // overflow
- // return total_bytes + (SIZE_T_MAX - _mutator_bytes_at_last_sample) + 1;
- // } else {
- // return total_bytes - _mutator_bytes_at_last_sample;
- // }
- const size_t result = total_bytes_allocated - _mutator_bytes_at_last_sample;
- _mutator_bytes_at_last_sample = total_bytes_allocated;
- return result;
- }
-
// Public because ShenandoahRegionPartitions assertions require access.
inline size_t alloc_capacity(ShenandoahHeapRegion *r) const;
inline size_t alloc_capacity(size_t idx) const;
@@ -760,7 +726,7 @@ public:
}
inline size_t total_old_regions() {
- return _partitions.get_capacity(ShenandoahFreeSetPartitionId::OldCollector) / ShenandoahHeapRegion::region_size_bytes();
+ return _partitions.get_capacity_region_count(ShenandoahFreeSetPartitionId::OldCollector);
}
size_t total_global_regions() {
@@ -853,6 +819,10 @@ public:
return _partitions.available_in(ShenandoahFreeSetPartitionId::Collector);
}
+ inline size_t old_collector_available_locked() const {
+ return _partitions.available_in(ShenandoahFreeSetPartitionId::OldCollector);
+ }
+
inline size_t total_humongous_waste() const { return _total_humongous_waste; }
inline size_t humongous_waste_in_mutator() const {
return _partitions.humongous_waste(ShenandoahFreeSetPartitionId::Mutator);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp
index 365226a054c..cd04db383ed 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2014, 2021, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -855,15 +855,15 @@ void ShenandoahFullGC::phase3_update_references() {
WorkerThreads* workers = heap->workers();
uint nworkers = workers->active_workers();
{
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::clear();
-#endif
+#endif // COMPILER2
ShenandoahRootAdjuster rp(nworkers, ShenandoahPhaseTimings::full_gc_adjust_roots);
ShenandoahAdjustRootPointersTask task(&rp, _preserved_marks);
workers->run_task(&task);
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::update_pointers();
-#endif
+#endif // COMPILER2
}
ShenandoahAdjustPointersTask adjust_pointers_task;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGC.cpp
index 5a1e8b67694..6c6207e1618 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGC.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGC.cpp
@@ -90,9 +90,9 @@ void ShenandoahGC::update_roots(bool full_gc) {
ShenandoahPhaseTimings::degen_gc_update_roots;
ShenandoahGCPhase phase(p);
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::clear();
-#endif
+#endif // COMPILER2
ShenandoahHeap* const heap = ShenandoahHeap::heap();
WorkerThreads* workers = heap->workers();
@@ -102,7 +102,7 @@ void ShenandoahGC::update_roots(bool full_gc) {
ShenandoahUpdateRootsTask update_roots(&root_updater, check_alive);
workers->run_task(&update_roots);
-#if COMPILER2_OR_JVMCI
+#ifdef COMPILER2
DerivedPointerTable::update_pointers();
-#endif
+#endif // COMPILER2
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
index bc2028d077d..94f3409ac41 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
@@ -256,11 +256,6 @@ void ShenandoahGenerationalControlThread::run_gc_cycle(const ShenandoahGCRequest
GCIdMark gc_id_mark;
- if ((gc_mode() != servicing_old) && (gc_mode() != stw_degenerated)) {
- // If mode is stw_degenerated, count bytes allocated from the start of the conc GC that experienced alloc failure.
- _heap->reset_bytes_allocated_since_gc_start();
- }
-
MetaspaceCombinedStats meta_sizes = MetaspaceUtils::get_combined_statistics();
// If GC was requested, we are sampling the counters even without actual triggers
@@ -274,7 +269,7 @@ void ShenandoahGenerationalControlThread::run_gc_cycle(const ShenandoahGCRequest
// Cannot uncommit bitmap slices during concurrent reset
ShenandoahNoUncommitMark forbid_region_uncommit(_heap);
- // When a whitebox full GC is requested, set the tenuring threshold to zero
+ // When a whitebox full GC is requested, set the age census to always tenure
// so that all young objects are promoted to old. This ensures that tests
// using WB.fullGC() to promote objects to old gen will not loop forever.
ShenandoahTenuringOverride tenuring_override(request.cause == GCCause::_wb_full_gc,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp
index f203a8d1238..4af2c9b1e5d 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp
@@ -65,12 +65,11 @@ protected:
};
size_t ShenandoahGenerationalHeap::calculate_min_plab() {
- return align_up(PLAB::min_size(), CardTable::card_size_in_words());
+ return PLAB::min_size();
}
size_t ShenandoahGenerationalHeap::calculate_max_plab() {
- size_t MaxTLABSizeWords = ShenandoahHeapRegion::max_tlab_size_words();
- return align_down(MaxTLABSizeWords, CardTable::card_size_in_words());
+ return ShenandoahHeapRegion::max_tlab_size_words();
}
// Returns size in bytes
@@ -86,8 +85,6 @@ ShenandoahGenerationalHeap::ShenandoahGenerationalHeap(ShenandoahCollectorPolicy
_regulator_thread(nullptr),
_young_gen_memory_pool(nullptr),
_old_gen_memory_pool(nullptr) {
- assert(is_aligned(_min_plab_size, CardTable::card_size_in_words()), "min_plab_size must be aligned");
- assert(is_aligned(_max_plab_size, CardTable::card_size_in_words()), "max_plab_size must be aligned");
}
void ShenandoahGenerationalHeap::initialize_generations() {
@@ -672,7 +669,7 @@ void ShenandoahGenerationalHeap::coalesce_and_fill_old_regions(bool concurrent)
void work(uint worker_id) override {
ShenandoahWorkerTimingsTracker timer(_phase,
- ShenandoahPhaseTimings::ScanClusters,
+ ShenandoahPhaseTimings::Work,
worker_id, true);
ShenandoahHeapRegion* region;
while ((region = _regions.next()) != nullptr) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp
index a072fe2db06..a23812227ba 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp
@@ -28,8 +28,9 @@
#include "gc/shenandoah/shenandoahGlobalGeneration.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
+#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahUtils.hpp"
-#include "gc/shenandoah/shenandoahVerifier.hpp"
+#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
const char* ShenandoahGlobalGeneration::name() const {
@@ -49,10 +50,6 @@ size_t ShenandoahGlobalGeneration::used() const {
return _free_set->global_used();
}
-size_t ShenandoahGlobalGeneration::bytes_allocated_since_gc_start() const {
- return _free_set->get_bytes_allocated_since_gc_start();
-}
-
size_t ShenandoahGlobalGeneration::get_affiliated_region_count() const {
return _free_set->global_affiliated_regions();
}
@@ -61,7 +58,6 @@ size_t ShenandoahGlobalGeneration::get_humongous_waste() const {
return _free_set->total_humongous_waste();
}
-
size_t ShenandoahGlobalGeneration::used_regions() const {
return _free_set->global_affiliated_regions();
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp
index 9f9e4818a95..8b323d6ee47 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp
@@ -26,8 +26,6 @@
#define SHARE_VM_GC_SHENANDOAH_SHENANDOAHGLOBALGENERATION_HPP
#include "gc/shenandoah/shenandoahGeneration.hpp"
-#include "gc/shenandoah/shenandoahOldGeneration.hpp"
-#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
// A "generation" that represents the whole heap.
class ShenandoahGlobalGeneration : public ShenandoahGeneration {
@@ -46,7 +44,6 @@ public:
public:
const char* name() const override;
- size_t bytes_allocated_since_gc_start() const override;
size_t used() const override;
size_t used_regions() const override;
size_t used_regions_size() const override;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
index 2bedc53e24b..c0df4bbe10c 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2022, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -42,6 +42,7 @@
#include "gc/shenandoah/mode/shenandoahGenerationalMode.hpp"
#include "gc/shenandoah/mode/shenandoahPassiveMode.hpp"
#include "gc/shenandoah/mode/shenandoahSATBMode.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.inline.hpp"
#include "gc/shenandoah/shenandoahAllocRequest.hpp"
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahClosures.inline.hpp"
@@ -100,9 +101,6 @@
#include "utilities/events.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmci.hpp"
-#endif
#if INCLUDE_JFR
#include "gc/shenandoah/shenandoahJfrSupport.hpp"
#endif
@@ -557,6 +555,7 @@ ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) :
_active_generation(nullptr),
_initial_size(0),
_committed(0),
+ _alloc_rate_decay(&_alloc_rate),
_max_workers(MAX3(ConcGCThreads, ParallelGCThreads, 1U)),
_workers(nullptr),
_safepoint_workers(nullptr),
@@ -701,6 +700,11 @@ void ShenandoahHeap::post_initialize() {
// Schedule periodic task to report on gc thread CPU utilization
_mmu_tracker.initialize();
+ // Periodically decay the allocation rate to compensate for it not being updated when the
+ // allocation rate is low. Heuristics are evaluated unconditionally from a dedicated thread, so
+ // without decay they would keep seeing the last (possibly stale) rate while allocation is low.
+ _alloc_rate_decay.enroll();
+
MutexLocker ml(Threads_lock);
ShenandoahInitWorkerGCLABClosure init_gclabs;
@@ -822,8 +826,7 @@ bool ShenandoahHeap::check_soft_max_changed() {
size_t new_soft_max = AtomicAccess::load(&SoftMaxHeapSize);
size_t old_soft_max = soft_max_capacity();
if (new_soft_max != old_soft_max) {
- new_soft_max = MAX2(min_capacity(), new_soft_max);
- new_soft_max = MIN2(max_capacity(), new_soft_max);
+ new_soft_max = clamp(new_soft_max, min_capacity(), max_capacity());
if (new_soft_max != old_soft_max) {
log_info(gc)("Soft Max Heap Size: %zu%s -> %zu%s",
byte_size_in_proper_unit(old_soft_max), proper_unit_for_byte_size(old_soft_max),
@@ -1032,33 +1035,15 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
// memory.
HeapWord* result = _free_set->allocate(req, in_new_region);
- // Record the plab configuration for this result and register the object.
- if (result != nullptr && req.is_old()) {
- if (req.is_lab_alloc()) {
- old_generation()->configure_plab_for_current_thread(req);
- } else {
- // Register the newly allocated object while we're holding the global lock since there's no synchronization
- // built in to the implementation of register_object(). There are potential races when multiple independent
- // threads are allocating objects, some of which might span the same card region. For example, consider
- // a card table's memory region within which three objects are being allocated by three different threads:
- //
- // objects being "concurrently" allocated:
- // [-----a------][-----b-----][--------------c------------------]
- // [---- card table memory range --------------]
- //
- // Before any objects are allocated, this card's memory range holds no objects. Note that allocation of object a
- // wants to set the starts-object, first-start, and last-start attributes of the preceding card region.
- // Allocation of object b wants to set the starts-object, first-start, and last-start attributes of this card region.
- // Allocation of object c also wants to set the starts-object, first-start, and last-start attributes of this
- // card region.
- //
- // The thread allocating b and the thread allocating c can "race" in various ways, resulting in confusion, such as
- // last-start representing object b while first-start represents object c. This is why we need to require all
- // register_object() invocations to be "mutually exclusive" with respect to each card's memory range.
- old_generation()->card_scan()->register_object(result);
+ if (result != nullptr) {
+ if (req.is_mutator_alloc()) {
+ _alloc_rate.allocated((req.actual_size() + req.waste()) * HeapWordSize);
+ }
- if (req.is_promotion()) {
- // Shared promotion.
+ if (req.is_old()) {
+ if (req.is_lab_alloc()) {
+ old_generation()->configure_plab_for_current_thread(req);
+ } else if (req.is_promotion()) {
const size_t actual_size = req.actual_size() * HeapWordSize;
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
old_generation()->expend_promoted(actual_size);
@@ -1148,10 +1133,12 @@ public:
void work(uint worker_id) {
if (_concurrent) {
+ ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::conc_evac, ShenandoahPhaseTimings::Work, worker_id, true);
ShenandoahConcurrentWorkerSession worker_session(worker_id);
SuspendibleThreadSetJoiner stsj;
do_work();
} else {
+ ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::degen_gc_evac, ShenandoahPhaseTimings::Work, worker_id, true);
ShenandoahParallelWorkerSession worker_session(worker_id);
do_work();
}
@@ -2309,10 +2296,13 @@ void ShenandoahHeap::stop() {
// Step 1. Stop reporting on gc thread cpu utilization
mmu_tracker()->stop();
- // Step 2. Wait until GC worker exits normally (this will cancel any ongoing GC).
+ // Step 2. Stop decaying allocation rate.
+ _alloc_rate_decay.disenroll();
+
+ // Step 3. Wait until GC worker exits normally (this will cancel any ongoing GC).
control_thread()->stop();
- // Stop 4. Shutdown uncommit thread.
+ // Step 4. Shut down uncommit thread.
if (_uncommit_thread != nullptr) {
_uncommit_thread->stop();
}
@@ -2336,9 +2326,6 @@ void ShenandoahHeap::stw_unload_classes(bool full_gc) {
ShenandoahGCWorkerPhase worker_phase(phase);
bool unloading_occurred = SystemDictionary::do_unloading(gc_timer());
- // Clean JVMCI metadata handles.
- JVMCI_ONLY(JVMCI::do_unloading(unloading_occurred));
-
ShenandoahClassUnloadingTask unlink_task(phase, unloading_occurred);
_workers->run_task(&unlink_task);
}
@@ -2423,27 +2410,6 @@ address ShenandoahHeap::in_cset_fast_test_addr() {
return (address) heap->collection_set()->biased_map_address();
}
-void ShenandoahHeap::reset_bytes_allocated_since_gc_start() {
- // It is important to force_alloc_rate_sample() before the associated generation's bytes_allocated has been reset.
- // Note that we obtain heap lock to prevent additional allocations between sampling bytes_allocated_since_gc_start()
- // and reset_bytes_allocated_since_gc_start()
- {
- ShenandoahHeapLocker locker(lock());
- // unaccounted_bytes is the bytes not accounted for by our forced sample. If the sample interval is too short,
- // the "forced sample" will not happen, and any recently allocated bytes are "unaccounted for". We pretend these
- // bytes are allocated after the start of subsequent gc.
- size_t unaccounted_bytes;
- size_t bytes_allocated = _free_set->get_bytes_allocated_since_gc_start();
- if (mode()->is_generational()) {
- unaccounted_bytes = young_generation()->heuristics()->force_alloc_rate_sample(bytes_allocated);
- } else {
- // Single-gen Shenandoah uses global heuristics.
- unaccounted_bytes = heuristics()->force_alloc_rate_sample(bytes_allocated);
- }
- _free_set->reset_bytes_allocated_since_gc_start(unaccounted_bytes);
- }
-}
-
void ShenandoahHeap::set_degenerated_gc_in_progress(bool in_progress) {
_degenerated_gc_in_progress.set_cond(in_progress);
}
@@ -2575,10 +2541,12 @@ public:
void work(uint worker_id) {
if (CONCURRENT) {
+ ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::conc_update_refs, ShenandoahPhaseTimings::Work, worker_id, true);
ShenandoahConcurrentWorkerSession worker_session(worker_id);
SuspendibleThreadSetJoiner stsj;
do_work(worker_id);
} else {
+ ShenandoahWorkerTimingsTracker timer(ShenandoahPhaseTimings::degen_gc_update_refs, ShenandoahPhaseTimings::Work, worker_id, true);
ShenandoahParallelWorkerSession worker_session(worker_id);
do_work(worker_id);
}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
index fa435eaa1be..ad8cd220968 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -30,6 +30,7 @@
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/markBitMap.hpp"
#include "gc/shenandoah/mode/shenandoahMode.hpp"
+#include "gc/shenandoah/shenandoahAllocRate.hpp"
#include "gc/shenandoah/shenandoahAllocRequest.hpp"
#include "gc/shenandoah/shenandoahAsserts.hpp"
#include "gc/shenandoah/shenandoahController.hpp"
@@ -227,12 +228,13 @@ private:
Atomic _committed;
shenandoah_padding(1);
+ ShenandoahAllocationRate _alloc_rate;
+ ShenandoahDecayAllocRate _alloc_rate_decay;
+
public:
void increase_committed(size_t bytes);
void decrease_committed(size_t bytes);
- void reset_bytes_allocated_since_gc_start();
-
size_t min_capacity() const;
size_t max_capacity() const override;
size_t soft_max_capacity() const;
@@ -243,6 +245,10 @@ public:
void set_soft_max_capacity(size_t v);
+ ShenandoahAllocationRate& alloc_rate() {
+ return _alloc_rate;
+ }
+
// ---------- Periodic Tasks
//
public:
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
index 7853238f080..9040a81848e 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
@@ -386,12 +386,6 @@ public:
HeapWord* get_top_at_evac_start() const { return _top_at_evac_start; }
void record_top_at_evac_start() { _top_at_evac_start = _top; }
- // If next available memory is not aligned on address that is multiple of alignment, fill the empty space
- // so that returned object is aligned on an address that is a multiple of alignment_in_bytes. Requested
- // size is in words. It is assumed that this->is_old(). A pad object is allocated, filled, and registered
- // if necessary to assure the new allocation is properly aligned. Return nullptr if memory is not available.
- inline HeapWord* allocate_aligned(size_t word_size, ShenandoahAllocRequest &req, size_t alignment_in_bytes);
-
// Allocation (return nullptr if full)
inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
index 39b7c732703..f004fdf0ea2 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
@@ -33,59 +33,6 @@
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
-HeapWord* ShenandoahHeapRegion::allocate_aligned(size_t size, ShenandoahAllocRequest &req, size_t alignment_in_bytes) {
- shenandoah_assert_heaplocked_or_safepoint();
- assert(req.is_lab_alloc(), "allocate_aligned() only applies to LAB allocations");
- assert(is_object_aligned(size), "alloc size breaks alignment: %zu", size);
- assert(is_old(), "aligned allocations are only taken from OLD regions to support PLABs");
- assert(is_aligned(alignment_in_bytes, HeapWordSize), "Expect heap word alignment");
-
- HeapWord* orig_top = top();
- size_t alignment_in_words = alignment_in_bytes / HeapWordSize;
-
- // unalignment_words is the amount by which current top() exceeds the desired alignment point. We subtract this amount
- // from alignment_in_words to determine padding required to next alignment point.
-
- HeapWord* aligned_obj = (HeapWord*) align_up(orig_top, alignment_in_bytes);
- size_t pad_words = aligned_obj - orig_top;
- if ((pad_words > 0) && (pad_words < ShenandoahHeap::min_fill_size())) {
- pad_words += alignment_in_words;
- aligned_obj += alignment_in_words;
- }
-
- if (pointer_delta(end(), aligned_obj) < size) {
- // Shrink size to fit within available space and align it
- size = pointer_delta(end(), aligned_obj);
- size = align_down(size, alignment_in_words);
- }
-
- // Both originally requested size and adjusted size must be properly aligned
- assert (is_aligned(size, alignment_in_words), "Size must be multiple of alignment constraint");
- if (size >= req.min_size()) {
- // Even if req.min_size() may not be a multiple of card size, we know that size is.
- if (pad_words > 0) {
- assert(pad_words >= ShenandoahHeap::min_fill_size(), "pad_words expanded above to meet size constraint");
- ShenandoahHeap::fill_with_object(orig_top, pad_words);
- ShenandoahGenerationalHeap::heap()->old_generation()->card_scan()->register_object(orig_top);
- }
-
- make_regular_allocation(req.affiliation());
- adjust_alloc_metadata(req, size);
-
- HeapWord* new_top = aligned_obj + size;
- assert(new_top <= end(), "PLAB cannot span end of heap region");
- set_top(new_top);
- // We do not req.set_actual_size() here. The caller sets it.
- req.set_waste(pad_words);
- assert(is_object_aligned(new_top), "new top breaks alignment: " PTR_FORMAT, p2i(new_top));
- assert(is_aligned(aligned_obj, alignment_in_bytes), "obj is not aligned: " PTR_FORMAT, p2i(aligned_obj));
- return aligned_obj;
- } else {
- // The aligned size that fits in this region is smaller than min_size, so don't align top and don't allocate. Return failure.
- return nullptr;
- }
-}
-
HeapWord* ShenandoahHeapRegion::allocate_fill(size_t size) {
shenandoah_assert_heaplocked_or_safepoint();
assert(is_object_aligned(size), "alloc size breaks alignment: %zu", size);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahMark.cpp
index dfd921fdf0b..a72c557a5fe 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahMark.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahMark.cpp
@@ -51,10 +51,11 @@ void ShenandoahMark::end_mark() {
ShenandoahMark::ShenandoahMark(ShenandoahGeneration* generation) :
_generation(generation),
_task_queues(generation->task_queues()),
- _old_gen_task_queues(generation->old_gen_task_queues()) {
+ _old_gen_task_queues(generation->old_gen_task_queues()),
+ _string_dedup(StringDedup::is_enabled()) { // Value of StringDedup::is_enabled() captured once, at construction.
}
-template
+template
void ShenandoahMark::mark_loop_prework(uint w, TaskTerminator *t, StringDedup::Requests* const req, bool update_refs) {
ShenandoahObjToScanQueue* q = get_queue(w);
ShenandoahObjToScanQueue* old_q = get_old_queue(w);
@@ -77,7 +78,7 @@ void ShenandoahMark::mark_loop_prework(uint w, TaskTerminator *t, StringDedup::R
heap->flush_liveness_cache(w);
}
-template
+template
void ShenandoahMark::mark_loop(uint worker_id, TaskTerminator* terminator,
ShenandoahGenerationType generation_type, StringDedup::Requests* const req) {
bool update_refs = ShenandoahHeap::heap()->has_forwarded_objects();
@@ -102,35 +103,24 @@ void ShenandoahMark::mark_loop(uint worker_id, TaskTerminator* terminator,
}
void ShenandoahMark::mark_loop(uint worker_id, TaskTerminator* terminator, ShenandoahGenerationType generation_type,
- bool cancellable, StringDedupMode dedup_mode, StringDedup::Requests* const req) {
- if (cancellable) {
- switch(dedup_mode) {
- case NO_DEDUP:
- mark_loop(worker_id, terminator, generation_type, req);
- break;
- case ENQUEUE_DEDUP:
- mark_loop(worker_id, terminator, generation_type, req);
- break;
- case ALWAYS_DEDUP:
- mark_loop(worker_id, terminator, generation_type, req);
- break;
+ bool cancellable) {
+ if (_string_dedup) { // Dispatch on the dedup flag stored in this object instead of a caller-supplied mode.
+ StringDedup::Requests req; // Request object local to this call; its address is handed to the inner mark_loop.
+ if (cancellable) {
+ mark_loop(worker_id, terminator, generation_type, &req);
+ } else {
+ mark_loop(worker_id, terminator, generation_type, &req);
}
} else {
- switch(dedup_mode) {
- case NO_DEDUP:
- mark_loop(worker_id, terminator, generation_type, req);
- break;
- case ENQUEUE_DEDUP:
- mark_loop(worker_id, terminator, generation_type, req);
- break;
- case ALWAYS_DEDUP:
- mark_loop(worker_id, terminator, generation_type, req);
- break;
+ if (cancellable) {
+ mark_loop(worker_id, terminator, generation_type, nullptr); // Dedup flag is false: no request object is passed.
+ } else {
+ mark_loop(worker_id, terminator, generation_type, nullptr); // Dedup flag is false: no request object is passed.
}
}
}
-template
+template
void ShenandoahMark::mark_loop_work(T* cl, ShenandoahLiveData* live_data, uint worker_id, TaskTerminator *terminator, StringDedup::Requests* const req) {
uintx stride = ShenandoahMarkLoopStride;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahMark.hpp b/src/hotspot/share/gc/shenandoah/shenandoahMark.hpp
index 2fbb106f4d7..ee29c76dcaf 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahMark.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahMark.hpp
@@ -34,12 +34,6 @@
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahTaskqueue.hpp"
-enum StringDedupMode {
- NO_DEDUP, // Do not do anything for String deduplication
- ENQUEUE_DEDUP, // Enqueue candidate Strings for deduplication, if meet age threshold
- ALWAYS_DEDUP // Enqueue Strings for deduplication
-};
-
class ShenandoahMarkingContext;
// Base class for mark
@@ -50,6 +44,7 @@ protected:
ShenandoahGeneration* const _generation;
ShenandoahObjToScanQueueSet* const _task_queues;
ShenandoahObjToScanQueueSet* const _old_gen_task_queues;
+ bool const _string_dedup;
protected:
ShenandoahMark(ShenandoahGeneration* generation);
@@ -76,22 +71,22 @@ public:
private:
// ---------- Marking loop and tasks
- template
+ template
inline void do_task(ShenandoahObjToScanQueue* q, T* cl, ShenandoahLiveData* live_data, StringDedup::Requests* const req, ShenandoahMarkTask* task, uint worker_id);
template
- inline void do_chunked_array_start(ShenandoahObjToScanQueue* q, T* cl, oop array, bool weak);
+ inline void do_chunked_array_start(ShenandoahObjToScanQueue* q, T* cl, oop array, Klass* klass, bool weak);
template
inline void do_chunked_array(ShenandoahObjToScanQueue* q, T* cl, oop array, int chunk, int pow, bool weak);
template
- inline void count_liveness(ShenandoahLiveData* live_data, oop obj, uint worker_id);
+ inline void count_liveness(ShenandoahLiveData* live_data, oop obj, Klass* klass, uint worker_id);
- template
+ template
void mark_loop_work(T* cl, ShenandoahLiveData* live_data, uint worker_id, TaskTerminator *t, StringDedup::Requests* const req);
- template
+ template
void mark_loop_prework(uint worker_id, TaskTerminator *terminator, StringDedup::Requests* const req, bool update_refs);
template
@@ -104,15 +99,14 @@ private:
ShenandoahMarkingContext* const mark_context,
bool weak, oop obj);
- template